In [None]:
import pandas as pd
import numpy as np
import json
import time
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain.schema import HumanMessage
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field
from collections import Counter
import warnings
import pickle
from pathlib import Path
import datetime
warnings.filterwarnings('ignore')

# Banner so the captured cell output identifies the run.
print("LLM v2: Overnight Bulletproof Processing (1000 logs)")

# Directory for checkpoint files; created on first use.
checkpoint_dir = Path("llm_checkpoints")
checkpoint_dir.mkdir(exist_ok=True)

load_dotenv()
df = pd.read_csv('../results/nova_logs_with_bert.csv')
# Keep only the rows where both regex_label and bert_label are null.
unclassified_logs = df.loc[df[['regex_label', 'bert_label']].isna().all(axis=1)].copy()
print(f"Unclassified logs available: {len(unclassified_logs)}")

# Run state shared (via `global`) with the processing loop defined further down.
RESULTS_LIST, PROCESSED_COUNT, CONSECUTIVE_FAILURES = [], 0, 0
# Number of back-to-back failures after which the processing loop stops early.
MAX_CONSECUTIVE_FAILURES = 5

LLM v2: Overnight Bulletproof Processing (1000 logs)
Unclassified logs available: 14972


In [None]:
def save_checkpoint_immediately(results_list, count, output_dir=None):
    """Write every result collected so far to a CSV backup file.

    The file is named ``llm_v2_results_backup_{count}.csv`` and is placed in
    ``output_dir``. When ``output_dir`` is omitted the backup goes into the
    ``llm_checkpoints`` directory (created if missing) rather than the current
    working directory, so the backups live where the notebook says its
    checkpoint files are.

    Args:
        results_list: Sequence of objects exposing ``category``,
            ``confidence`` and ``reasoning`` attributes.
        count: Number of logs processed so far; used in the file name and in
            the progress message.
        output_dir: Optional directory (str or Path) for the backup file.

    Returns:
        The ``Path`` of the file that was written, or ``None`` when
        ``results_list`` is empty and nothing was saved.
    """
    if not results_list:
        return None

    target_dir = Path("llm_checkpoints") if output_dir is None else Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # log_index is the position inside results_list, not an index of the source DataFrame.
    rows = [
        {
            'log_index': position,
            'llm_category': result.category,
            'llm_confidence': result.confidence,
            'llm_reasoning': result.reasoning,
        }
        for position, result in enumerate(results_list)
    ]
    backup_path = target_dir / f'llm_v2_results_backup_{count}.csv'
    pd.DataFrame(rows).to_csv(backup_path, index=False)
    print(f"Checkpoint saved: {count} logs processed")
    return backup_path


def load_existing_checkpoint(filename="llm_v2_checkpoint.pkl"):
    """Load a previously pickled run state from the checkpoint directory.

    Args:
        filename: Name of the pickle file inside ``checkpoint_dir``.

    Returns:
        ``(results, processed_count)`` taken from the pickled dict, or
        ``([], 0)`` when the file does not exist.

    Raises:
        KeyError: If the pickled dict lacks ``processed_count``,
            ``success_rate`` or ``results``.
    """
    pickle_file = checkpoint_dir / filename
    if not pickle_file.exists():
        return [], 0

    # NOTE(review): pickle.load can execute arbitrary code; only load files this
    # project wrote itself. No code visible in this notebook writes this file -
    # confirm where it is produced.
    with pickle_file.open('rb') as handle:
        saved_state = pickle.load(handle)

    previous_count = saved_state['processed_count']
    print(f"Checkpoint found: {previous_count} logs previously processed")
    print(f"Previous success rate: {saved_state['success_rate']:.2%}")
    return saved_state['results'], previous_count

# Resume from a pickled checkpoint when one exists; otherwise start with an empty
# result list and a processed count of 0.
RESULTS_LIST, PROCESSED_COUNT = load_existing_checkpoint()
print(f"Starting from: {PROCESSED_COUNT} logs already processed")

Starting from: 0 logs already processed


In [None]:
def create_strategic_1k_sample(unclassified_logs, target_size=1000, skip_processed=0):
    """Build a deterministic, prioritised sample of logs for LLM classification.

    The sample is assembled in two stages:

    1. Rows whose ``raw_log_text`` contains ERROR, WARNING, CRITICAL, TIMEOUT
       or FAILED (case-insensitive).
    2. Rows from clusters 3, 5, 6, 9 and 13 (``cluster_id`` column) that were
       not already picked in stage 1.

    Quotas are defined for a 1000-row sample (400 error rows, then
    200/150/150/50/50 per cluster) and are scaled proportionally to
    ``target_size``; with the default of 1000 they are used unchanged. Every
    draw uses ``random_state=42``, so the same input yields the same sample.
    Fewer rows than ``target_size`` are returned when a stage does not have
    enough candidates.

    Args:
        unclassified_logs: DataFrame with ``raw_log_text`` and ``cluster_id``
            columns.
        target_size: Desired number of rows before ``skip_processed`` is
            applied.
        skip_processed: Number of leading rows to drop from the sample.

    Returns:
        DataFrame with a fresh 0..n-1 index (the original index is discarded).
    """
    base_size = 1000
    error_quota = 400
    cluster_targets = {3: 200, 5: 150, 6: 150, 9: 50, 13: 50}

    def scaled(quota):
        # Integer arithmetic keeps the quotas exact when target_size == base_size.
        return max(int(quota * target_size // base_size), 0)

    error_logs = unclassified_logs[
        unclassified_logs['raw_log_text'].str.contains(
            'ERROR|WARNING|CRITICAL|TIMEOUT|FAILED', case=False, na=False
        )
    ]
    priority_1 = error_logs.sample(
        n=min(scaled(error_quota), len(error_logs)), random_state=42
    )
    strategic_sample = [priority_1]

    # Index labels already drawn; later stages exclude them to avoid duplicates.
    used_indices = priority_1.index
    for cluster_id, target_count in cluster_targets.items():
        cluster_logs = unclassified_logs[
            (unclassified_logs['cluster_id'] == cluster_id)
            & (~unclassified_logs.index.isin(used_indices))
        ]
        sample_size = min(scaled(target_count), len(cluster_logs))
        if sample_size > 0:
            cluster_sample = cluster_logs.sample(n=sample_size, random_state=42)
            strategic_sample.append(cluster_sample)
            used_indices = used_indices.union(cluster_sample.index)

    final_sample = pd.concat(strategic_sample, ignore_index=True)
    if skip_processed > 0:
        final_sample = final_sample.iloc[skip_processed:].reset_index(drop=True)
        print(f"Skipping {skip_processed} already processed logs")
    return final_sample

# Build the sample to process, dropping the first PROCESSED_COUNT rows so a
# resumed run does not classify them again.
strategic_1k_logs = create_strategic_1k_sample(unclassified_logs, target_size=1000, skip_processed=PROCESSED_COUNT)
print(f"Strategic sample created: {len(strategic_1k_logs)} logs to process")

Strategic sample created: 1000 logs to process


In [None]:
class LogClassification(BaseModel):
    """Validated result of classifying one log message.

    All three fields are required; ``confidence`` must lie in [0.0, 1.0].
    """
    # Category name assigned to the log.
    category: str = Field(..., description="Classification category")
    # Confidence score, constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score")
    # Short free-text justification for the chosen category.
    reasoning: str = Field(..., description="Brief explanation")
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate

# The .env file is expected one directory above the current working directory.
dotenv_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, '.env'))
print(f"Loading .env from: {dotenv_path}")
load_dotenv(dotenv_path)


# Chat model used for every classification request; the API key is read from
# the GROQ_API_KEY environment variable.
llm = ChatGroq(
    model_name='llama-3.1-8b-instant',
    groq_api_key=os.getenv('GROQ_API_KEY'),
    max_tokens=120,
    temperature=0.3,
)

# Prompt used for every log line. {log_message} is substituted per request; the
# doubled braces around the example JSON are escapes, so the formatted prompt
# contains a literal {...} object. The category list uses short codes.
optimized_template = """Classify OpenStack log:

CATEGORIES:
SysOps, InstMgmt, NetOps, ResMgmt, SchedOps, BootErr, NetErr, FileErr, ConfigErr, ResErr, SvcErr

EXAMPLES:
- "WARNING _wait_for_boot timeout" → BootErr
- "INFO VIF plugged successfully" → NetOps  
- "ERROR file not found" → FileErr

LOG: {log_message}

Respond ONLY with a valid JSON object in the next line. Do NOT include any explanation, markdown, or formatting.
EXAMPLE RESPONSE: {{"category": "FileErr", "confidence": 0.8, "reasoning": "brief"}}
"""

# Template object handed to the classification function as its prompt_template.
prompt = PromptTemplate(input_variables=["log_message"], template=optimized_template)
def _extract_json_object(response_text):
    """Return the first non-empty JSON object embedded in ``response_text``, or None."""
    decoder = json.JSONDecoder()
    start = response_text.find('{')
    while start != -1:
        try:
            # raw_decode stops at the end of the object, so trailing prose or a
            # second object after it does not break parsing.
            candidate, _ = decoder.raw_decode(response_text, start)
        except ValueError:
            candidate = None
        if isinstance(candidate, dict) and candidate:
            return candidate
        start = response_text.find('{', start + 1)
    return None


def _normalise_confidence(raw_value):
    """Coerce a model-supplied confidence to a float clamped to [0.0, 1.0]."""
    try:
        value = float(raw_value)
    except (TypeError, ValueError):
        return 0.0
    if value != value:  # NaN is the only float that differs from itself
        return 0.0
    return min(max(value, 0.0), 1.0)


def classify_with_failure_detection(log_message, llm, prompt_template, max_retries=2):
    """Classify one log message with the LLM and report whether the call failed.

    The log message is truncated to 400 characters, formatted into
    ``prompt_template`` and followed by a JSON-only reminder (appended
    regardless of what the template already contains). The reply is parsed in
    this order:

    1. the first non-empty JSON object found anywhere in the reply;
    2. otherwise a bold markdown token such as ``**BootErr**``;
    3. otherwise the category ``Processing_Error`` with confidence 0.0.

    Short category codes are expanded to their long names; unknown categories
    are passed through unchanged. Confidence is coerced to a float and clamped
    to [0.0, 1.0], and a missing or null category/reasoning falls back to
    ``'Unknown'`` / ``'Classified'``, so a usable answer is not discarded over
    a malformed field.

    Args:
        log_message: Raw log text to classify.
        llm: Object with an ``invoke(messages)`` method whose return value has
            a ``content`` string.
        prompt_template: Object whose ``format(log_message=...)`` returns the
            prompt text.
        max_retries: Extra attempts after the first one when an exception is
            raised; each retry is preceded by a 2 second pause.

    Returns:
        ``(LogClassification, is_failure)``. ``is_failure`` is True only when
        every attempt raised an exception; a reply that could not be parsed
        yields ``Processing_Error`` with ``is_failure`` False.
    """
    import re

    # Built once per call instead of once per attempt.
    category_mapping = {
        'SysOps': 'System_Operations', 'InstMgmt': 'Instance_Management',
        'NetOps': 'Network_Operations', 'ResMgmt': 'Resource_Management',
        'SchedOps': 'Scheduler_Operations', 'BootErr': 'Boot_Timeout_Errors',
        'NetErr': 'Network_Connection_Errors', 'FileErr': 'File_System_Errors',
        'ConfigErr': 'Configuration_Errors', 'ResErr': 'Resource_Allocation_Errors',
        'SvcErr': 'Service_Communication_Errors'
    }

    # A negative max_retries still gets one attempt, so a result is always returned.
    for attempt in range(max(max_retries, 0) + 1):
        try:
            formatted_prompt = (
                prompt_template.format(log_message=log_message[:400])
                + "\nRespond ONLY with a valid JSON object in the next line. "
                + "Do NOT include any explanation, markdown, or formatting.\n"
                + 'EXAMPLE RESPONSE: {"category": "FileErr", "confidence": 0.8, "reasoning": "brief"}'
            )
            messages = [HumanMessage(content=formatted_prompt)]
            response = llm.invoke(messages)
            response_text = response.content.strip()
            print(f"LLM raw response: {response_text[:120]}")

            result_data = _extract_json_object(response_text)
            if result_data is None:
                # Last resort: a bold error code in a markdown-style reply.
                cat_match = re.search(r"\*\*(\w+Err(?:ors)?)\*\*", response_text)
                result_data = {
                    "category": cat_match.group(1) if cat_match else "Processing_Error",
                    "confidence": 0.0,
                    "reasoning": "Parsed from non-JSON LLM output"
                }

            category = result_data.get('category', result_data.get('cat', 'Unknown'))
            category = 'Unknown' if category is None else str(category).strip()
            category = category_mapping.get(category, category)

            reasoning = result_data.get('reasoning', 'Classified')
            reasoning = 'Classified' if reasoning is None else str(reasoning)

            result = LogClassification(
                category=category,
                confidence=_normalise_confidence(
                    result_data.get('confidence', result_data.get('conf', 0.0))
                ),
                reasoning=reasoning
            )
            return result, False
        except Exception as e:
            if attempt < max_retries:
                print(f"Retrying due to error: {e}")
                time.sleep(2)
                continue
            print(f"General exception: {e}")
            return LogClassification(
                category="Processing_Error",
                confidence=0.0,
                reasoning=f"Error: {str(e)[:50]}"
            ), True

print("Enhanced classification function with robust fallback created")

python-dotenv could not parse statement starting at line 1


Loading .env from: /Users/kxshrx/dev/log-classification/.env
Enhanced classification function with robust fallback created


In [None]:
def process_with_early_stopping_and_checkpoints(logs_list, llm, prompt_template, start_from=0,
                                                delay_seconds=2.5, checkpoint_every=25):
    """Classify logs one by one with periodic backups and early stopping.

    Results are appended to the global ``RESULTS_LIST`` and counted in the
    global ``PROCESSED_COUNT``. The loop stops once
    ``MAX_CONSECUTIVE_FAILURES`` classifications in a row are reported as
    failures. The failure streak is reset when the function starts, so a streak
    left behind by an earlier, early-stopped run cannot abort this one on its
    first failure.

    Args:
        logs_list: Sequence of log messages.
        llm: Model object forwarded to ``classify_with_failure_detection``.
        prompt_template: Prompt forwarded to ``classify_with_failure_detection``.
        start_from: Position in ``logs_list`` at which to begin.
        delay_seconds: Pause between requests; skipped after the last log.
        checkpoint_every: Save a backup each time the 1-based position is a
            multiple of this value; 0 or None disables periodic backups.

    Returns:
        The global ``RESULTS_LIST`` (also saved once more before returning).
    """
    global RESULTS_LIST, PROCESSED_COUNT, CONSECUTIVE_FAILURES
    failure_labels = ('Processing_Error', 'Rate_Limit_Error')
    total_logs = len(logs_list)

    # Hidden-state guard: start every run with a clean failure streak.
    CONSECUTIVE_FAILURES = 0

    print(f"Starting processing from log {start_from + 1}/{total_logs}")
    print(f"Early stopping after {MAX_CONSECUTIVE_FAILURES} consecutive failures")
    for idx in range(start_from, total_logs):
        result, is_failure = classify_with_failure_detection(logs_list[idx], llm, prompt_template)
        RESULTS_LIST.append(result)
        PROCESSED_COUNT += 1

        # Report progress after the classification so the count includes this log.
        if (idx + 1) % 10 == 0:
            succeeded = sum(1 for r in RESULTS_LIST if r.category not in failure_labels)
            success_rate = succeeded / max(len(RESULTS_LIST), 1)
            print(f"Processed {idx + 1}/{total_logs} ({(idx+1)/total_logs*100:.1f}%) - Success rate: {success_rate:.1%}")

        if is_failure:
            CONSECUTIVE_FAILURES += 1
            print(f" Failure {CONSECUTIVE_FAILURES}/{MAX_CONSECUTIVE_FAILURES}: {result.category}")
            if CONSECUTIVE_FAILURES >= MAX_CONSECUTIVE_FAILURES:
                print(f"\nEARLY STOPPING: {CONSECUTIVE_FAILURES} consecutive failures detected")
                print(f" Processed {len(RESULTS_LIST)} logs before stopping")
                break
        else:
            CONSECUTIVE_FAILURES = 0

        if checkpoint_every and (idx + 1) % checkpoint_every == 0:
            save_checkpoint_immediately(RESULTS_LIST, PROCESSED_COUNT)
        # No need to wait after the final request.
        if delay_seconds > 0 and idx + 1 < total_logs:
            time.sleep(delay_seconds)

    save_checkpoint_immediately(RESULTS_LIST, PROCESSED_COUNT)
    print(f"\nProcessing completed: {len(RESULTS_LIST)} total classifications")
    return RESULTS_LIST

# Plain list of log texts, in sample order, for the processing loop.
strategic_logs_list = strategic_1k_logs['raw_log_text'].to_list()

# Wall-clock timestamps around the run so the total duration can be reported.
start_time = datetime.datetime.now()
print(f"Start time: {start_time.strftime('%H:%M:%S')}")
final_results = process_with_early_stopping_and_checkpoints(
    logs_list=strategic_logs_list,
    llm=llm,
    prompt_template=prompt,
    start_from=0,
)
end_time = datetime.datetime.now()
print(f"End time: {end_time.strftime('%H:%M:%S')}")
print(f"Duration: {end_time - start_time}")

⏰ Start time: 00:31:48
Starting processing from log 1/1000
Early stopping after 5 consecutive failures
LLM raw response: {"category": "FileErr", "confidence": 0.9, "reasoning": "file not found in path"}
LLM raw response: {"category": "BootErr", "confidence": 0.9, "reasoning": "libvirt guest start failure"}
LLM raw response: {"category": "BootErr", "confidence": 0.9, "reasoning": "unexpected event network-vif-plugged-30e24067-4dc4-48c5-8569-4d
LLM raw response: {"category": "BootErr", "confidence": 0.9, "reasoning": "unexpected event received for instance in building state"}
LLM raw response: {"category": "FileErr", "confidence": 0.9, "reasoning": "error message contains 'file not found'"}
LLM raw response: {"category": "SvcErr", "confidence": 0.8, "reasoning": "brief"}
LLM raw response: {"category": "NetErr", "confidence": 0.9, "reasoning": "Failed to allocate the network(s)"}
LLM raw response: {"category": "NetErr", "confidence": 0.9, "reasoning": "Virtual Interface creation failed"}


In [None]:
def safe_analyze_results(results_list):
    """Print a summary of the classification results.

    The report covers the category distribution, confidence statistics, the
    success rate and the number of failed classifications. A result counts as
    failed only when its category is ``Processing_Error`` or
    ``Rate_Limit_Error``; legitimate categories whose names contain "Error"
    (for example ``File_System_Errors``) are successes, so the error count is
    always the complement of the success count.

    Args:
        results_list: Sequence of objects exposing ``category`` and
            ``confidence`` attributes.

    Returns:
        None. Prints "No results to analyze" and returns when the input is
        empty.
    """
    if not results_list:
        print("No results to analyze")
        return

    failure_labels = ('Processing_Error', 'Rate_Limit_Error')
    total = len(results_list)

    print(f"\nFINAL RESULTS ANALYSIS")
    print("=" * 50)
    category_dist = Counter(r.category for r in results_list)
    # Non-numeric confidences are left out of the statistics.
    confidences = [r.confidence for r in results_list if isinstance(r.confidence, (int, float))]
    print(f"Total processed: {total} logs")
    print(f"\nCategory Distribution:")
    for category, count in category_dist.most_common():
        percentage = count / total * 100
        print(f"  {category}: {count} ({percentage:.1f}%)")

    if confidences:
        avg_confidence = sum(confidences) / len(confidences)
        high_conf_count = sum(1 for c in confidences if c >= 0.65)
        print(f"\nConfidence Analysis:")
        print(f"  Average confidence: {avg_confidence:.3f}")
        print(f"  High confidence (≥0.65): {high_conf_count}/{len(confidences)} ({high_conf_count/len(confidences)*100:.1f}%)")

    successful_classifications = sum(1 for r in results_list if r.category not in failure_labels)
    success_rate = successful_classifications / total * 100
    print(f"\nSuccess Rate: {success_rate:.1f}% ({successful_classifications}/{total})")

    # Failed classifications only, not every category with "Error" in its name.
    errors = total - successful_classifications
    if errors > 0:
        print(f"Errors encountered: {errors} ({errors/total*100:.1f}%)")

safe_analyze_results(RESULTS_LIST)

if RESULTS_LIST:
    # One row per classification; log_index is the position inside RESULTS_LIST.
    final_results_data = [
        {
            'log_index': i,
            'llm_category': result.category,
            'llm_confidence': result.confidence,
            'llm_reasoning': result.reasoning,
        }
        for i, result in enumerate(RESULTS_LIST)
    ]
    final_df = pd.DataFrame(final_results_data)
    final_df.to_csv('../results/llm_v2_final_results_1000.csv', index=False)

    # Attach the LLM output to the first len(RESULTS_LIST) sampled rows, by position.
    processed_logs = strategic_1k_logs.head(len(RESULTS_LIST)).assign(
        llm_category=[r.category for r in RESULTS_LIST],
        llm_confidence=[r.confidence for r in RESULTS_LIST],
        llm_reasoning=[r.reasoning for r in RESULTS_LIST],
    )
    processed_logs.to_csv('../results/strategic_1k_with_llm_results.csv', index=False)

    print(f"\nFILES SAVED:")
    print(f"llm_v2_final_results_1000.csv ({len(final_df)} classifications)")
    print(f"strategic_1k_with_llm_results.csv (integration ready)")
    print(f"Checkpoint files in llm_checkpoints/ directory")

print(f"\nLLM v2 PROCESSING COMPLETE")
print(f"Successfully processed {len(RESULTS_LIST)} logs with bulletproof error handling")


FINAL RESULTS ANALYSIS
Total processed: 1000 logs

🏷️ Category Distribution:
  Network_Operations: 201 (20.1%)
  Boot_Timeout_Errors: 187 (18.7%)
  Resource_Management: 141 (14.1%)
  Instance_Management: 136 (13.6%)
  File_System_Errors: 96 (9.6%)
  Service_Communication_Errors: 91 (9.1%)
  Network_Connection_Errors: 55 (5.5%)
  Processing_Error: 51 (5.1%)
  Scheduler_Operations: 26 (2.6%)
  Resource_Allocation_Errors: 9 (0.9%)
  Configuration_Errors: 7 (0.7%)

Confidence Analysis:
  Average confidence: 0.856
  High confidence (≥0.65): 945/1000 (94.5%)

Success Rate: 94.9% (949/1000)
Errors encountered: 496 (49.6%)

FILES SAVED:
llm_v2_final_results_1000.csv (1000 classifications)
strategic_1k_with_llm_results.csv (integration ready)
Checkpoint files in llm_checkpoints/ directory

LLM v2 PROCESSING COMPLETE
Successfully processed 1000 logs with bulletproof error handling
