In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pickle

# Step 1: Load the dataset
# Assuming logs.csv is in the same directory; adjust path if needed

df = pd.read_csv('logs.csv')

# Step 2: Preprocess the data
# Handle missing error_type
df['error_type'] = df['error_type'].fillna('None')

# Text feature extraction for message
tfidf = TfidfVectorizer(max_features=100)  # Limit to top 100 terms
message_features = tfidf.fit_transform(df['message']).toarray()

# Categorical feature encoding for source and error_type
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
categorical_features = encoder.fit_transform(df[['source', 'error_type']])

# Combine features
X = np.concatenate([message_features, categorical_features], axis=1)
y = df['label'].values

# Step 3: Split and train the model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

# Step 4: Evaluate the model
y_pred = model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=['Normal', 'Anomaly'], zero_division=0))

# Step 5: Save model and preprocessors
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)
with open('tfidf.pkl', 'wb') as f:
    pickle.dump(tfidf, f)
with open('encoder.pkl', 'wb') as f:
    pickle.dump(encoder, f)

# Step 6: Function to predict on a new log
def predict_log(log):
    # log: dict with keys 'source', 'message', 'error_type'
    df_log = pd.DataFrame([log])
    df_log['error_type'] = df_log['error_type'].fillna('None')
    message_vec = tfidf.transform(df_log['message']).toarray()
    cat_vec = encoder.transform(df_log[['source', 'error_type']])
    features = np.concatenate([message_vec, cat_vec], axis=1)
    prediction = model.predict(features)[0]
    return 'Anomaly' if prediction == 1 else 'Normal'

# Step 7: Test with a random log
sample_log = {
    'source': 'frontend',
    'message': 'Uncaught TypeError: undefined is not a function',
    'error_type': 'TypeError'
}
prediction = predict_log(sample_log)
print(f"\nSample Log Prediction: {prediction}")
print(f"Log Details: {sample_log}")

Classification Report:
              precision    recall  f1-score   support

      Normal       0.97      1.00      0.99        37
     Anomaly       1.00      0.97      0.99        36

    accuracy                           0.99        73
   macro avg       0.99      0.99      0.99        73
weighted avg       0.99      0.99      0.99        73


Sample Log Prediction: Anomaly
Log Details: {'source': 'frontend', 'message': 'Uncaught TypeError: undefined is not a function', 'error_type': 'TypeError'}


In [3]:
import smtplib
server = smtplib.SMTP('smtp.gmail.com', 587)
server.starttls()
server.login('amoggha03@gmail.com', 'ixpvyrcguxgvrnzl')
server.quit()
print("SMTP test successful")

SMTP test successful
