In [None]:
!pip install scikit-learn pandas joblib




In [None]:
import pandas as pd
import random

# Build a small synthetic log corpus: routine messages form the majority and
# error messages the minority that the detector is expected to flag.
SEED = 42          # fixed seed so Restart & Run All regenerates identical rows
N_NORMAL = 800     # number of routine messages (majority class)
N_ANOMALOUS = 200  # number of error messages (minority class, 20% of the corpus)

normal_logs = [
    "Service started successfully",
    "User login successful",
    "Database connection established",
    "Cache cleared successfully",
    "API request completed in 120ms",
    "Health check passed",
]

error_logs = [
    "Database connection failed",
    "Memory leak detected",
    "CPU usage exceeded threshold",
    "Disk space critically low",
    "Service crashed unexpectedly",
    "Timeout while calling external API",
]

# A private generator makes the sampling reproducible without reseeding the
# module-level `random` state.
rng = random.Random(SEED)

# Routine messages come first, followed by the error messages; the corpus is
# deliberately left unshuffled.
logs = [rng.choice(normal_logs) for _ in range(N_NORMAL)]
logs += [rng.choice(error_logs) for _ in range(N_ANOMALOUS)]

# Single-column frame consumed by the vectorisation step.
df = pd.DataFrame({"log_message": logs})
df.head()


Unnamed: 0,log_message
0,Service started successfully
1,Health check passed
2,Cache cleared successfully
3,Health check passed
4,API request completed in 120ms


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn the raw log messages into TF-IDF feature vectors. The vocabulary is
# capped at 100 terms and English stop words are removed.
tfidf_options = {"stop_words": "english", "max_features": 100}
vectorizer = TfidfVectorizer(**tfidf_options)

# Learn the vocabulary and encode the corpus in one call.
X = vectorizer.fit_transform(df["log_message"])
X.shape  # (number of messages, number of vocabulary terms)


(1000, 35)

In [None]:
from sklearn.ensemble import IsolationForest

# Unsupervised anomaly detector. `contamination` is the share of samples the
# forest should treat as outliers; 0.2 mirrors the 200-in-1000 error messages
# in the synthetic corpus. `random_state` pins the tree construction.
forest_params = dict(contamination=0.2, n_estimators=100, random_state=42)
model = IsolationForest(**forest_params)

# Fit on the TF-IDF matrix; left as the last expression so the fitted
# estimator is displayed.
model.fit(X)


In [None]:
# Spot-check the fitted detector on a few hand-picked messages.
test_logs = [
    "Service started successfully",
    "Memory leak detected",
    "CPU usage exceeded threshold",
    "User login successful"
]

# Use transform (not fit_transform) so the messages are encoded with the
# vocabulary the vectorizer already learned.
X_test = vectorizer.transform(test_logs)
predictions = model.predict(X_test)

# The display glyphs were previously stored as mis-decoded UTF-8 byte soup.
# They are written as escape sequences so the source stays pure ASCII and
# cannot be corrupted by another encoding round-trip.
ANOMALY_LABEL = "ANOMALY \U0001F6A8"  # police-car light emoji
NORMAL_LABEL = "NORMAL \u2705"        # white heavy check mark emoji
ARROW = "\u2192"                      # rightwards arrow

for log, pred in zip(test_logs, predictions):
    # A prediction of -1 is reported as an anomaly; anything else as normal.
    status = ANOMALY_LABEL if pred == -1 else NORMAL_LABEL
    print(log, ARROW, status)


Service started successfully → NORMAL ✅
Memory leak detected → ANOMALY 🚨
CPU usage exceeded threshold → ANOMALY 🚨
User login successful → NORMAL ✅


In [None]:
import joblib

# Persist both fitted objects. The vectorizer is saved alongside the model
# because new messages must be encoded with the same vocabulary before
# they can be scored.
artifacts = (
    (model, "isolation_forest_model.pkl"),
    (vectorizer, "tfidf_vectorizer.pkl"),
)
for fitted_obj, filename in artifacts:
    joblib.dump(fitted_obj, filename)

print("Model and vectorizer saved successfully!")


Model and vectorizer saved successfully!


In [None]:
from google.colab import files

# Hand each saved artifact to the Colab download helper, model first.
for artifact_path in ("isolation_forest_model.pkl", "tfidf_vectorizer.pkl"):
    files.download(artifact_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>