In [6]:
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler  # Added this line
import numpy as np
import pandas as pd
import joblib

# Load the dataset
train_data = pd.read_csv('../data/logs_train.csv')
test_data = pd.read_csv('../data/logs_test.csv')

# Identify numeric columns
numeric_cols = train_data.select_dtypes(include=[np.number]).columns.tolist()

# Apply scaling only to numeric columns
scaler = StandardScaler()
train_data[numeric_cols] = scaler.fit_transform(train_data[numeric_cols])

# Save the scaler object
joblib.dump(scaler, 'scaler.pkl')

# Initialize and train the model
clf = IsolationForest(contamination=0.01, random_state=42)  
clf.fit(train_data[numeric_cols])

# Make predictions (-1 for anomalies and 1 for normal points)
predictions = clf.predict(train_data[numeric_cols])

# Calculate anomaly score
anomaly_score = clf.decision_function(train_data[numeric_cols])


# Save the model
joblib.dump(clf, 'isolation_forest_model.pkl')

# If you wish, you can print or save 'predictions' and 'anomaly_score' for further analysis
print(train_data.columns)
print(len(numeric_cols))
print("Predictions:", predictions)
print("Anomaly Scores:", anomaly_score)

Index(['Timestamp', 'IP', 'UserAgent', 'Status', 'Method_DELETE', 'Method_GET',
       'Method_POST', 'Method_PUT'],
      dtype='object')
Predictions: [ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1