In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, f1_score, recall_score
import pickle
from my_functions import load_data, preprocess_data, evaluate

In [13]:
# --- Data ---------------------------------------------------------------
# Read the churn training set and pass it through the project's
# preprocessing helper (both come from my_functions).
data_path = 'data/churn_train_data.pcl'
df = preprocess_data(load_data(data_path))

# Separate the feature matrix from the 'target' label column.
target_col = 'target'
X = df.drop(columns=[target_col])
y = df[target_col]

# --- Hold-out split -----------------------------------------------------
# 80/20 split, stratified on the label so both parts keep the same
# class ratio; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Training rows whose label is 0 -- the only rows the detectors in the
# next cell are fitted on.
# NOTE(review): the variable name implies label 0 is the majority
# (non-churn) class -- confirm against the data.
is_label_zero = y_train == 0
X_train_majority = X_train[is_label_zero]

In [14]:

# Fit three one-class detectors on the label-0 training rows only, flag
# test rows they consider outliers as the positive class (1), and collect
# one evaluation record per model.

# Expected share of outliers, shared by all three detectors
# (`contamination` for Isolation Forest / LOF, `nu` for One-Class SVM).
# NOTE(review): presumably the observed positive-class rate -- confirm.
OUTLIER_FRACTION = 0.063


def _fit_and_flag(model, X_fit, X_eval):
    """Fit ``model`` on ``X_fit`` and return 0/1 outlier flags for ``X_eval``.

    Parameters
    ----------
    model : estimator exposing ``fit(X)`` and ``predict(X)``.
    X_fit : data the detector is fitted on.
    X_eval : data to score.

    Returns
    -------
    numpy.ndarray
        1 where ``model.predict`` returned -1, 0 everywhere else.
    """
    model.fit(X_fit)
    return np.where(model.predict(X_eval) == -1, 1, 0)


# Collect results
# NOTE(review): `evaluate` only receives hard 0/1 predictions, so any
# ROC AUC it reports is based on labels rather than continuous scores.
results = []

# Isolation Forest
print('Proceeding Isolation Forest model ...')
iso = IsolationForest(n_estimators=500, contamination=OUTLIER_FRACTION, random_state=42)
y_pred_iso = _fit_and_flag(iso, X_train_majority, X_test)
results.append(evaluate(y_test, y_pred_iso, "Isolation Forest"))

# Local Outlier Factor (novelty mode, required to call predict on unseen data)
# Fixed: the message used the invalid escape '\P'; '\n' was intended.
print('\nProceeding Local Outlier Factor (LOF) model ...')
lof = LocalOutlierFactor(n_neighbors=20, contamination=OUTLIER_FRACTION, novelty=True)
y_pred_lof = _fit_and_flag(lof, X_train_majority, X_test)
results.append(evaluate(y_test, y_pred_lof, "Local Outlier Factor (LOF)"))

# One-Class SVM
# Fixed: same invalid '\P' escape as above.
print('\nProceeding One-Class SVM model ...')
ocsvm = OneClassSVM(nu=OUTLIER_FRACTION, kernel='rbf', gamma='scale')
y_pred_svm = _fit_and_flag(ocsvm, X_train_majority, X_test)
results.append(evaluate(y_test, y_pred_svm, "One-Class SVM"))

# Create DataFrame with results (one row per model); the bare expression
# on the last line lets the notebook render it as a table.
results_df = pd.DataFrame(results)
print("\nSummary of Model Performance:")
results_df

Proceeding Isolation Forest model ...

Isolation Forest:
ROC AUC: 0.6394
F1: 0.3013
Recall: 0.3422
Proceeding Local Outlier Factor (LOF) model ...





Local Outlier Factor (LOF):
ROC AUC: 0.5225
F1: 0.1069
Recall: 0.1116
Proceeding One-Class SVM model ...

One-Class SVM:
ROC AUC: 0.5660
F1: 0.1842
Recall: 0.1951

Summary of Model Performance:


Unnamed: 0,Model,ROC AUC,F1,Recall
0,Isolation Forest,0.639373,0.301263,0.342201
1,Local Outlier Factor (LOF),0.522469,0.106893,0.111633
2,One-Class SVM,0.566035,0.184191,0.195097
