In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM
from sklearn.metrics import classification_report, roc_auc_score, precision_recall_curve


In [6]:
df = pd.read_csv("FinancialMarketData.csv", parse_dates=["Data"], index_col="Data")


In [8]:
X = df.drop(columns=["Y"])
y = df["Y"]

In [9]:
X

Unnamed: 0_level_0,XAU BGNL,ECSURPUS,BDIY,CRY,DXY,JPY,GBP,Cl1,VIX,USGG30YR,...,LP01TREU,EMUSTRUU,LF94TRUU,MXUS,MXEU,MXJP,MXBR,MXRU,MXIN,MXCN
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-11,283.25,0.077,1388.0,157.2600,100.560,105.86,1.6460,25.77,22.50,6.6710,...,116.4640,230.527,123.7620,1416.12,127.75,990.59,856.76,224.33,217.34,34.30
2000-01-18,287.65,0.043,1405.0,165.0100,101.860,105.47,1.6380,28.85,21.50,6.7470,...,117.2670,231.377,123.7620,1428.79,129.50,993.98,925.22,234.37,227.08,32.74
2000-01-25,287.15,0.135,1368.0,167.2400,102.410,106.04,1.6500,28.28,23.02,6.6340,...,117.9950,232.390,123.7620,1385.93,126.48,974.83,886.93,216.82,233.00,32.46
2000-02-01,282.75,0.191,1311.0,166.8500,104.920,107.85,1.6110,28.22,23.45,6.4230,...,120.5100,231.942,122.3280,1385.31,129.19,1007.12,842.60,201.89,237.48,31.29
2000-02-08,298.40,0.312,1277.0,165.4300,104.220,109.30,1.6110,28.02,21.25,6.2310,...,118.7910,237.812,122.3280,1411.95,134.67,1034.58,945.15,218.00,258.02,31.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-23,1727.96,0.339,2271.0,184.5022,92.336,108.72,1.3760,57.76,20.30,2.3264,...,427.7972,1249.403,389.8500,3800.77,140.32,1199.57,1671.73,681.21,1695.96,110.83
2021-03-30,1685.56,0.303,2103.0,184.8741,93.297,110.29,1.3719,60.55,19.61,2.3687,...,428.6578,1242.264,385.8145,3835.96,142.76,1206.29,1635.57,693.38,1690.76,108.80
2021-04-06,1743.28,0.440,2092.0,185.8393,92.335,109.88,1.3822,59.33,18.12,2.3230,...,430.6649,1250.256,390.6819,3955.16,144.13,1194.62,1689.62,673.20,1689.48,111.92
2021-04-13,1742.69,0.467,2140.0,188.1368,91.852,109.19,1.3737,60.18,16.65,2.2939,...,430.4087,1252.563,390.3848,4027.05,144.23,1197.06,1680.82,680.23,1672.46,108.40


In [10]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [11]:
split_index = int(len(X_scaled)*0.8)
X_train, X_test = X_scaled[:split_index], X_scaled[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [12]:
iso = IsolationForest(random_state=42)
iso.fit(X_train)

In [13]:
iso_scores_test = -iso.score_samples(X_test)
threshold_iso = np.percentile(iso_scores_test, 95)  # example threshold
iso_preds = (iso_scores_test > threshold_iso).astype(int)  # 1=crash, 0=normal


In [14]:
print("Isolation Forest")
print(classification_report(y_test, iso_preds))

Isolation Forest
              precision    recall  f1-score   support

           0       0.86      0.94      0.90       194
           1       0.00      0.00      0.00        29

    accuracy                           0.82       223
   macro avg       0.43      0.47      0.45       223
weighted avg       0.75      0.82      0.78       223



In [15]:
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
lof.fit(X_train)
lof_scores_train = -lof.negative_outlier_factor_
lof_test = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
lof_test.fit(X_test)
lof_scores_test = -lof_test.negative_outlier_factor_
threshold_lof = np.percentile(lof_scores_test, 95)
lof_preds = (lof_scores_test > threshold_lof).astype(int)

print("Local Outlier Factor")
print(classification_report(y_test, lof_preds))

Local Outlier Factor
              precision    recall  f1-score   support

           0       0.92      1.00      0.96       194
           1       1.00      0.41      0.59        29

    accuracy                           0.92       223
   macro avg       0.96      0.71      0.77       223
weighted avg       0.93      0.92      0.91       223



In [16]:
oc_svm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.05)
oc_svm.fit(X_train)
ocsvm_scores_test = -oc_svm.decision_function(X_test)
threshold_svm = np.percentile(ocsvm_scores_test, 95)
ocsvm_preds = (ocsvm_scores_test > threshold_svm).astype(int)

print("One-Class SVM")
print(classification_report(y_test, ocsvm_preds))

One-Class SVM
              precision    recall  f1-score   support

           0       0.88      0.96      0.92       194
           1       0.33      0.14      0.20        29

    accuracy                           0.85       223
   macro avg       0.61      0.55      0.56       223
weighted avg       0.81      0.85      0.82       223

