In [203]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

In [None]:
##Training The Random forest

In [205]:
file_label = pd.read_csv('./5_flood/fraud_label.csv')
features = file_label[['inv_time_diff_ms', 'transaction_count']]
labels = file_label['is_fraud']
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [207]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [208]:
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.9998158435898223
Confusion Matrix:
[[19507     1]
 [    5 13068]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     19508
           1       0.99      0.98      0.98     13073

    accuracy                           0.99     32581
   macro avg       0.99      0.99      0.99     32581
weighted avg       0.99      0.99      0.99     32581



In [209]:
feature_correlation = pd.Series(rf.feature_importances_, index=features.columns).sort_values(ascending=False)
print(feature_correlation)

inv_time_diff_ms     0.803828
transaction_count    0.196172
dtype: float64


In [210]:
#import joblib

#joblib.dump(rf, 'random_forest_model.pkl')

In [None]:
##Training the Support Vector Machine and optimising the parameters

In [211]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
joblib.dump(scaler, 'scaler.pkl')
X_test = scaler.transform(X_test)

In [188]:
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],  
    'gamma': [1, 0.1, 0.01, 0.001], 
    'kernel': ['rbf']
}

svm_rbf = SVC()

##cross-validation
grid_search = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5, verbose=2, n_jobs=-1)


grid_search.fit(X_train, y_train)


print("Best Parameters:", grid_search.best_params_)


print("Best Cross-Validation Score:", grid_search.best_score_)



Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Parameters: {'C': 1000, 'gamma': 1, 'kernel': 'rbf'}
Best Cross-Validation Score: 0.9860116634206616


In [199]:
svm_rbf_para = SVC(C=1000, gamma=1, kernel='rbf', random_state=42)
svm_rbf_para.fit(X_train, y_train)
y_pred = svm_rbf_para.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     19508
           1       0.99      0.98      0.98     13073

    accuracy                           0.99     32581
   macro avg       0.99      0.99      0.99     32581
weighted avg       0.99      0.99      0.99     32581

Confusion Matrix:
[[19336   172]
 [  249 12824]]
Accuracy: 0.9870783585525306


In [None]:
#joblib.dump(svm_rbf_para, 'svm_model_grid.pkl')