In [7]:
# Attach Google Drive to the Colab runtime; the dataset loaded later in this
# notebook is read from a path under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
import pandas as pd
import warnings
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Silence every warning category for the rest of the session to keep cell
# output compact. NOTE(review): this also hides deprecation/convergence
# warnings from the libraries used here -- consider narrowing the filter.
warnings.simplefilter("ignore")

Prepare data

In [19]:
# Seed shared by the split and the oversampler so that Restart & Run All
# reproduces the same train/test partition and the same synthetic samples.
RANDOM_STATE = 42

# Load the preprocessed fraud dataset from the mounted Drive.
data_frame = pd.read_csv("/content/drive/Shareddrives/NHÓM HỌC TẬP - N3/HKII/RPython/Project /Bài 3/fraud_after_preprocessing_and_cut.csv")
print(data_frame.shape)

# Separate the features from the binary target column.
X = data_frame.drop(columns='is_fraud')
y = data_frame['is_fraud']

# Hold out 20% of the rows for the final evaluation; stratify keeps the
# fraud / non-fraud ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=RANDOM_STATE
)

# Oversample the minority class on the training partition only, so the test
# partition keeps the original class distribution.
smote = SMOTE(random_state=RANDOM_STATE)
X_train, y_train = smote.fit_resample(X_train, y_train)

(12966, 19)


Stratified K-Fold cross-validation to validate the tuned model

In [20]:
# Fixed seed so fold assignment and forest construction are reproducible
# across kernel restarts (both are randomized when left unseeded).
CV_SEED = 42

# NOTE(review): X_train / y_train were already oversampled with SMOTE in the
# data-preparation cell, so every validation fold contains synthetic samples
# interpolated from points that may sit in the training folds. The scores
# printed here are therefore likely optimistic relative to the held-out test
# set; resampling inside each fold (e.g. an imblearn Pipeline) would avoid it.
stratified = StratifiedKFold(n_splits=5, shuffle=True, random_state=CV_SEED)
classifier = RandomForestClassifier(random_state=CV_SEED)

# cross_val_score fits clones of `classifier`, so the estimator itself is
# still unfitted after this call; default scoring for a classifier is accuracy.
cross_validation_result = cross_val_score(classifier, X_train, y_train, cv=stratified)

# Report mean and spread of the fold scores as percentages.
mean = round(cross_validation_result.mean()*100, 2)
standard_deviation = round(cross_validation_result.std()*100, 2)
print(f'Cross validation result: {cross_validation_result}')
print(f'Mean: {mean} ; Standard deviation: {standard_deviation}')

Cross validation result: [0.99781818 0.99757576 0.99830303 0.99927273 0.99830262]
Mean: 99.83 ; Standard deviation: 0.06


Predict on the test set after validating the model

In [21]:
# Train on the full (oversampled) training partition, then score the
# untouched test partition.
classifier.fit(X_train, y_train)
y_predict = classifier.predict(X_test)

# Label -> metric function; every metric is reported as a percentage
# rounded to two decimals.
scorers = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1': f1_score,
}
metric_results = {
    label: round(scorer(y_test, y_predict)*100, 2)
    for label, scorer in scorers.items()
}
# Keep the individual values available under their own names as well.
accuracy, precision, recall, f1 = metric_results.values()

# Rows are true classes, columns are predicted classes.
confusion = confusion_matrix(y_test, y_predict)

print('Confusion matrix:\n', confusion)
print('Metric results: ', metric_results)

Confusion matrix:
 [[2570    9]
 [   4   11]]
Metric results:  {'Accuracy': 99.5, 'Precision': 55.0, 'Recall': 73.33, 'F1': 62.86}
