# ML. Dataset: 'Credit_Card_Fraud_' from OpenML

This Jupyter notebook is for preliminary training and testing of ML models for fraud detection on dataset 2.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [2]:
df = pd.read_csv("../datasets/Credit_Card_Fraud_.csv")
print(f"rows: {len(df)}")
df.head()

rows: 1000000


Unnamed: 0,distance_from_home,distance_from_last_transaction,ratio_to_median_purchase_price,repeat_retailer,used_chip,used_pin_number,online_order,fraud
0,57.877857,0.31114,1.94594,1.0,1.0,0.0,0.0,0.0
1,10.829943,0.175592,1.294219,1.0,0.0,0.0,0.0,0.0
2,5.091079,0.805153,0.427715,1.0,0.0,0.0,1.0,0.0
3,2.247564,5.600044,0.362663,1.0,1.0,0.0,1.0,0.0
4,44.190936,0.566486,2.222767,1.0,1.0,0.0,1.0,0.0


In [3]:
y = df['fraud']
X = df.drop(columns=['fraud'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=2025)

In [4]:
rf = RandomForestClassifier(n_estimators=10, random_state=2025)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)
f1_weighted = f1_score(y_test, y_pred, average='weighted')
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.5f}")
print(f"Recall: {recall:.5f}")
print(f"Precision: {precision:.5f}")
print(f"Weighted F1 Score: {f1_weighted:.5f}")

Accuracy: 0.99997
Recall: 0.99971
Precision: 1.00000
Weighted F1 Score: 0.99997


In [5]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
f1_weighted = f1_score(y_test, y_pred, average='weighted')
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.5f}")
print(f"Recall: {recall:.5f}")
print(f"Precision: {precision:.5f}")
print(f"Weighted F1 Score: {f1_weighted:.5f}")

Accuracy: 0.95734
Recall: 0.58212
Precision: 0.89232
Weighted F1 Score: 0.95320


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
from sklearn.svm import SVC

model = SVC(class_weight='balanced', kernel='linear')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
f1_weighted = f1_score(y_test, y_pred, average='weighted')
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.5f}")
print(f"Recall: {recall:.5f}")
print(f"Precision: {precision:.5f}")
print(f"Weighted F1 Score: {f1_weighted:.5f}")

In [4]:
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
f1_weighted = f1_score(y_test, y_pred, average='weighted')
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.5f}")
print(f"Recall: {recall:.5f}")
print(f"Precision: {precision:.5f}")
print(f"Weighted F1 Score: {f1_weighted:.5f}")

Accuracy: 0.95088
Recall: 0.59333
Precision: 0.79248
Weighted F1 Score: 0.94764


In [5]:
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
f1_weighted = f1_score(y_test, y_pred, average='weighted')
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.5f}")
print(f"Recall: {recall:.5f}")
print(f"Precision: {precision:.5f}")
print(f"Weighted F1 Score: {f1_weighted:.5f}")

Accuracy: 0.99974
Recall: 0.99703
Precision: 1.00000
Weighted F1 Score: 0.99974
