In [3]:
# FRAUD DETECTION USING DECISION TREES

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to DataFrame
df = pd.read_csv("creditcard.csv")

# Declare labels
X = df.drop(["Class"], axis = 1)
y = df["Class"]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9993153330290369

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56850
           1       0.88      0.76      0.81       112

    accuracy                           1.00     56962
   macro avg       0.94      0.88      0.91     56962
weighted avg       1.00      1.00      1.00     56962



In [2]:
# FRAUD DETECTION USING RANDOM FOREST

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to DataFrame
df = pd.read_csv("creditcard.csv")

# Declare labels
X = df.drop(["Class"], axis = 1)
y = df["Class"]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9995611109160493

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56872
           1       0.95      0.77      0.85        90

    accuracy                           1.00     56962
   macro avg       0.97      0.88      0.92     56962
weighted avg       1.00      1.00      1.00     56962



In [6]:
# FRAUD DETECTION USING LOGISTIC REGRESSION

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to DataFrame
df = pd.read_csv("creditcard.csv")

# Declare labels
X = df.drop(["Class"], axis = 1)
y = df["Class"]

# Scaling X with StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size = 0.2)

# Create and train the model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9990168884519505

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56844
           1       0.86      0.63      0.73       118

    accuracy                           1.00     56962
   macro avg       0.93      0.81      0.86     56962
weighted avg       1.00      1.00      1.00     56962



In [7]:
# FRAUD DETECTION USING KNNs (K-Nearest Neighbors)

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to DataFrame
df = pd.read_csv("creditcard.csv")

# Declare labels
X = df.drop(["Class"], axis = 1)
y = df["Class"]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = KNeighborsClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9986306660580738

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56880
           1       1.00      0.05      0.09        82

    accuracy                           1.00     56962
   macro avg       1.00      0.52      0.55     56962
weighted avg       1.00      1.00      1.00     56962



In [3]:
# FRAUD DETECTION USING XGBOOST

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to DataFrame
df = pd.read_csv("creditcard.csv")

# Declare labels
X = df.drop(["Class"], axis = 1)
y = df["Class"]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = XGBClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9995962220427653

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56867
           1       0.97      0.78      0.87        95

    accuracy                           1.00     56962
   macro avg       0.99      0.89      0.93     56962
weighted avg       1.00      1.00      1.00     56962



In [1]:
# FRAUD DETECTION USING NAIVE BAYES (GaussianNB)

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to DataFrame
df = pd.read_csv("creditcard.csv")

# Declare labels
X = df.drop(["Class"], axis = 1)
y = df["Class"]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = GaussianNB()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9927495523331343

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00     56870
           1       0.13      0.63      0.22        92

    accuracy                           0.99     56962
   macro avg       0.57      0.81      0.61     56962
weighted avg       1.00      0.99      1.00     56962

