# Machine Learning Models


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.ensemble import (
    RandomForestClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Read the train and test splits from their CSV files (train first, then test).
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ("dataset/output/train.csv", "dataset/output/test.csv")
)

In [3]:
# For each split, separate the "target" label column from the remaining
# columns, which are used as the feature matrix.
X_train, y_train = df_train.drop(columns=["target"]), df_train["target"]
X_test, y_test = df_test.drop(columns=["target"]), df_test["target"]

## Decision Tree


In [4]:
# Fit a decision tree (entropy criterion, "best" splitter, seed 42) on the
# training split. fit() returns the estimator itself, so the call is chained.
model = DecisionTreeClassifier(
    criterion="entropy",
    splitter="best",
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the test split.
pred = model.predict(X_test)

# Print accuracy, confusion matrix and the per-class report for the test split.
# NOTE(review): the "Accuracy: " label ends with a space, so print() emits two
# spaces before the value.
# NOTE(review): the recorded run scored 0.45 accuracy, while class 0 alone is
# 1365/2108 (~0.65) of the test set -- worth confirming that train.csv and
# test.csv share the same preprocessing and label encoding.
for heading, metric_fn in (
    ("Accuracy: ", accuracy_score),
    ("Confusion Matrix: \n", confusion_matrix),
    ("Classification Report: \n", classification_report),
):
    print(heading, metric_fn(y_test, pred))

Accuracy:  0.4501897533206831
Confusion Matrix: 
 [[782 583]
 [576 167]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.58      0.57      0.57      1365
           1       0.22      0.22      0.22       743

    accuracy                           0.45      2108
   macro avg       0.40      0.40      0.40      2108
weighted avg       0.45      0.45      0.45      2108



## Logistic Regression


In [5]:
# Fit a logistic regression with library-default settings on the training
# split. fit() returns the estimator itself, so the call is chained.
model = LogisticRegression().fit(X_train, y_train)

# Predict labels for the test split.
y_pred = model.predict(X_test)

# Print accuracy, confusion matrix and the per-class report for the test split.
# NOTE(review): the recorded run scored 0.56 accuracy, below the ~0.65 share of
# class 0 in the test set (1365/2108) -- confirm the input data before tuning.
for heading, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix: \n", confusion_matrix),
    ("Classification Report: \n", classification_report),
):
    print(heading, metric_fn(y_test, y_pred))

Accuracy: 0.5583491461100569
Confusion Matrix: 
 [[750 615]
 [316 427]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.70      0.55      0.62      1365
           1       0.41      0.57      0.48       743

    accuracy                           0.56      2108
   macro avg       0.56      0.56      0.55      2108
weighted avg       0.60      0.56      0.57      2108



## Random Forest


In [6]:
# Fit a random forest of 100 trees, each limited to depth 5, with seed 1, on
# the training split. fit() returns the estimator itself, so the call is chained.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=1,
).fit(X_train, y_train)

# Predict labels for the test split.
y_pred = model.predict(X_test)

# Print accuracy, confusion matrix and the per-class report for the test split.
# NOTE(review): the recorded run scored 0.64 accuracy, just under the ~0.65
# share of class 0 in the test set (1365/2108).
for heading, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix: \n", confusion_matrix),
    ("Classification Report: \n", classification_report),
):
    print(heading, metric_fn(y_test, y_pred))

Accuracy: 0.6351992409867173
Confusion Matrix: 
 [[1099  266]
 [ 503  240]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.69      0.81      0.74      1365
           1       0.47      0.32      0.38       743

    accuracy                           0.64      2108
   macro avg       0.58      0.56      0.56      2108
weighted avg       0.61      0.64      0.62      2108

