# Machine Learning Models


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.ensemble import (
    RandomForestClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Read the pre-split training and test sets from the local "dataset" directory.
# Paths are relative to the notebook's working directory.
df_train, df_test = map(pd.read_csv, ("dataset/train.csv", "dataset/test.csv"))

In [3]:
# Separate the "target" label column from the feature columns of each split.
# The source frames are left untouched: drop() returns a new frame.
y_train, y_test = df_train["target"], df_test["target"]
X_train = df_train.drop("target", axis=1)
X_test = df_test.drop("target", axis=1)

## Decision Tree


In [4]:
# Fit an entropy-criterion decision tree on the training split; random_state
# is fixed so the fitted tree is the same on every run.
# fit() returns the estimator itself, so the call can be chained.
model = DecisionTreeClassifier(
    random_state=42,
    splitter="best",
    criterion="entropy",
).fit(X_train, y_train)

# Predict labels for the test split.
pred = model.predict(X_test)

# NOTE(review): in the recorded output below, accuracy (~0.44) is lower than
# the share of class 0 in the test support (1365 of 2108) - worth confirming
# that the train and test files are consistent with each other.
# Print each metric under its heading, in the same order as before.
for heading, metric in (
    ("Accuracy: ", accuracy_score),
    ("Confusion Matrix: \n", confusion_matrix),
    ("Classification Report: \n", classification_report),
):
    print(heading, metric(y_test, pred))

Accuracy:  0.4411764705882353
Confusion Matrix: 
 [[763 602]
 [576 167]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.57      0.56      0.56      1365
           1       0.22      0.22      0.22       743

    accuracy                           0.44      2108
   macro avg       0.39      0.39      0.39      2108
weighted avg       0.45      0.44      0.44      2108



## Logistic Regression


In [5]:
# Fit a logistic regression with the library's default settings on the
# training split. fit() returns the estimator itself, so the call is chained.
model = LogisticRegression().fit(X_train, y_train)

# Predict labels for the test split.
y_pred = model.predict(X_test)

# Print each metric under its heading, in the same order as before.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix: \n", confusion_matrix),
    ("Classification Report: \n", classification_report),
):
    print(heading, metric(y_test, y_pred))

Accuracy: 0.5744781783681214
Confusion Matrix: 
 [[749 616]
 [281 462]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.73      0.55      0.63      1365
           1       0.43      0.62      0.51       743

    accuracy                           0.57      2108
   macro avg       0.58      0.59      0.57      2108
weighted avg       0.62      0.57      0.58      2108

