In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import recall_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Positional indices of the columns used as model features: positions 13..27
# followed by 7, 8 and 9. Defined once so the train and test frames are
# guaranteed to be sliced identically.
# NOTE(review): the selection is positional, so it silently depends on the CSV
# column order; confirm that the "Category" target is not at one of these
# positions (that would leak the label into the features).
FEATURE_COLUMN_POSITIONS = list(range(13, 28)) + [7, 8, 9]

# Keep the raw frames under their own names so the feature matrices below do
# not overwrite the data they were derived from.
train_raw = pd.read_csv(r'../../data/train_data.csv')
Y_train = train_raw["Category"]

test_raw = pd.read_csv(r'../../data/test_data.csv')
Y_test = test_raw["Category"]

# Feature matrices: same columns, same order, for both splits.
X_train = train_raw.iloc[:, FEATURE_COLUMN_POSITIONS]
X_test = test_raw.iloc[:, FEATURE_COLUMN_POSITIONS]

In [None]:
# Logistic-regression classifier; the iteration cap is raised to 1000.
model = LogisticRegression(max_iter=1000)
# Train on the selected feature columns against the "Category" labels.
model.fit(X=X_train, y=Y_train)

In [None]:
# Hard class predictions for the test features.
y_pred = model.predict(X_test)

# Per-class probability matrix; column 1 is kept as the score used for the
# ROC analysis further down.
# NOTE(review): taking column 1 presumes a two-class problem -- confirm.
class_probabilities = model.predict_proba(X_test)
y_pred_prob = class_probabilities[:, 1]

In [None]:
# Summary metrics comparing the hard predictions with the true test labels.
accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
report = classification_report(y_true=Y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=Y_test, y_pred=y_pred)

# Print each metric under its heading, in a fixed order.
metric_sections = (
    ("Accuracy:", accuracy),
    ("Classification Report:\n", report),
    ("Confusion Matrix:\n", conf_matrix),
)
for heading, value in metric_sections:
    print(heading, value)

In [None]:
# Encode the true test labels as integers for the ROC computation.
# The encoder is fitted on the classifier's own class list rather than on
# Y_test, so integer code k always refers to the same class as column k of
# model.predict_proba(...). Fitting on Y_test only gives that alignment when
# the test split happens to contain exactly the classes seen in training.
# A test label the model never saw now makes transform() raise instead of
# being silently assigned a code.
label_encoder = LabelEncoder()
label_encoder.fit(model.classes_)
y_encoded = label_encoder.transform(Y_test)

In [None]:
# Area under the ROC curve for the column-1 probability scores.
auc = roc_auc_score(y_encoded, y_pred_prob)

# Points of the ROC curve itself, unpacked into its three arrays.
roc_points = roc_curve(y_encoded, y_pred_prob)
fpr, tpr, thresholds = roc_points

In [None]:
# Draw the ROC curve using the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(8, 6))

# Model curve, labelled with its AUC so the legend carries the score.
ax.plot(fpr, tpr, linewidth=2, label=f'AUC = {auc:.2f}')
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], 'k--', linewidth=2)

ax.set_xlim([0.0, 1.0])
# Upper y-limit sits slightly above 1 so the curve's top edge is not clipped.
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate (FPR)')
ax.set_ylabel('True Positive Rate (TPR)')
ax.set_title('ROC Curve')
ax.legend(loc="lower right")
plt.show()
