# In [1]:
# TASK 1 term deposit prediction
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, f1_score, roc_curve, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

import shap

# Load the semicolon-delimited CSV.
df = pd.read_csv("datascience/bank.csv", sep=";")
# NOTE: this preview is only displayed when it is the last expression of a
# notebook cell; here its result is evaluated and discarded.
df.head()

# Encode the target column: 'yes' -> 1, 'no' -> 0.
target_codes = {'yes': 1, 'no': 0}
df['y'] = df['y'].map(target_codes)

# One-hot encode the categorical columns, dropping the first level of each.
df = pd.get_dummies(df, drop_first=True)

# Separate the target from the feature matrix.
y = df['y']
X = df.drop(columns='y')

# 80/20 hold-out split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training split only,
# then the same fitted transform is applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Logistic regression baseline with the iteration cap set to 1000.
log_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Hard predictions plus the probability of the positive class (column 1,
# i.e. y == 1 after the 'yes' -> 1 encoding).
y_prob_log = log_model.predict_proba(X_test)[:, 1]
y_pred_log = log_model.predict(X_test)

# Report F1 and the confusion matrix on the held-out split.
log_f1 = f1_score(y_test, y_pred_log)
log_cm = confusion_matrix(y_test, y_pred_log)
print("Logistic F1:", log_f1)
print(log_cm)

# Random forest with 200 trees and a fixed seed.
rf_model = RandomForestClassifier(random_state=42, n_estimators=200).fit(
    X_train, y_train
)

# Hard predictions plus the probability of the positive class (column 1).
y_prob_rf = rf_model.predict_proba(X_test)[:, 1]
y_pred_rf = rf_model.predict(X_test)

# Report F1 and the confusion matrix on the held-out split.
rf_f1 = f1_score(y_test, y_pred_rf)
rf_cm = confusion_matrix(y_test, y_pred_rf)
print("Random Forest F1:", rf_f1)
print(rf_cm)

# ROC curve and AUC computed from the random forest's positive-class
# probabilities on the held-out split.
auc = roc_auc_score(y_test, y_prob_rf)
fpr, tpr, _ = roc_curve(y_test, y_prob_rf)

# Draw the curve on a fresh figure using the Axes interface.
roc_fig, roc_ax = plt.subplots()
roc_ax.plot(fpr, tpr)
roc_ax.set_xlabel("False Positive Rate")
roc_ax.set_ylabel("True Positive Rate")
roc_ax.set_title(f"ROC Curve (AUC = {auc:.2f})")
plt.show()

# Explain the random forest's held-out predictions with SHAP's tree explainer.
explainer = shap.TreeExplainer(rf_model)
shap_values = explainer.shap_values(X_test)

# The return type of shap_values() for a classifier depends on the installed
# SHAP version: older releases give a list with one (n_samples, n_features)
# array per class, newer ones a single (n_samples, n_features, n_classes)
# array. Indexing the latter with [1] would select sample 1 rather than
# class 1, so the positive-class slice is extracted explicitly for both.
if isinstance(shap_values, list):
    shap_values_pos = shap_values[1]
else:
    shap_values_arr = np.asarray(shap_values)
    if shap_values_arr.ndim == 3:
        shap_values_pos = shap_values_arr[..., 1]
    else:
        # Single-output form: the array already holds one value per feature.
        shap_values_pos = shap_values_arr

# Baseline (expected) model output for the positive class; the last entry is
# index 1 for a two-class model and the only entry for a scalar baseline.
base_value_pos = np.atleast_1d(explainer.expected_value)[-1]

# X_test lost its column labels when it was scaled, so pass the names of the
# encoded feature columns explicitly instead of getting "Feature N" labels.
feature_names = list(X.columns)

# Global view: distribution of each feature's impact on the positive class.
shap.summary_plot(shap_values_pos, X_test, feature_names=feature_names)

# Local view: force plots for (up to) the first five held-out rows. Slicing
# keeps this safe when the test split has fewer than five rows.
for row_shap, row_features in zip(shap_values_pos[:5], np.asarray(X_test)[:5]):
    shap.force_plot(
        base_value_pos,
        row_shap,
        row_features,
        feature_names=feature_names,
        matplotlib=True,
    )


# Recorded cell output: ModuleNotFoundError: No module named 'matplotlib'