In [None]:
!pip install pandas numpy scikit-learn xgboost imbalanced-learn joblib -q
print("‚úÖ Packages installed")

‚úÖ Packages installed


In [None]:
# Cell 2: Upload bank-full.csv
# Prompts for a file through the Colab upload widget and loads it into `df`.
from google.colab import files
import pandas as pd
import io
print("üìÇ Please upload bank-full.csv")
uploaded = files.upload()
# `uploaded` is indexed by file name and yields the raw bytes of each file.
# Fail with a clear message (instead of an IndexError) when it is empty.
if not uploaded:
   raise ValueError("No file was uploaded; re-run this cell and select bank-full.csv")
# Load the file (only the first uploaded entry is used)
filename = next(iter(uploaded))
# The file is semicolon-separated, hence sep=';'
df = pd.read_csv(io.BytesIO(uploaded[filename]), sep=';')
print(f"‚úÖ Dataset loaded: {df.shape[0]} rows, {df.shape[1]} columns")
df.head()

üìÇ Please upload bank-full.csv


Saving bank-full.csv to bank-full.csv
‚úÖ Dataset loaded: 41188 rows, 21 columns


Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [None]:

# Cell 3: Import all libraries
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from imblearn.over_sampling import SMOTE
from sklearn.metrics import (accuracy_score, roc_auc_score, precision_score,
                          recall_score, f1_score, matthews_corrcoef)
print("‚úÖ Libraries imported")


‚úÖ Libraries imported


In [None]:
# Show the column names of the loaded frame as a plain Python list
print(list(df.columns))

['age', 'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'day_of_week', 'duration', 'campaign', 'pdays', 'previous', 'poutcome', 'emp.var.rate', 'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed', 'y']


In [None]:
# Cell 4: Preprocessing
# Encodes the target and categorical features, makes a stratified 80/20
# train/test split, oversamples the training split with SMOTE, and fits a
# StandardScaler on the resampled training features.
print("\nüõ†Ô∏è Preprocessing...")
# Convert target ('yes'/'no' -> 1/0).
# Guarded so the cell is idempotent: mapping an already-numeric column a
# second time would turn every label into NaN.
if not pd.api.types.is_numeric_dtype(df['y']):
   df['y'] = df['y'].map({'yes': 1, 'no': 0})
if df['y'].isna().any():
   raise ValueError("Target column 'y' contains values other than 'yes'/'no'")
# Categorical columns
categorical_cols = ['job', 'marital', 'education', 'default', 'housing',
                   'loan', 'contact', 'month', 'day_of_week', 'poutcome']
# One-hot encoding (drop_first=True drops one dummy level per column)
# NOTE(review): if this is the UCI Bank Marketing data, its description
# advises excluding `duration` for realistic models, since it is only known
# after the call ends. Confirm whether it should be dropped.
df_encoded = pd.get_dummies(df, columns=categorical_cols, drop_first=True)
print(f"‚úÖ After encoding: {df_encoded.shape[1]} features")
# Split features and target
X = df_encoded.drop('y', axis=1)
y = df_encoded['y']
# Train-test split (stratified on y so both splits keep the class ratio)
X_train, X_test, y_train, y_test = train_test_split(
   X, y, test_size=0.2, random_state=42, stratify=y
)
print(f"‚úÖ Training: {X_train.shape[0]} samples")
print(f"‚úÖ Testing: {X_test.shape[0]} samples")
# SMOTE for imbalance: fit on the training split only, so the test split
# keeps its original class distribution.
# NOTE(review): SMOTE runs on unscaled features here, so large-magnitude
# columns presumably dominate its neighbour search. Confirm this is intended.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
print(f"‚úÖ After SMOTE: {X_train_resampled.shape[0]} samples")
# Scaling: statistics come from the resampled training data and are then
# applied unchanged to the test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)
print("‚úÖ Features scaled")
# Create model folder (no error if it already exists)
os.makedirs('model', exist_ok=True)
# ============================================



üõ†Ô∏è Preprocessing...
‚úÖ After encoding: 54 features
‚úÖ Training: 32950 samples
‚úÖ Testing: 8238 samples
‚úÖ After SMOTE: 58476 samples
‚úÖ Features scaled


In [None]:
# Cell 5: Train all 6 models
# Fits each classifier on the SMOTE-resampled training data, scores it on the
# untouched test split, records six metrics in `results`, and saves every
# fitted model (plus the scaler) under model/.
print("\n" + "="*60)
print("TRAINING 6 MODELS")
print("="*60)
models = {
   'Logistic_Regression': LogisticRegression(random_state=42, max_iter=1000),
   'Decision_Tree': DecisionTreeClassifier(random_state=42, max_depth=10),
   'kNN': KNeighborsClassifier(n_neighbors=5),
   'Naive_Bayes': GaussianNB(),
   'Random_Forest': RandomForestClassifier(random_state=42, n_estimators=100),
   # `use_label_encoder` was removed here: the installed XGBoost reports it as
   # "not used" and only emits a warning for it.
   'XGBoost': xgb.XGBClassifier(random_state=42, n_estimators=100,
                               eval_metric='logloss')
}
# Models that are trained and evaluated on the standardized features;
# every other model uses the unscaled (resampled) features.
scaled_models = ('Logistic_Regression', 'kNN')
results = []
for name, model in models.items():
   print(f"\n Training {name}...")
   # Pick the matching train/test feature matrices once per model
   if name in scaled_models:
       X_fit, X_eval = X_train_scaled, X_test_scaled
   else:
       X_fit, X_eval = X_train_resampled, X_test
   # Train
   model.fit(X_fit, y_train_resampled)
   y_pred = model.predict(X_eval)
   # Probability of the positive class (column 1) feeds the AUC
   y_prob = model.predict_proba(X_eval)[:, 1]
   # Calculate ALL 6 metrics
   acc = accuracy_score(y_test, y_pred)
   auc = roc_auc_score(y_test, y_prob)
   precision = precision_score(y_test, y_pred, zero_division=0)
   recall = recall_score(y_test, y_pred, zero_division=0)
   f1 = f1_score(y_test, y_pred, zero_division=0)
   mcc = matthews_corrcoef(y_test, y_pred)
   results.append({
       'Model': name.replace('_', ' '),
       'Accuracy': round(acc, 4),
       'AUC': round(auc, 4),
       'Precision': round(precision, 4),
       'Recall': round(recall, 4),
       'F1': round(f1, 4),
       'MCC': round(mcc, 4)
   })
   # Save model
   joblib.dump(model, f'model/{name}.pkl')
   print(f"   Saved: model/{name}.pkl")
   print(f"   Acc: {acc:.4f}, AUC: {auc:.4f}, F1: {f1:.4f}, MCC: {mcc:.4f}")
# Save scaler (the scaled models need it to transform new inputs)
joblib.dump(scaler, 'model/scaler.pkl')
print("\n Scaler saved: model/scaler.pkl")
# ============================================



TRAINING 6 MODELS

 Training Logistic_Regression...
   Saved: model/Logistic_Regression.pkl
   Acc: 0.9057, AUC: 0.9108, F1: 0.5793, MCC: 0.5262

 Training Decision_Tree...
   Saved: model/Decision_Tree.pkl
   Acc: 0.8886, AUC: 0.9033, F1: 0.6033, MCC: 0.5562

 Training kNN...
   Saved: model/kNN.pkl
   Acc: 0.8900, AUC: 0.8172, F1: 0.4658, MCC: 0.4074

 Training Naive_Bayes...
   Saved: model/Naive_Bayes.pkl
   Acc: 0.8593, AUC: 0.8269, F1: 0.4371, MCC: 0.3599

 Training Random_Forest...
   Saved: model/Random_Forest.pkl
   Acc: 0.9142, AUC: 0.9456, F1: 0.6221, MCC: 0.5737

 Training XGBoost...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


   Saved: model/XGBoost.pkl
   Acc: 0.9165, AUC: 0.9498, F1: 0.6313, MCC: 0.5842

 Scaler saved: model/scaler.pkl


In [None]:
# Cell 6: Display results table
# Prints the collected metrics as a Markdown table and writes them to
# model_results.csv.
print("\n" + "="*80)
print("üìä MODEL COMPARISON TABLE - COPY THIS TO README.md")
print("="*80)
print("\n| ML Model Name | Accuracy | AUC | Precision | Recall | F1 | MCC |")
print("|---------------|----------|-----|-----------|--------|-----|-----|")
# Metric keys in table-column order
metric_columns = ['Accuracy', 'AUC', 'Precision', 'Recall', 'F1', 'MCC']
for r in results:
   # Fixed 4-decimal formatting keeps every cell the same width; printing the
   # rounded floats directly drops trailing zeros (e.g. 0.89, not 0.8900).
   metric_cells = " | ".join(f"{r[metric]:.4f}" for metric in metric_columns)
   print(f"| {r['Model']} | {metric_cells} |")
# Save results to CSV
results_df = pd.DataFrame(results)
results_df.to_csv('model_results.csv', index=False)
print("\n‚úÖ Results saved to model_results.csv")
# ============================================


üìä MODEL COMPARISON TABLE - COPY THIS TO README.md

| ML Model Name | Accuracy | AUC | Precision | Recall | F1 | MCC |
|---------------|----------|-----|-----------|--------|-----|-----|
| Logistic Regression | 0.9057 | 0.9108 | 0.5822 | 0.5765 | 0.5793 | 0.5262 |
| Decision Tree | 0.8886 | 0.9033 | 0.5036 | 0.7522 | 0.6033 | 0.5562 |
| kNN | 0.89 | 0.8172 | 0.5143 | 0.4256 | 0.4658 | 0.4074 |
| Naive Bayes | 0.8593 | 0.8269 | 0.3979 | 0.4849 | 0.4371 | 0.3599 |
| Random Forest | 0.9142 | 0.9456 | 0.6172 | 0.6272 | 0.6221 | 0.5737 |
| XGBoost | 0.9165 | 0.9498 | 0.6279 | 0.6347 | 0.6313 | 0.5842 |

‚úÖ Results saved to model_results.csv


In [None]:
# Cell 7: Download all .pkl files to your computer
# Sends each saved model, the scaler, and the results CSV to the browser via
# the Colab download helper, reporting any file that is missing.
from google.colab import files
import os
print("\nüì• Downloading model files to your computer...")
# List of files to download
files_to_download = [
   'model/Logistic_Regression.pkl',
   'model/Decision_Tree.pkl',
   'model/kNN.pkl',
   'model/Naive_Bayes.pkl',
   'model/Random_Forest.pkl',
   'model/XGBoost.pkl',
   'model/scaler.pkl',
   'model_results.csv'
]
# Track files that could not be found so the final banner stays truthful
missing_files = []
for file_path in files_to_download:
   if os.path.exists(file_path):
       files.download(file_path)
       print(f"   ‚úÖ Downloaded: {file_path}")
   else:
       missing_files.append(file_path)
       print(f"   ‚ùå Not found: {file_path}")
print("\n" + "="*60)
print("‚úÖ TRAINING COMPLETE!")
if missing_files:
   # Do not claim that everything was downloaded when some files were absent
   print(f"WARNING: {len(missing_files)} file(s) were not found and were not downloaded: {', '.join(missing_files)}")
else:
   print("üìÅ All .pkl files downloaded to your Downloads folder")
print("="*60)


üì• Downloading model files to your computer...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ‚úÖ Downloaded: model/Logistic_Regression.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ‚úÖ Downloaded: model/Decision_Tree.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ‚úÖ Downloaded: model/kNN.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ‚úÖ Downloaded: model/Naive_Bayes.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ‚úÖ Downloaded: model/Random_Forest.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ‚úÖ Downloaded: model/XGBoost.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ‚úÖ Downloaded: model/scaler.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   ‚úÖ Downloaded: model_results.csv

‚úÖ TRAINING COMPLETE!
üìÅ All .pkl files downloaded to your Downloads folder
