In [5]:
!pip install xgboost
import pandas as pd
import numpy as np
import pickle
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import (accuracy_score, roc_auc_score, precision_score, 
                             recall_score, f1_score, matthews_corrcoef)

# 1. Load Dataset
# bank.csv is read from the current working directory. sep=None together with
# the python engine lets pandas detect the delimiter instead of assuming ','.
df = pd.read_csv('bank.csv', engine='python', sep=None)

# Normalise the header: remove stray double quotes, then trim whitespace.
df.columns = [column.replace('"', '').strip() for column in df.columns]

# 2. Preprocessing
# Assuming the target variable is named 'deposit' (common in this dataset).
# Check for it up front so a wrong file fails before df has been modified.
TARGET_COLUMN = 'deposit'
if TARGET_COLUMN not in df.columns:
    raise KeyError(
        f"Target column '{TARGET_COLUMN}' not found; columns are {list(df.columns)}"
    )

# Encode categorical (string/bool) columns as integers.
# Every column gets its OWN LabelEncoder, stored in `encoders`: refitting a
# single shared encoder would discard each column's category -> code mapping,
# making it impossible to encode new data the same way or to decode predictions.
encoders = {}
for col in df.columns:
    if df[col].dtype == 'object' or df[col].dtype == 'bool':
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        encoders[col] = le

# Persist the fitted encoders alongside the other model artifacts so the exact
# training-time encoding can be reapplied at inference time.
os.makedirs('model', exist_ok=True)
with open('model/label_encoders.pkl', 'wb') as f:
    pickle.dump(encoders, f)

X = df.drop(TARGET_COLUMN, axis=1)
y = df[TARGET_COLUMN]

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features (used by Logistic Regression, kNN and Naive Bayes).
# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Initialize Models
# Seed every estimator that draws random numbers so that a fresh
# "Restart & Run All" reproduces the same metrics table. The value matches the
# random_state used for the train/test split.
RANDOM_STATE = 42

# Keys are the display names; they are also turned into the pickle file names
# when the models are saved.
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "kNN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=RANDOM_STATE)
}

# 4. Train and Evaluate
# For each model: fit, predict on the held-out rows, collect the metrics,
# pickle the fitted estimator and print one table row.
results = []
os.makedirs('model', exist_ok=True)  # destination folder for the pickled models

print(f"{'Model':<20} | {'Acc':<6} | {'AUC':<6} | {'Prec':<6} | {'Rec':<6} | {'F1':<6} | {'MCC':<6}")
print("-" * 75)

for name, model in models.items():
    # These three estimators are fed the standardised features; every other
    # model is trained and evaluated on the raw feature values.
    if name in ["Logistic Regression", "kNN", "Naive Bayes"]:
        X_tr, X_te = X_train_scaled, X_test_scaled
    else:
        X_tr, X_te = X_train, X_test

    model.fit(X_tr, y_train)

    y_pred = model.predict(X_te)
    # AUC is computed from the class-1 probability when the estimator exposes
    # predict_proba; otherwise the hard predictions are used as the score.
    if hasattr(model, "predict_proba"):
        y_proba = model.predict_proba(X_te)[:, 1]
    else:
        y_proba = y_pred

    metrics = {"Model": name}
    metrics["Accuracy"] = accuracy_score(y_test, y_pred)
    metrics["AUC"] = roc_auc_score(y_test, y_proba)
    metrics["Precision"] = precision_score(y_test, y_pred)
    metrics["Recall"] = recall_score(y_test, y_pred)
    metrics["F1"] = f1_score(y_test, y_pred)
    metrics["MCC"] = matthews_corrcoef(y_test, y_pred)
    results.append(metrics)

    # Persist the fitted model for the Streamlit app; the file name is the
    # display name lower-cased with spaces replaced by underscores.
    with open(f'model/{name.lower().replace(" ", "_")}.pkl', 'wb') as f:
        f.write(pickle.dumps(model))

    # One formatted row per model, ready to paste into the README table.
    print(f"{name:<20} | {metrics['Accuracy']:.3f} | {metrics['AUC']:.3f} | {metrics['Precision']:.3f} | {metrics['Recall']:.3f} | {metrics['F1']:.3f} | {metrics['MCC']:.3f}")

# The Streamlit app needs the fitted scaler to transform uploaded data the
# same way as the training data, so it is pickled next to the models.
with open('model/scaler.pkl', 'wb') as scaler_file:
    scaler_file.write(pickle.dumps(scaler))

# 5. Create Comparison DataFrame
# One row per model, one column per metric; written without the index column.
results_df = pd.DataFrame(results)
results_df.to_csv('model_comparison.csv', index=False)

Model                | Acc    | AUC    | Prec   | Rec    | F1     | MCC   
---------------------------------------------------------------------------
Logistic Regression  | 0.790 | 0.867 | 0.793 | 0.758 | 0.775 | 0.579
Decision Tree        | 0.769 | 0.768 | 0.766 | 0.743 | 0.755 | 0.537
kNN                  | 0.773 | 0.845 | 0.788 | 0.720 | 0.752 | 0.546
Naive Bayes          | 0.747 | 0.811 | 0.704 | 0.810 | 0.753 | 0.500
Random Forest        | 0.828 | 0.906 | 0.803 | 0.849 | 0.826 | 0.658
XGBoost              | 0.842 | 0.914 | 0.818 | 0.862 | 0.839 | 0.686


In [7]:
streamlit run app.py

SyntaxError: invalid syntax (3737097518.py, line 1)

In [6]:
# Streamlit app: upload a labelled CSV, pick one of the pickled models, and
# show its evaluation metrics and confusion matrix on the uploaded rows.
# This script is meant to be saved as a .py file and launched with
# `streamlit run <file>`; Streamlit does not render its widgets when the code
# is executed directly as a notebook cell.
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, f1_score, matthews_corrcoef,
                             precision_score, recall_score, roc_auc_score)
from sklearn.preprocessing import LabelEncoder

# Name of the label column, as used by the training cell.
TARGET_COLUMN = 'deposit'
# Models that were trained on StandardScaler output; they must receive scaled
# features at prediction time. All other models were trained on raw features.
SCALED_MODELS = ('logistic_regression', 'knn', 'naive_bayes')

st.title("Banking Campaign Classification App")

# Step 1: Dataset Upload
uploaded_file = st.file_uploader("Upload your test CSV data", type=["csv"])

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, sep=None, engine='python')
    # Same header clean-up as in training, so column names line up.
    df.columns = df.columns.str.replace('"', '').str.strip()
    st.write("Data Preview:", df.head())

    # Step 2: Model Selection Dropdown
    # Option values double as the pickle file names under model/.
    model_option = st.selectbox(
        'Which model would you like to use?',
        ('logistic_regression', 'decision_tree', 'knn', 'naive_bayes', 'random_forest', 'xgboost')
    )

    if st.button('Run Prediction and Evaluation'):
        if TARGET_COLUMN not in df.columns:
            st.error(
                f"The uploaded file has no '{TARGET_COLUMN}' column, "
                "so the predictions cannot be evaluated."
            )
            st.stop()

        # Load the selected model and the scaler.
        # NOTE: pickle.load can execute arbitrary code - only load artifacts
        # that were produced by your own training run.
        try:
            with open(f'model/{model_option}.pkl', 'rb') as f:
                model = pickle.load(f)
            with open('model/scaler.pkl', 'rb') as f:
                scaler = pickle.load(f)
        except FileNotFoundError as exc:
            st.error(f"Model artifact not found: {exc.filename}. Run the training step first.")
            st.stop()

        # Preprocessing (mirrors the training steps): integer-encode every
        # string/bool column. LabelEncoder numbers the sorted unique values,
        # so the codes agree with training only when each categorical column
        # of the uploaded file contains the same set of values as the
        # training data - upload a representative sample.
        encoded = df.copy()
        for col in encoded.columns:
            if encoded[col].dtype == 'object' or encoded[col].dtype == 'bool':
                encoded[col] = LabelEncoder().fit_transform(encoded[col])

        y_true = encoded[TARGET_COLUMN]
        X = encoded.drop(TARGET_COLUMN, axis=1)

        # When the scaler recorded its training column names, present the
        # features in exactly that order and report any that are missing.
        expected_columns = getattr(scaler, 'feature_names_in_', None)
        if expected_columns is not None:
            missing = [c for c in expected_columns if c not in X.columns]
            if missing:
                st.error(f"The uploaded file is missing feature columns: {missing}")
                st.stop()
            X = X[list(expected_columns)]

        X_input = scaler.transform(X) if model_option in SCALED_MODELS else X
        y_pred = model.predict(X_input)

        # Display Metrics
        st.subheader(f"Evaluation Metrics for {model_option}")
        scores = {
            "Accuracy": accuracy_score(y_true, y_pred),
            "Precision": precision_score(y_true, y_pred, zero_division=0),
            "Recall": recall_score(y_true, y_pred, zero_division=0),
            "F1": f1_score(y_true, y_pred, zero_division=0),
            "MCC": matthews_corrcoef(y_true, y_pred),
        }
        # ROC AUC needs probability scores and both classes in the labels.
        if hasattr(model, "predict_proba") and y_true.nunique() == 2:
            scores["AUC"] = roc_auc_score(y_true, model.predict_proba(X_input)[:, 1])
        st.table(pd.DataFrame(scores, index=[model_option]).round(3))
        st.text(classification_report(y_true, y_pred, zero_division=0))

        # Step 3: Confusion Matrix
        st.subheader("Confusion Matrix")
        cm = confusion_matrix(y_true, y_pred)
        fig, ax = plt.subplots()
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set(xlabel='Predicted label', ylabel='True label',
               title=f'Confusion matrix: {model_option}')
        st.pyplot(fig)
        plt.close(fig)  # release the figure; Streamlit reruns the script on every interaction

2026-02-17 00:23:29.071 
  command:

    streamlit run C:\Users\Administrator\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
