In [None]:
# ===============================
# 1️⃣ IMPORTS
# ===============================
import pandas as pd
import joblib
import streamlit as st
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

# ===============================
# 2️⃣ LOAD DATA
# ===============================
DATA_PATH = r"C:\Users\Niteen\Downloads\fraud bank\Churn_Modelling.csv"


@st.cache_data
def _load_data(path):
    """Read the churn CSV at *path* into a DataFrame.

    Streamlit re-executes this script from the top on every widget
    interaction, so an uncached ``pd.read_csv`` would re-read and re-parse
    the file each time. ``st.cache_data`` returns a fresh copy to every
    caller, which means code further down that adds columns to ``data`` in
    place does not alter the cached frame.

    NOTE: the cache is keyed on the path string; if the file changes on
    disk, clear the Streamlit cache to pick up the new contents.
    """
    return pd.read_csv(path)


data = _load_data(DATA_PATH)

# ===============================
# 3️⃣ SIDEBAR - MODEL SELECTION
# ===============================
# Sidebar heading, followed by the dropdown whose value selects the
# classifier used further down in the script.
st.sidebar.title("Bank Customer Churn Prediction")
MODEL_OPTIONS = ["Logistic Regression", "Random Forest", "Gradient Boosting"]
model_choice = st.sidebar.selectbox("Select Model", MODEL_OPTIONS)

# ===============================
# 4️⃣ PREPROCESSING
# ===============================
# Separate the target from the predictors; the RowNumber, CustomerId and
# Surname columns are dropped together with the target.
TARGET_COL = 'Exited'
DROPPED_COLS = [TARGET_COL, 'RowNumber', 'CustomerId', 'Surname']
y = data[TARGET_COL]
X = data.drop(columns=DROPPED_COLS)

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

# Columns
# Numeric (and boolean) columns are detected positively; every other column
# is treated as categorical. Testing only for dtype == 'object' would send
# `category` or pandas string-dtype columns to StandardScaler, which cannot
# scale text.
_scalable = set(X_train.select_dtypes(include=['number', 'bool']).columns)
cat_cols = [c for c in X_train.columns if c not in _scalable]
num_cols = [c for c in X_train.columns if c in _scalable]

# Preprocessor
# Output columns follow the transformer order below: the one-hot encoded
# block first, then the scaled numeric block.
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(drop='first', sparse_output=False), cat_cols),
    ('num', StandardScaler(), num_cols)
])

# ===============================
# 5️⃣ MODEL PIPELINE
# ===============================
def _make_classifier(choice):
    """Return an unfitted classifier for the sidebar *choice*.

    Any value other than "Logistic Regression" or "Random Forest" falls back
    to gradient boosting.
    """
    if choice == "Logistic Regression":
        return LogisticRegression(class_weight='balanced', max_iter=500, random_state=42)
    if choice == "Random Forest":
        return RandomForestClassifier(n_estimators=100, max_depth=5, class_weight='balanced', random_state=42)
    return GradientBoostingClassifier(n_estimators=100, max_depth=3, random_state=42)


@st.cache_resource
def _fit_pipeline(choice, features, target, _preprocessor):
    """Build and fit the preprocessing + classifier pipeline.

    Streamlit re-runs the whole script on every widget interaction, so
    without caching the model would be refit each time a sidebar value
    changes. The cache key is made of *choice*, *features* and *target*;
    *_preprocessor* carries a leading underscore so Streamlit leaves it out
    of the key (it is derived from the same training frame).

    The returned pipeline is shared between reruns and must be treated as
    read-only (only predict / predict_proba / attribute reads).
    """
    fitted = Pipeline([
        ('preprocessor', _preprocessor),
        ('classifier', _make_classifier(choice)),
    ])
    fitted.fit(features, target)
    return fitted


# Train model (or fetch the already-fitted one from the cache)
pipe = _fit_pipeline(model_choice, X_train, y_train, preprocessor)
# Kept as a module-level name for any code that refers to the classifier directly.
classifier = pipe.named_steps['classifier']
st.success(f"✅ {model_choice} trained successfully!")

# ===============================
# 6️⃣ SIDEBAR - CUSTOMER INPUT
# ===============================
st.sidebar.header("Enter Customer Info")

# One sidebar widget per model feature; each variable is named after the
# dataset column it supplies.
CreditScore = st.sidebar.number_input("Credit Score", min_value=300, max_value=850, value=650)
Geography = st.sidebar.selectbox("Geography", ["France","Spain","Germany"])
Gender = st.sidebar.selectbox("Gender", ["Male","Female"])
Age = st.sidebar.number_input("Age", min_value=18, max_value=100, value=35)
Tenure = st.sidebar.number_input("Tenure", min_value=0, max_value=10, value=3)
Balance = st.sidebar.number_input("Balance", min_value=0.0, max_value=250000.0, value=50000.0)
NumOfProducts = st.sidebar.number_input("Number of Products", min_value=1, max_value=4, value=1)
HasCrCard = st.sidebar.selectbox("Has Credit Card", [0,1])
IsActiveMember = st.sidebar.selectbox("Is Active Member", [0,1])
EstimatedSalary = st.sidebar.number_input("Estimated Salary", min_value=0.0, max_value=200000.0, value=50000.0)

# Single-row frame holding the entered values, keyed by column name.
customer_record = {
    'CreditScore': CreditScore,
    'Geography': Geography,
    'Gender': Gender,
    'Age': Age,
    'Tenure': Tenure,
    'Balance': Balance,
    'NumOfProducts': NumOfProducts,
    'HasCrCard': HasCrCard,
    'IsActiveMember': IsActiveMember,
    'EstimatedSalary': EstimatedSalary,
}
user_data = pd.DataFrame([customer_record])

# ===============================
# 7️⃣ PREDICTION
# ===============================
if st.button("Predict"):
    # Predicted label for the single input row, plus the value in the second
    # predict_proba column for that row.
    pred = pipe.predict(user_data)[0]
    proba = pipe.predict_proba(user_data)[0][1]
    if pred != 1:
        st.success(f"✅ Customer is safe. Probability: {1-proba:.2f}")
    else:
        st.error(f"⚠️ Customer may churn! Probability: {proba:.2f}")

# ===============================
# 8️⃣ DATA VISUALIZATION
# ===============================
st.subheader("📊 Churn Data Overview")

# Churn distribution
# Count each class explicitly, in the fixed order 0 then 1. value_counts()
# orders its rows by frequency, so pairing its values with the fixed
# ['Stayed', 'Churned'] labels would swap the slices whenever churned
# customers outnumber the rest (and would mismatch lengths if one class
# were absent).
churn_counts = pd.Series(
    {label: int((data['Exited'] == label).sum()) for label in (0, 1)}
)
fig1 = px.pie(
    names=['Stayed','Churned'],
    values=churn_counts.values,
    hole=0.5,
    color=['Stayed','Churned'],
    color_discrete_map={'Stayed':'blue','Churned':'red'},
    title="Overall Customer Churn Distribution"
)
st.plotly_chart(fig1)

# Churn by Geography
# For each Geography value: mean of Exited (churn rate) and number of
# CustomerId entries (customer count).
geo_stats = (
    data.groupby('Geography')
    .agg(ChurnRate=('Exited', 'mean'), Count=('CustomerId', 'count'))
    .reset_index()
)

# Bars show the counts; the churn rate is drawn as a line on a second
# y-axis placed on the right-hand side.
fig2 = px.bar(
    geo_stats,
    x='Geography',
    y='Count',
    text='Count',
    title="Customers & Churn Rate by Geography",
)
fig2.add_scatter(
    x=geo_stats['Geography'],
    y=geo_stats['ChurnRate'],
    mode='lines+markers',
    name='Churn Rate',
    yaxis='y2',
)
fig2.update_layout(yaxis2={'overlaying': 'y', 'side': 'right', 'title': 'Churn Rate'})
st.plotly_chart(fig2)

# ===============================
# Age Group vs Churn
# ===============================
st.subheader("Customer Churn by Age Group")

# Create age bins
# The first and last bins are open-ended so that customers aged 20 or under
# and over 70 stay in the chart; with edges limited to 20..70, pd.cut labels
# them NaN and the groupby below silently leaves them out.
age_edges = [0, 20, 30, 40, 50, 60, 70, float('inf')]
age_labels = ['<=20', '21-30', '31-40', '41-50', '51-60', '61-70', '71+']
data['AgeGroup'] = pd.cut(
    data['Age'], bins=age_edges, labels=age_labels, include_lowest=True
)

# Churn counts by age group
# observed=False is the long-standing default for categorical groupers; it
# is passed explicitly so the result does not depend on the pandas version
# and no deprecation warning about the changing default is raised.
age_churn = (
    data.groupby('AgeGroup', observed=False)['Exited']
    .value_counts()
    .unstack()
    .fillna(0)
)
age_churn = age_churn.rename(columns={0:'Stayed',1:'Churned'})
# Make sure both series exist so the stacked bar below cannot fail on a
# dataset in which one outcome never occurs.
for outcome in ('Stayed', 'Churned'):
    if outcome not in age_churn.columns:
        age_churn[outcome] = 0

# Stacked bar chart
fig3 = px.bar(
    age_churn,
    x=age_churn.index,
    y=['Stayed','Churned'],
    title="Churn by Age Group",
    labels={'value':'Number of Customers', 'AgeGroup':'Age Group'},
    color_discrete_map={'Stayed':'green','Churned':'red'}
)
st.plotly_chart(fig3)


# Feature Importance (for tree models)
if model_choice in ["Random Forest","Gradient Boosting"]:
    importances = pipe.named_steps['classifier'].feature_importances_
    # The ColumnTransformer lists its 'cat' transformer before 'num', so the
    # classifier sees the one-hot columns first and the scaled numeric
    # columns after them. The names must be assembled in that same order,
    # otherwise every importance is attributed to the wrong feature.
    encoder = pipe.named_steps['preprocessor'].named_transformers_['cat']
    features = list(encoder.get_feature_names_out()) + num_cols
    feat_imp = pd.DataFrame({'Feature':features, 'Importance':importances}).sort_values('Importance', ascending=True)
    fig4 = px.bar(feat_imp, x='Importance', y='Feature', orientation='h', text='Importance', title=f"{model_choice} Feature Importance")
    st.plotly_chart(fig4)
