In [30]:
import pandas as pd
file_path = r"C:\Users\linto\Code\churn-x\telcoData\Telco_customer_churn.csv"
df = pd.read_csv(file_path)

In [31]:
df.columns

Index(['CustomerID', 'Count', 'Country', 'State', 'City', 'Zip Code',
       'Lat Long', 'Latitude', 'Longitude', 'Gender', 'Senior Citizen',
       'Partner', 'Dependents', 'Tenure Months', 'Phone Service',
       'Multiple Lines', 'Internet Service', 'Online Security',
       'Online Backup', 'Device Protection', 'Tech Support', 'Streaming TV',
       'Streaming Movies', 'Contract', 'Paperless Billing', 'Payment Method',
       'Monthly Charges', 'Total Charges', 'Churn Label', 'Churn Value',
       'Churn Score', 'CLTV', 'Churn Reason'],
      dtype='object')

In [32]:
df.drop(['Churn Label', 'Churn Value', 'Churn Reason', 'CustomerID', 'Count', 'Country', 'State', 'Lat Long', 'City', 'Zip Code', 'Total Charges'], axis=1, inplace=True)

In [33]:
# Select categorical columns
cat_cols = df.select_dtypes(include=["object", "category"]).columns.tolist()
print("Categorical Columns:", cat_cols)

# One-hot encode categorical columns
df2 = pd.get_dummies(df, columns=cat_cols, drop_first=True)

Categorical Columns: ['Gender', 'Senior Citizen', 'Partner', 'Dependents', 'Phone Service', 'Multiple Lines', 'Internet Service', 'Online Security', 'Online Backup', 'Device Protection', 'Tech Support', 'Streaming TV', 'Streaming Movies', 'Contract', 'Paperless Billing', 'Payment Method']


In [34]:
y = df2['Churn Score']
X = df2.drop('Churn Score', axis=1)


In [35]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2, 
    random_state=42 
)

In [36]:
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR

# Define models and their parameter grids
models = {
    "LinearRegression": (
        LinearRegression(),
        {
            "fit_intercept": [True, False],
            "positive": [True, False]
        }
    ),
    "Ridge": (
        Ridge(),
        {
            "alpha": [0.01, 0.1, 1, 10, 100]
        }
    ),
    "Lasso": (
        Lasso(max_iter=5000),
        {
            "alpha": [0.01, 0.1, 1, 10, 100]
        }
    ),
    "RandomForest": (
        RandomForestRegressor(random_state=42),
        {
            "n_estimators": [100, 200],
            "max_depth": [None, 10, 20],
            "min_samples_split": [2, 5]
        }
    ),
    "GradientBoosting": (
        GradientBoostingRegressor(random_state=42),
        {
            "n_estimators": [100, 200],
            "learning_rate": [0.01, 0.1, 0.2],
            "max_depth": [3, 5]
        }
    ),
    "SVR": (
        SVR(),
        {
            "C": [0.1, 1, 10],
            "kernel": ["linear", "rbf"],
            "gamma": ["scale", "auto"]
        }
    )
}

best_model = None
best_score = -np.inf
best_params = None
best_name = None

results = []

overall_start = time.time()

# Loop through models
for name, (model, param_grid) in models.items():
    print(f"\n🔍 Running GridSearchCV for {name}...")
    start = time.time()
    
    grid = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=5,
        scoring="r2",   # or "neg_mean_squared_error"
        n_jobs=-1,
        verbose=0
    )
    grid.fit(X_train, y_train)
    elapsed = time.time() - start
    
    print(f"⏱ Time Taken: {elapsed:.2f} seconds")
    print(f"Best Params for {name}: {grid.best_params_}")
    print(f"Best CV Score for {name}: {grid.best_score_:.4f}")
    
    results.append({
        "Model": name,
        "Best Params": grid.best_params_,
        "CV Score": grid.best_score_,
        "Time Taken (s)": round(elapsed, 2)
    })
    
    if grid.best_score_ > best_score:
        best_score = grid.best_score_
        best_model = grid.best_estimator_
        best_params = grid.best_params_
        best_name = name

overall_end = time.time()
total_time = overall_end - overall_start

# Show results in a DataFrame
results_df = pd.DataFrame(results)
print("\n📊 Summary of Models:")
print(results_df)

print("\n✅ Final Best Model:")
print("Name:", best_name)
print("Best Params:", best_params)
print("Best CV Score:", best_score)
print("Test Score:", best_model.score(X_test, y_test))
print(f"⏱ Total Time Taken: {total_time:.2f} seconds")



🔍 Running GridSearchCV for LinearRegression...
⏱ Time Taken: 5.29 seconds
Best Params for LinearRegression: {'fit_intercept': True, 'positive': False}
Best CV Score for LinearRegression: 0.1218

🔍 Running GridSearchCV for Ridge...
⏱ Time Taken: 0.08 seconds
Best Params for Ridge: {'alpha': 100}
Best CV Score for Ridge: 0.1225

🔍 Running GridSearchCV for Lasso...
⏱ Time Taken: 0.40 seconds
Best Params for Lasso: {'alpha': 0.1}
Best CV Score for Lasso: 0.1225

🔍 Running GridSearchCV for RandomForest...
⏱ Time Taken: 35.83 seconds
Best Params for RandomForest: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 200}
Best CV Score for RandomForest: 0.1183

🔍 Running GridSearchCV for GradientBoosting...
⏱ Time Taken: 49.99 seconds
Best Params for GradientBoosting: {'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 200}
Best CV Score for GradientBoosting: 0.1268

🔍 Running GridSearchCV for SVR...


KeyboardInterrupt: 