# Customer churn prediction

### Load and preprocess the data

In [27]:
import pandas as pd

# Sample dataset URL (can use your own too)
url = 'https://raw.githubusercontent.com/treselle-systems/customer_churn_analysis/refs/heads/master/WA_Fn-UseC_-Telco-Customer-Churn.csv'

# Keep the raw load untouched so the cleaning steps below can be re-run
# (or inspected) without depending on earlier in-place mutations.
raw_df = pd.read_csv(url)

# Cleaning pipeline, expressed as one chain that returns a new frame:
#   1. drop the identifier column, which carries no predictive signal;
#   2. 'TotalCharges' is loaded as text; strip whitespace and coerce to a
#      number so that unparseable entries become NaN;
#   3. encode the target as 1 (churned) / 0 (stayed);
#   4. drop the rows whose 'TotalCharges' could not be parsed.
df = (
    raw_df
    .drop(columns='customerID')
    .assign(
        TotalCharges=lambda d: pd.to_numeric(d['TotalCharges'].str.strip(), errors='coerce'),
        Churn=lambda d: d['Churn'].map({'Yes': 1, 'No': 0}),
    )
    .dropna(subset=['TotalCharges'])
)

# Any label other than 'Yes'/'No' maps to NaN; fail here with a clear message
# instead of letting the NaN surface later inside model fitting.
assert df['Churn'].notna().all(), "unexpected label in 'Churn' (expected 'Yes'/'No')"

# One-hot encode every remaining text column so the frame is fully numeric.
cat_cols = df.select_dtypes(include='object').columns.tolist()

df = pd.get_dummies(df, columns=cat_cols)



### Train a logistic regression model

In [31]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Features are every column except the target; 'Churn' is the 0/1 label.
x = df.drop(columns='Churn')
y = df['Churn']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Labelled versions of the scaled arrays. Passing the original row index keeps
# them aligned with y_train / y_test, which still carry the index of `df`
# (without it the frames would get a fresh 0..n-1 index).
X_train_scaled = pd.DataFrame(x_train_scaled, columns=x_train.columns, index=x_train.index)
X_test_scaled = pd.DataFrame(x_test_scaled, columns=x_test.columns, index=x_test.index)

# Fit and predict on the labelled frames (same values as the bare arrays) so
# the column names travel with the data given to the model.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

### Evaluation

In [32]:
# Compute both summaries of the held-out predictions first, then print them.
# In the confusion matrix each row is an actual class and each column a
# predicted class (row sums equal the per-class support in the report).
conf_mat = confusion_matrix(y_test, y_pred)
clf_report = classification_report(y_test, y_pred)

print("Confusion Matrix:\n", conf_mat)
print("Classification Report:\n", clf_report)

Confusion Matrix:
 [[915 118]
 [181 193]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.89      0.86      1033
           1       0.62      0.52      0.56       374

    accuracy                           0.79      1407
   macro avg       0.73      0.70      0.71      1407
weighted avg       0.78      0.79      0.78      1407

