# Task - 4
# Churn Prediction Model

In [71]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [73]:
df=pd.read_csv('Telco-Customer-Churn.csv')

In [74]:
df.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

## Data cleaning and preprocessing

In [75]:
df=df.drop(columns=['cutomerID'],errors='ignore')

## Convert target column to numeric (e.g., Churn: Yes/No -> 1/0)

In [76]:
df['Churn']=df['Churn'].map({'Yes':1,'No':0})

## Identify categorical columns

In [78]:
categorical_columns = df.select_dtypes(include=['object']).columns

## OneHotEncode categorical columns

In [81]:
encoder=OneHotEncoder(sparse_output=False,drop='first')
encoded=pd.DataFrame(encoder.fit_transform(df[categorical_columns]),columns=encoder.get_feature_names_out(categorical_columns))

## Combine encoded columns with numeric columns

In [82]:
df=pd.concat([df.select_dtypes(exclude=['object']),encoded],axis=1)

## Fill missing values with median

In [83]:
df.fillna(df.median(),inplace=True)

## Split the dataset

In [84]:
X=df.drop(columns=['Churn'])
y=df['Churn']
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)

## Train the model

In [85]:
model=LogisticRegression(max_iter=1000,random_state=42)

In [86]:
model.fit(X_train,y_train)

## Evaluate the Model

In [87]:
y_pred=model.predict(X_test)

## Print evaluation metrics

In [89]:
print("Accuracy:",accuracy_score(y_test,y_pred))
print("Confusion Matrix:\n",confusion_matrix(y_test,y_pred))
print("Classification Report:\n",classification_report(y_test,y_pred))

Accuracy: 0.8130619971604354
Confusion Matrix:
 [[1392  147]
 [ 248  326]]
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.90      0.88      1539
           1       0.69      0.57      0.62       574

    accuracy                           0.81      2113
   macro avg       0.77      0.74      0.75      2113
weighted avg       0.81      0.81      0.81      2113

