In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load and Explore the Data

# Load the dataset
# The CSV is downloaded from the GitHub URL below each time this cell runs,
# so the cell needs network access.
url = "https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/data/Telco-Customer-Churn.csv"
df = pd.read_csv(url)

# Explore the data
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line.
df.info()

   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... DeviceProtection  \
0  No phone service             DSL             No  ...               No   
1                No             DSL            Yes  ...              Yes   
2                No             DSL            Yes  ...               No   
3  No phone service             DSL            Yes  ...              Yes   
4                No     Fiber optic             No  ...               No   

  TechSupport StreamingTV StreamingMovies        Contract Pape

In [3]:
# Encode categorical features
# A single LabelEncoder is refit on each listed column in turn and the column
# is overwritten with the resulting integer codes. Because the encoder is
# reused, after the loop it only holds the classes of the last column ('Churn').
label_enc = LabelEncoder()
columns_to_encode = (
    'gender',
    'Partner',
    'Dependents',
    'PhoneService',
    'PaperlessBilling',
    'Churn',
)
for column_name in columns_to_encode:
    df[column_name] = label_enc.fit_transform(df[column_name])

In [4]:
# Select relevant features
features = [
    'tenure',
    'MonthlyCharges',
    'Contract',
    'gender',
    'SeniorCitizen',
    'Partner',
    'Dependents',
    'PhoneService',
    'PaperlessBilling',
]

# Build the design matrix: get_dummies one-hot encodes the selected columns
# that are still non-numeric and passes numeric columns through unchanged.
selected_columns = df[features]
X = pd.get_dummies(selected_columns)

# Target column
y = df['Churn']

In [5]:
# Split the data into training and testing sets
# 30% of the rows are held out for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
split_options = {'test_size': 0.3, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [6]:
# Train a Model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [7]:
# Initialize the model
# max_iter is set to 1000 to give the solver a larger iteration budget.
model = LogisticRegression(max_iter=1000)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
# Print the heading and the report with separate calls: passing both to a
# single print() inserts the default separator (one space) right after the
# "\n", which pushes the report's header row one column out of alignment
# with the rows beneath it.
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.7983909133932797
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.90      0.87      1539
           1       0.67      0.51      0.58       574

    accuracy                           0.80      2113
   macro avg       0.75      0.71      0.72      2113
weighted avg       0.79      0.80      0.79      2113



In [None]:
# Building a Neural Network for Churn Prediction

In [None]:
"""
	•	Scikit-Learn: Best for quick, straightforward models like Naive Bayes, which work well with text data.
	•	Keras: Allows you to easily build and train neural networks with minimal code, suitable for more complex tasks like deep learning.
	•	TensorFlow: Provides maximum control and flexibility, allowing for custom model architectures and advanced features.
"""