In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Load dataset
df = pd.read_csv("telco_churn.csv")

# Step 2: Drop unnecessary index column if exists
if 'Unnamed: 0' in df.columns:
    df.drop('Unnamed: 0', axis=1, inplace=True)

# Step 3: Convert 'TotalCharges' to numeric
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Step 4: Drop rows where 'Churn' is missing
df = df[df['Churn'].notna()]

# Step 5: Encode target variable
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

# Step 6: Encode binary categorical columns
binary_cols = ['Partner', 'Dependents', 'PhoneService', 'PaperlessBilling', 'gender', 'SeniorCitizen']
for col in binary_cols:
    df[col] = df[col].map({'Yes': 1, 'No': 0, 'Male': 1, 'Female': 0, True: 1, False: 0})

# Step 7: One-hot encode the remaining categorical columns
df = pd.get_dummies(df, drop_first=True)

# Step 8: Separate features and target
X = df.drop('Churn', axis=1)
y = df['Churn']

# Step 9: Impute missing values
for col in X.select_dtypes(include=['float64', 'int64']).columns:
    X[col].fillna(X[col].mean(), inplace=True)

for col in X.select_dtypes(include='object').columns:
    X[col].fillna(X[col].mode()[0], inplace=True)

# Step 10: Reset index (to ensure alignment)
X = X.reset_index(drop=True)
y = y.reset_index(drop=True)

# Step 11: Feature scaling
scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# Step 12: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Step 13: Train Logistic Regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Step 14: Predictions
y_pred = model.predict(X_test)

# Step 15: Evaluation
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred))

# Step 16: Confusion matrix
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# Step 17: Show shape and head of cleaned data
print("\nCleaned Data Shape:", df.shape)
print("\nCleaned Data Head:\n", df.head())


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X[col].fillna(X[col].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


ValueError: Input y contains NaN.

In [2]:
df

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,...,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_True,StreamingMovies_Yes,Contract_One year,Contract_Two year,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,,,,1,,,29.850000,29.850000,,...,False,False,False,False,False,False,False,False,True,False
1,1,,,,34,,,56.950001,1889.500000,,...,False,False,False,False,False,True,False,False,False,True
2,1,,,,2,,,53.849998,108.150002,,...,False,False,False,False,False,False,False,False,False,True
3,1,,,,45,,,42.299999,1840.750000,,...,False,False,False,False,False,True,False,False,False,False
4,0,,,,2,,,70.699997,151.649994,,...,False,False,False,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5038,1,,1.0,1.0,24,1.0,1.0,84.800000,1990.500000,0.0,...,True,False,False,False,True,True,False,False,False,True
5039,0,,1.0,1.0,72,1.0,1.0,103.200000,7362.900000,0.0,...,True,False,False,False,True,True,False,True,False,False
5040,0,,1.0,1.0,11,0.0,1.0,29.600000,346.450000,0.0,...,False,True,False,False,False,False,False,False,True,False
5041,1,,1.0,0.0,4,1.0,1.0,74.400000,306.600000,1.0,...,False,True,False,False,False,False,False,False,False,True
