In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [33]:
data = pd.read_csv("/content/churn-bigml-20.csv")

print(data.head())
print(data.info())


  State  Account length  Area code International plan Voice mail plan  \
0    LA             117        408                 No              No   
1    IN              65        415                 No              No   
2    NY             161        415                 No              No   
3    SC             111        415                 No              No   
4    HI              49        510                 No              No   

   Number vmail messages  Total day minutes  Total day calls  \
0                      0              184.5               97   
1                      0              129.1              137   
2                      0              332.9               67   
3                      0              110.4              103   
4                      0              119.3              117   

   Total day charge  Total eve minutes  Total eve calls  Total eve charge  \
0             31.37              351.6               80             29.89   
1             21.95   

In [34]:
print("Before encoding:", data["Churn"].unique())
data["Churn"] = data["Churn"].astype(int)
print("After encoding:", data["Churn"].unique())



Before encoding: [False  True]
After encoding: [0 1]


In [35]:
cat_cols = data.select_dtypes(include="object").columns
num_cols = data.select_dtypes(include=["int64", "float64"]).columns

data[cat_cols] = data[cat_cols].apply(lambda col: col.fillna(col.mode()[0]))

data[num_cols] = data[num_cols].apply(lambda col: col.fillna(col.mean()))

print("Missing values after filling:\n", data.isnull().sum())


Missing values after filling:
 State                     0
Account length            0
Area code                 0
International plan        0
Voice mail plan           0
Number vmail messages     0
Total day minutes         0
Total day calls           0
Total day charge          0
Total eve minutes         0
Total eve calls           0
Total eve charge          0
Total night minutes       0
Total night calls         0
Total night charge        0
Total intl minutes        0
Total intl calls          0
Total intl charge         0
Customer service calls    0
Churn                     0
dtype: int64


In [36]:
data = pd.get_dummies(data, drop_first=True)

print("Shape after encoding:", data.shape)


Shape after encoding: (667, 69)


In [37]:
X = data.drop(columns="Churn")
y = data["Churn"]

print("Features shape:", X.shape)
print("Target shape:", y.shape)


Features shape: (667, 68)
Target shape: (667,)


In [38]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)


Training set shape: (533, 68)
Testing set shape: (134, 68)


In [39]:
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train),
                       columns=X_train.columns,
                       index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test),
                      columns=X_test.columns,
                      index=X_test.index)

print("First 5 rows of scaled training set:\n", X_train.head())


First 5 rows of scaled training set:
      Account length  Area code  Number vmail messages  Total day minutes  \
234       -1.179904   1.738958              -0.595854           0.150424   
398       -0.080576  -0.684404              -0.595854          -1.972756   
278       -0.911179  -0.684404              -0.595854          -1.670984   
61         0.432443  -0.684404              -0.595854          -0.183680   
145        1.776066  -0.518095              -0.595854           0.764745   

     Total day calls  Total day charge  Total eve minutes  Total eve calls  \
234        -0.060953          0.150366          -1.708028        -0.069068   
398        -0.354521         -1.972374           0.228910         0.088029   
278        -1.235225         -1.671238           1.154491         0.559321   
61          0.330470         -0.183524           2.919878        -1.692405   
145        -1.235225          0.764260           0.204973         1.606635   

     Total eve charge  Total night m