In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Preprocess the telecom customer dataset: load it, drop the identifier
# column, clean 'TotalCharges', one-hot encode the categorical columns,
# engineer 'TotalSpend', standardize the numeric features, and save the result.

# Loading the dataset
file_path = r'C:\nexford -\capstone\m3 work\teleconnect.csv'
df = pd.read_csv(file_path)

# Drop irrelevant columns (errors='ignore' keeps the cell re-runnable if the
# column is already absent)
df = df.drop(columns=['customerID'], errors='ignore')

# Handling missing values in 'TotalCharges'
# Converting 'TotalCharges' to numeric; values that cannot be parsed become NaN
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Removing rows where 'TotalCharges' is missing (NaN)
df = df.dropna(subset=['TotalCharges'])

# Checking for any other missing values in the dataset
print(df.isnull().sum())

# Creating the new feature 'TotalSpend' as tenure * MonthlyCharges.
# This must use the RAW values: multiplying standardized (z-scored) columns
# would produce a product of z-scores, where two below-average values yield a
# large positive number, which does not represent spend at all.
total_spend = df['tenure'] * df['MonthlyCharges']

# Encoding categorical variables (drop_first=True drops one level per
# category to avoid perfectly collinear dummy columns)
df_encoded = pd.get_dummies(df, drop_first=True)

# Appending the engineered feature as the last column; the Series shares the
# index of df, so rows stay aligned after the dropna above
df_encoded['TotalSpend'] = total_spend

# Scaling numerical features, including the engineered one so that all numeric
# inputs end up on a comparable scale.
# NOTE(review): the scaler is fitted on the full dataset; if a train/test split
# happens downstream, fit it on the training portion only to avoid leakage.
numeric_columns = ['tenure', 'MonthlyCharges', 'TotalCharges', 'TotalSpend']
scaler = StandardScaler()
df_encoded[numeric_columns] = scaler.fit_transform(df_encoded[numeric_columns])

# Saving the cleaned dataset to a valid local directory
cleaned_file_path = r'C:\nexford -\capstone\m3 work\cleaned_telco_data.csv'
df_encoded.to_csv(cleaned_file_path, index=False)

# Displaying the first few rows of the cleaned dataset
print(df_encoded.head())

gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64
   SeniorCitizen    tenure  MonthlyCharges  TotalCharges  gender_Male  \
0              0 -1.280248       -1.161694     -0.994194        False   
1              0  0.064303       -0.260878     -0.173740         True   
2              0 -1.239504       -0.363923     -0.959649         True   
3              0  0.512486       -0.747850     -0.195248         True   
4              0 -1.239504        0.196178     -0.940457        False   

   Partner_Yes  Dependents_Yes  PhoneService_Yes  \
0         True           False             False   
1   