In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import pickle

# Load the dataset
data = pd.read_csv('TelcoCustomerChurn.csv')

# Drop the customerID column
rdf = data.drop('customerID', axis=1)

# Create a new feature
rdf['Has_TechSupport_and_OnlineSecurity'] = (rdf["TechSupport"] == 'Yes') & (rdf["OnlineSecurity"] == 'Yes')

# Convert TotalCharges to numeric, coercing errors to NaN
rdf['TotalCharges'] = pd.to_numeric(rdf['TotalCharges'], errors='coerce')

# Drop rows with NaN values
rdf.dropna(inplace=True)

# Select relevant features for training
relevant_features = ['tenure', 'Contract', 'TechSupport', 'OnlineSecurity', 'MonthlyCharges', 'TotalCharges']

# Prepare the feature matrix X and target vector y
X = rdf[relevant_features]
y = rdf["Churn"]

# Encode categorical features
le = LabelEncoder()
for col in X.select_dtypes(include='object').columns:
    X[col] = le.fit_transform(X[col])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the specific hyperparameters found by GridSearchCV
best_params = {'max_depth': 8, 'n_estimators': 200}

# Create and train the random forest model with the best hyperparameters
model = RandomForestClassifier(**best_params)
model.fit(X_train, y_train)

# Save the trained model
with open('churnmodel.pkl', 'wb') as f:
    pickle.dump(model, f)

print("Model saved successfully!")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col])


Model saved successfully!


In [4]:
rdf

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,Has_TechSupport_and_OnlineSecurity
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,False
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,...,No,No,No,One year,No,Mailed check,56.95,1889.50,No,False
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,False
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,...,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No,True
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,No,...,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.50,No,True
7039,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,Yes,...,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.90,No,False
7040,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,No,...,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No,False
7041,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,No,...,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.60,Yes,False


In [5]:
rdf.dtypes

gender                                 object
SeniorCitizen                           int64
Partner                                object
Dependents                             object
tenure                                  int64
PhoneService                           object
MultipleLines                          object
InternetService                        object
OnlineSecurity                         object
OnlineBackup                           object
DeviceProtection                       object
TechSupport                            object
StreamingTV                            object
StreamingMovies                        object
Contract                               object
PaperlessBilling                       object
PaymentMethod                          object
MonthlyCharges                        float64
TotalCharges                          float64
Churn                                  object
Has_TechSupport_and_OnlineSecurity       bool
dtype: object