In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score

In [5]:
# Load the raw dataset; the relative path is resolved against the current working directory
df = pd.read_csv("Ass.csv")

In [6]:
# Clean the raw frame and select the model inputs.
#
# This step overwrites two columns of `df`:
#   * 'TotalCharges' -> numeric, with unparseable entries replaced by 0
#   * 'Churn'        -> 0 for 'No', 1 for 'Yes'
# NOTE(review): the 'Churn' mapping is not idempotent. Re-running this cell without
# reloading `df` maps the already-encoded 0/1 values to NaN, because they are not keys
# of the mapping. Reload the data before re-running.

# Parse 'TotalCharges' as numbers; entries that cannot be parsed become NaN and are
# then replaced by 0. The result is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on `df['TotalCharges']`: that in-place call operates on an
# intermediate object, triggers a FutureWarning, and stops updating `df` in pandas 3.0.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)

# Encode the target as binary values; any value other than 'No'/'Yes' becomes NaN
df['Churn'] = df['Churn'].map({'No': 0, 'Yes': 1})

# Columns that are one-hot encoded downstream
categorical = ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService',
               'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
               'Contract', 'PaperlessBilling', 'PaymentMethod']
# Columns that are standardized downstream
numerical = ['tenure', 'MonthlyCharges', 'TotalCharges']

# Split the data into features (X) and target (y)
X = df[categorical + numerical]
y = df['Churn']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# --- Numerical features: standardization ---
# The scaler is fitted on the training rows only, then applied unchanged to the test rows
scaler = StandardScaler()
X_train_numerical = scaler.fit_transform(X_train[numerical])
X_test_numerical = scaler.transform(X_test[numerical])

# Wrap the scaled arrays in DataFrames that keep the original row index and column names
X_train_numerical_df = pd.DataFrame(data=X_train_numerical, index=X_train.index, columns=numerical)
X_test_numerical_df = pd.DataFrame(data=X_test_numerical, index=X_test.index, columns=numerical)

# --- Categorical features: one-hot encoding ---
# Dense output is requested so the result can be placed directly into a DataFrame;
# as with the scaler, the encoder is fitted on the training rows only
encoder = OneHotEncoder(sparse_output=False)
X_train_categorical = encoder.fit_transform(X_train[categorical])
X_test_categorical = encoder.transform(X_test[categorical])

# Both encoded frames share the same generated column names
encoded_columns = encoder.get_feature_names_out(categorical)
X_train_categorical_df = pd.DataFrame(data=X_train_categorical, index=X_train.index, columns=encoded_columns)
X_test_categorical_df = pd.DataFrame(data=X_test_categorical, index=X_test.index, columns=encoded_columns)

# --- Final design matrices: scaled numerical columns followed by the one-hot columns ---
X_train_combined = pd.concat([X_train_numerical_df, X_train_categorical_df], axis='columns')
X_test_combined = pd.concat([X_test_numerical_df, X_test_categorical_df], axis='columns')

# Candidate classifiers keyed by display name; every model is seeded with random_state=1
models = dict([
    ('Random Forest', RandomForestClassifier(random_state=1)),
    ('Extra Trees', ExtraTreesClassifier(random_state=1)),
    ('XGBoost', XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=1)),
    ('LightGBM', LGBMClassifier(random_state=1)),
])

# Fit each classifier on the training matrix and record its accuracy on the held-out rows
results = {}
for name in models:
    model = models[name]
    model.fit(X_train_combined, y_train)
    y_pred = model.predict(X_test_combined)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy

# Report the accuracies after all training is done, in the order the models were declared
for name in results:
    accuracy = results[name]
    print(f"{name} Accuracy: {accuracy:.4f}")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['TotalCharges'].fillna(0, inplace=True)


[LightGBM] [Info] Number of positive: 1521, number of negative: 4113
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000318 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 670
[LightGBM] [Info] Number of data points in the train set: 5634, number of used features: 46
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.269968 -> initscore=-0.994785
[LightGBM] [Info] Start training from score -0.994785
Random Forest Accuracy: 0.7913
Extra Trees Accuracy: 0.7672
XGBoost Accuracy: 0.7935
LightGBM Accuracy: 0.8034
