In [2]:

"""
This Python code performs a customer churn analysis by examining the relationships between various customer attributes and churn. 
It first loads the dataset and identifies categorical and numerical columns, excluding 'customerID' and 'Churn'. 
The 'Churn' column is then transformed into a binary variable. 
For categorical variables, chi-square tests of independence are conducted to determine the association with churn; the p-values are stored and sorted in ascending order (smallest p-value first).
For numerical variables, Pearson correlation coefficients with the binary churn variable are calculated. 
The results are printed, providing insights into which variables have significant associations or correlations with customer churn, aiding 
in understanding the factors driving churn.
"""
import pandas as pd
from scipy.stats import chi2_contingency

# Location of the raw data export, kept in one named place so it is easy to
# point the analysis at a different file.
DATA_PATH = 'C:/Users/14157/Downloads/customer_churn_data.csv'
# Columns that are never treated as explanatory features: the row identifier
# and the target itself.
NON_FEATURE_COLUMNS = ('customerID', 'Churn')


def select_feature_columns(df, dtype, exclude=NON_FEATURE_COLUMNS):
    """Return the names of the columns of ``df`` that match ``dtype``.

    Args:
        df: DataFrame to inspect.
        dtype: Selector forwarded to ``DataFrame.select_dtypes(include=...)``,
            e.g. ``'object'`` or ``'number'``.
        exclude: Column names to leave out of the result.

    Returns:
        A list of column names, in the frame's column order.
    """
    return [col for col in df.select_dtypes(include=[dtype]).columns
            if col not in exclude]


def encode_churn(churn):
    """Convert a Yes/No churn column into a 1/0 integer column.

    Only the exact string ``'Yes'`` is encoded as 1; every other value
    (``'No'``, missing values, differently-cased labels) is encoded as 0.

    Args:
        churn: Series holding the raw churn labels.

    Returns:
        An int64 Series with the same index as ``churn``.
    """
    return churn.eq('Yes').astype('int64')


def chi2_p_values(df, columns, target='Churn'):
    """Run a chi-square test of each column against ``target``.

    Args:
        df: DataFrame containing ``columns`` and ``target``.
        columns: Names of the categorical columns to test.
        target: Name of the column to test the association with.

    Returns:
        A dict mapping each tested column name to the p-value of its test.
        Columns whose contingency table is empty are left out.
    """
    p_values = {}
    for col in columns:
        contingency_table = pd.crosstab(df[col], df[target])
        if contingency_table.size == 0:
            # No observations to tabulate (e.g. the column is entirely
            # missing); chi2_contingency raises on an empty table.
            continue
        _, p_value, _, _ = chi2_contingency(contingency_table)
        p_values[col] = p_value
    return p_values


def churn_correlations(df, columns, target='Churn'):
    """Correlate each numerical column with ``target``.

    Args:
        df: DataFrame containing ``columns`` and a numeric ``target``.
        columns: Names of the numerical columns to correlate.
        target: Name of the (already numeric) target column.

    Returns:
        A dict mapping each column name to its correlation with ``target``.
    """
    return {col: df[target].corr(df[col]) for col in columns}


# Run as a script or notebook cell, ``__name__`` is ``"__main__"``, so the
# analysis below still executes and its results stay available as globals;
# the guard only keeps the file read from firing when the helpers are imported.
if __name__ == "__main__":
    df = pd.read_csv(DATA_PATH)
    # Column groups are determined before 'Churn' is re-encoded as a number.
    categorical_columns = select_feature_columns(df, 'object')
    variable_columns = select_feature_columns(df, 'number')
    df['Churn'] = encode_churn(df['Churn'])

    # Chi-squared testing for object columns, smallest p-value first.
    chi2_results = chi2_p_values(df, categorical_columns)
    chi2_results_sorted = sorted(chi2_results.items(), key=lambda x: x[1])
    print(chi2_results_sorted)

    # Correlation testing for int and float columns.
    correlation_results = churn_correlations(df, variable_columns)
    print(correlation_results)
    


    
    

[('PhoneService', 0.17508922975924363), ('Contract', 0.20098665550238531), ('MultipleLines', 0.3844131634885288), ('DeviceProtection', 0.40372396663363774), ('Partner', 0.4526539726065454), ('TechSupport', 0.46989558664691966), ('gender', 0.5460579370078414), ('OnlineBackup', 0.6497131152242142), ('StreamingMovies', 0.6834715476530313), ('StreamingTV', 0.8407687498857044), ('OnlineSecurity', 0.8539744927886259), ('PaymentMethod', 0.8953021208901424), ('InternetService', 0.9058021178645241), ('Dependents', 0.9566020565403854), ('PaperlessBilling', 0.9940527463246381)]
{'SeniorCitizen': -0.004742956805168825, 'tenure': 0.00859972335744912, 'MonthlyCharges': 0.014968711828272602, 'TotalCharges': 0.014263858000488844}
