## Importing Libraries :

In [None]:
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Remove the column limit so wide DataFrames are displayed without truncation.
pd.set_option("display.max_columns", None)

## Reading Data :

In [None]:
# Path of the Telco customer-churn CSV, relative to the notebook.
csv_path = '../input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv'
data_df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to check that the file was parsed as expected.
data_df.head(5)

In [None]:
# Print the DataFrame summary (column dtypes and non-null counts).
data_df.info()

In [None]:
# Number of distinct values in each column.
data_df.nunique()

## See some insights :

In [None]:
# Churn counts, drawn as one panel per gender value.
sb.catplot(
    x='Churn',
    col='gender',
    data=data_df,
    kind='count',
    order=['Yes', 'No'],
)

In [None]:
# Draw the same churn count plot once per attribute instead of repeating the
# call: each iteration produces one figure with a panel per attribute value.
for facet_column in ['InternetService', 'OnlineSecurity', 'TechSupport', 'Contract']:
    sb.catplot(data=data_df, kind='count', x='Churn', order=['Yes', 'No'], col=facet_column)

In [None]:
# Churn counts, drawn as one panel per payment method.
sb.catplot(
    x='Churn',
    col='PaymentMethod',
    data=data_df,
    kind='count',
    order=['Yes', 'No'],
)

### We expected gender to make a difference, but the plots suggest it does not.
### The plots do show several categories associated with higher churn:
1. Internet Service = Fiber optic ---- (which suggests the fiber-optic service needs some care)
2. Online Security = No ---- (Online Security should be activated for more users)
3. Tech Support = No ---- (the Tech Support service needs some care)
4. Contract = Month-to-month ---- (we could extend the minimum contract to, for example, 6 months)
5. Payment Method = Electronic check ---- (maybe because it is an easy way to pay!)

## Getting dummies of non-numerical (low-cardinality) columns :

In [None]:
# Columns with fewer than five distinct values are the ones to one-hot encode.
unique_counts = data_df.nunique()
dum_columns = unique_counts.index[unique_counts.values < 5]

In [None]:
# One-hot encode the low-cardinality columns.  dtype=int keeps the indicator
# columns as 0/1 integers on every pandas version (recent releases default to
# bool), so the correlation and modelling steps always see numeric data.
dum_data = pd.get_dummies(data_df, columns=dum_columns, dtype=int)
dum_data.head()

In [None]:
# Correlation heatmap of the encoded data.
# Only numeric/boolean columns are correlated: columns that were not one-hot
# encoded may still hold strings, which make DataFrame.corr() raise on recent
# pandas versions.
numeric_data = dum_data.select_dtypes(include=['number', 'bool'])
fig, ax = plt.subplots(figsize=(18, 14))
sb.heatmap(numeric_data.corr(), ax=ax)
# The original referenced `plt.tight_layout` without calling it, so the layout
# was never adjusted.
fig.tight_layout()
# It seems we have many columns to deal with

## Let's see which columns are correlated with churn :

In [None]:
def find_corr_with(Data, column, num_of_corr_cols):
    """Return the columns most positively correlated with ``column``.

    Parameters
    ----------
    Data : pandas.DataFrame
        Frame whose numeric and boolean columns are correlated pairwise.
        Columns of any other dtype are ignored.
    column : str
        Name of the reference column; it must be numeric or boolean.
    num_of_corr_cols : int
        Maximum number of entries to return.

    Returns
    -------
    pandas.Series
        Correlation coefficients with ``column``, indexed by column name and
        sorted from highest to lowest.  ``column`` itself is excluded, and
        undefined (NaN) correlations, e.g. from constant columns, sort last.
    """
    # Non-numeric columns have no correlation and make DataFrame.corr() raise
    # on recent pandas versions, so leave them out up front.
    numeric = Data.select_dtypes(include=['number', 'bool'])

    # Compute the correlation matrix once and drop the self-correlation entry.
    corr = numeric.corr()[column].drop(column)

    # Descending sort keeps NaN at the end; sorting ascending and reversing
    # would move NaN entries to the front of the result.
    corr = corr.sort_values(ascending=False)

    return corr.head(num_of_corr_cols)

In [None]:
# The ten columns with the highest correlation to the churn indicator.
corr_cols = find_corr_with(Data=dum_data, column='Churn_Yes', num_of_corr_cols=10)
corr_cols

In [None]:
# Keep only the selected columns as the candidate feature set.
selected_columns = corr_cols.index
corr_data = dum_data[selected_columns]
corr_data

## Splitting the data for the Machine Learning model :

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Features are the selected columns; the target is the churn indicator column.
X, y = corr_data, dum_data['Churn_Yes']

In [None]:
# Hold out 33% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

## Machine Learning Model :

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression classifier created with scikit-learn's default settings.
model = LogisticRegression()

In [None]:
# Fit the classifier on the training split.
model.fit(X=X_train, y=y_train)

In [None]:
# Predicted churn labels for the held-out rows.
predictions = model.predict(X=X_test)

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# For a classifier, model.score() is the mean accuracy on the given data.
print('Training Score : ', model.score(X_train,y_train))
print('Test Score : ', model.score(X_test,y_test))
# Share of misclassified test rows.  RMSE is a regression metric; on 0/1 class
# labels it is only the square root of this rate, so report the rate itself.
print('Error :', np.mean(predictions != y_test))
# Confusion matrix: shows which kind of mistake (missed churners vs. false
# alarms) the model makes, which a single accuracy figure hides.
pd.crosstab(
    y_test,
    pd.Series(predictions, index=y_test.index),
    rownames=['Actual'],
    colnames=['Predicted'],
)