## Import Python Packages and Environment Setup

In [None]:
!pip install lifelines==0.24.9

In [None]:
import lifelines

# This notebook was written against lifelines 0.24.9; other versions may not
# execute every cell.
print(f'The lifelines version is {lifelines.__version__}.')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#from lifelines import *
from lifelines import KaplanMeierFitter
from lifelines.utils import median_survival_times
from lifelines import CoxPHFitter
from sklearn.model_selection import train_test_split

In [None]:
# Widen the pandas display limits so wide/long frames are shown in full.
for option_name in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(option_name, 500)

## Importing and Analyzing the Dataset

In [None]:
# Read the Telco customer churn CSV from the Kaggle input directory and preview it.
csv_path = "/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Column names, dtypes and non-null counts of the raw frame.
df.info()

## Data Visualization

In [None]:
# Plot theme and default figure size used by the plots below.
sns.set(style="darkgrid", rc={'figure.figsize': (7, 5)})

# Number of customers per gender.
ax = sns.countplot(data=df, x="gender", color='gray')
ax.set_title('Gender Distribution Among the Customers')

In [None]:
# Customers per SeniorCitizen value, split by Partner.
ax = sns.countplot(
    data=df,
    x='SeniorCitizen',
    hue='Partner',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Distribution of Senior Citizens grouped by Partners')

In [None]:
# Customers per Dependents value.
ax = sns.countplot(
    data=df,
    x='Dependents',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Customers with Dependents')

In [None]:
# Customers per Dependents value, split by Partner.
ax = sns.countplot(
    data=df,
    x='Dependents',
    hue='Partner',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Dependents and Partner Distribution among the Customers')

In [None]:
# Customers per Dependents value, split by SeniorCitizen.
ax = sns.countplot(
    data=df,
    x='Dependents',
    hue='SeniorCitizen',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Dependents and Senior Citizen Distribution')

In [None]:
# Customers per PhoneService value.
ax = sns.countplot(
    data=df,
    x='PhoneService',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Customers with Phone Service')

In [None]:
# Customers per MultipleLines value.
# The previous title was left over from another plot and did not describe this chart.
sns.countplot(x = 'MultipleLines',
              data = df, color = 'gray', 
              edgecolor=sns.color_palette('gray', 1)
             ).set_title('Customers with Multiple Lines')

In [None]:
# Customers per PhoneService value, split by MultipleLines.
ax = sns.countplot(
    data=df,
    x='PhoneService',
    hue='MultipleLines',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Phone Service and Multiple Lines')

In [None]:
# Customers per InternetService type.
ax = sns.countplot(
    data=df,
    x='InternetService',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Internet Service')

In [None]:
# Customers per OnlineSecurity value.
ax = sns.countplot(
    data=df,
    x='OnlineSecurity',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Online Security')

In [None]:
# Customers per InternetService type, split by OnlineSecurity.
ax = sns.countplot(
    data=df,
    x='InternetService',
    hue='OnlineSecurity',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Internet Service and Online Security')

In [None]:
# Customers per InternetService type, split by OnlineBackup.
ax = sns.countplot(
    data=df,
    x='InternetService',
    hue='OnlineBackup',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Internet Service and Online Backup')

In [None]:
# Customers per InternetService type, split by DeviceProtection.
ax = sns.countplot(
    data=df,
    x='InternetService',
    hue='DeviceProtection',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Internet Service and Device Protection')

In [None]:
# Customers per InternetService type, split by TechSupport.
ax = sns.countplot(
    data=df,
    x='InternetService',
    hue='TechSupport',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Internet Service and Tech Support')

In [None]:
# Customers per InternetService type, split by StreamingTV.
ax = sns.countplot(
    data=df,
    x='InternetService',
    hue='StreamingTV',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Internet Service and Streaming TV')

In [None]:
# Customers per InternetService type, split by StreamingMovies.
ax = sns.countplot(
    data=df,
    x='InternetService',
    hue='StreamingMovies',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Internet Service and Streaming Movies')

In [None]:
# Customers per Contract type.
ax = sns.countplot(
    data=df,
    x='Contract',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Types of Contracts among the Customers')

In [None]:
# Customers per Contract type, split by PaperlessBilling.
ax = sns.countplot(
    data=df,
    x='Contract',
    hue='PaperlessBilling',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Contract Type and Paperless Billing')

In [None]:
# Customers per Contract type, split by PaymentMethod.
ax = sns.countplot(
    data=df,
    x='Contract',
    hue='PaymentMethod',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Contract Type and Payment Method')

In [None]:
# Churned vs. retained customers, split by gender.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='gender',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Gender')

In [None]:
# Churned vs. retained customers, split by SeniorCitizen.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='SeniorCitizen',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Senior or Normal Citizens')

In [None]:
# Churned vs. retained customers, split by Partner.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='Partner',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Customers with or without Partners')

In [None]:
# Churned vs. retained customers, split by Dependents.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='Dependents',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Dependents or No Dependents')

In [None]:
# Churned vs. retained customers, split by PhoneService.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='PhoneService',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Customers with or without Phone Service')

In [None]:
# Churned vs. retained customers, split by MultipleLines.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='MultipleLines',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Customers with or without Multiple Phone Lines')

In [None]:
# Churned vs. retained customers, split by InternetService.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='InternetService',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by type of Internet Service')

In [None]:
# Churned vs. retained customers, split by OnlineSecurity.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='OnlineSecurity',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by the type of Online Security')

In [None]:
# Churned vs. retained customers, split by OnlineBackup.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='OnlineBackup',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Customers with or without Online Backup')

In [None]:
# Churned vs. retained customers, split by DeviceProtection.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='DeviceProtection',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Customers with or without Device Protection')

In [None]:
# Churned vs. retained customers, split by TechSupport.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='TechSupport',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Customers with or without Tech Support')

In [None]:
# Churned vs. retained customers, split by StreamingTV.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='StreamingTV',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Customers with or without Streaming TV')

In [None]:
# Churned vs. retained customers, split by StreamingMovies.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='StreamingMovies',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Customers with or without Streaming Movies')

In [None]:
# Churned vs. retained customers, split by Contract type.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='Contract',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by the Contract Type')

In [None]:
# Churned vs. retained customers, split by PaperlessBilling.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='PaperlessBilling',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by Customers with or without Paperless Billing')

In [None]:
# Churned vs. retained customers, split by PaymentMethod.
ax = sns.countplot(
    data=df,
    x='Churn',
    hue='PaymentMethod',
    color='gray',
    edgecolor=sns.color_palette('gray', 1),
)
ax.set_title('Churning Customers grouped by the type of Payment Method')

In [None]:
# Visual null check: a missing cell would appear as a coloured line in the
# heatmap; an empty figure means isnull() found nothing.
# NOTE: blank strings (like the " " entries replaced in TotalCharges further
# down) are not null and therefore do not show up here.
sns.heatmap(df.isnull(), cmap="Greens", cbar=False, yticklabels=False)

In [None]:
# Correlation heatmap of the numeric columns.
# The frame still holds string columns at this point; recent pandas versions
# raise when DataFrame.corr() meets them instead of silently skipping them,
# so the numeric (and boolean) columns are selected explicitly.
corr = df.select_dtypes(include=['number', 'bool']).corr()
ax = sns.heatmap(
    corr, 
    vmin=-1, vmax=1, center=0,
    cmap=sns.diverging_palette(20, 220, n=200)
)
# Tilt the x tick labels so long column names stay readable.
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right'
);

## Data Analysis

In [None]:
# Number of customers in each (gender, Churn) combination.
(
    df[['Churn', 'gender', 'customerID']]
    .groupby(['gender', 'Churn'])
    .count()
)

In [None]:
# Number of customers in each (SeniorCitizen, Churn) combination.
(
    df[['Churn', 'SeniorCitizen', 'customerID']]
    .groupby(['SeniorCitizen', 'Churn'])
    .count()
)

In [None]:
# Number of customers in each (gender, SeniorCitizen, Churn) combination.
(
    df[['gender', 'SeniorCitizen', 'Churn', 'customerID']]
    .groupby(['gender', 'SeniorCitizen', 'Churn'])
    .count()
)

## Data Preprocessing

We are creating a new `Female` column that is 1 if the customer is female and 0 if the customer is male.

In [None]:
# Show the raw gender counts, then replace `gender` with a 0/1 `Female` flag.
# NOTE: `gender` is removed from `df` here, so this cell cannot be run twice.
print(df['gender'].value_counts())
df['Female'] = (df['gender'] == 'Female').astype(int)
df.drop(columns='gender', inplace=True)

In [None]:
# Counts of the new 0/1 Female flag.
print(df['Female'].value_counts())

We are modifying the Partner column to be 1 if the customer has a partner and 0 otherwise.

In [None]:
# Show the raw counts, then recode Partner as 1 (Yes) / 0 (No).
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['Partner'].value_counts())
df['Partner'] = df['Partner'].map({'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded Partner column.
print(df['Partner'].value_counts())

The Senior Citizen column is already in the desired format, so we do not need to modify it.

In [None]:
# Counts per SeniorCitizen value.
df['SeniorCitizen'].value_counts()

We are modifying the Dependents column to be 1 if the customer has Dependents and 0 otherwise.

In [None]:
# Show the raw counts, then recode Dependents as 1 (Yes) / 0 (No).
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['Dependents'].value_counts())
df['Dependents'] = df['Dependents'].map({'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded Dependents column.
print(df['Dependents'].value_counts())

We are modifying the Phone Service column to be 1 if the customer has Phone Service Available and 0 otherwise.

In [None]:
# Show the raw counts, then recode PhoneService as 1 (Yes) / 0 (No).
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['PhoneService'].value_counts())
df['PhoneService'] = df['PhoneService'].map({'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded PhoneService column.
print(df['PhoneService'].value_counts())

We are modifying the Multiple Lines column to be 1 if the customer has Multiple Lines and 0 otherwise.

In [None]:
# Show the raw counts, then recode MultipleLines: 'Yes' -> 1, while both 'No'
# and 'No phone service' -> 0.
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['MultipleLines'].value_counts())
df['MultipleLines'] = df['MultipleLines'].map({'No phone service': 0, 'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded MultipleLines column.
print(df['MultipleLines'].value_counts())

We will One-Hot Encode the Internet Services Column Later.

In [None]:
# Counts per InternetService category (left as text here).
df['InternetService'].value_counts()

We are modifying the Online Security column to be 1 if the customer has Online Security and 0 otherwise.

In [None]:
# Show the raw counts, then recode OnlineSecurity: 'Yes' -> 1, while both 'No'
# and 'No internet service' -> 0.
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['OnlineSecurity'].value_counts())
df['OnlineSecurity'] = df['OnlineSecurity'].map({'No internet service': 0, 'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded OnlineSecurity column.
print(df['OnlineSecurity'].value_counts())

We are modifying the Online Backup column to be 1 if the customer has Online Backup and 0 otherwise.

In [None]:
# Show the raw counts, then recode OnlineBackup: 'Yes' -> 1, while both 'No'
# and 'No internet service' -> 0.
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['OnlineBackup'].value_counts())
df['OnlineBackup'] = df['OnlineBackup'].map({'No internet service': 0, 'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded OnlineBackup column.
print(df['OnlineBackup'].value_counts())

We are modifying the Device Protection column to be 1 if the customer has Device Protection and 0 otherwise.

In [None]:
# Show the raw counts, then recode DeviceProtection: 'Yes' -> 1, while both
# 'No' and 'No internet service' -> 0.
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['DeviceProtection'].value_counts())
df['DeviceProtection'] = df['DeviceProtection'].map({'No internet service': 0, 'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded DeviceProtection column.
print(df['DeviceProtection'].value_counts())

We are modifying the Tech Support column to be 1 if the customer has Tech Support and 0 otherwise.

In [None]:
# Show the raw counts, then recode TechSupport: 'Yes' -> 1, while both 'No'
# and 'No internet service' -> 0.
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['TechSupport'].value_counts())
df['TechSupport'] = df['TechSupport'].map({'No internet service': 0, 'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded TechSupport column.
print(df['TechSupport'].value_counts())

We are modifying the Streaming TV column to be 1 if the customer has Streaming TV and 0 otherwise.

In [None]:
# Show the raw counts, then recode StreamingTV: 'Yes' -> 1, while both 'No'
# and 'No internet service' -> 0.
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['StreamingTV'].value_counts())
df['StreamingTV'] = df['StreamingTV'].map({'No internet service': 0, 'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded StreamingTV column.
print(df['StreamingTV'].value_counts())

We are modifying the Streaming Movies column to be 1 if the customer has Streaming Movies and 0 otherwise.

In [None]:
# Show the raw counts, then recode StreamingMovies: 'Yes' -> 1, while both
# 'No' and 'No internet service' -> 0.
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['StreamingMovies'].value_counts())
df['StreamingMovies'] = df['StreamingMovies'].map({'No internet service': 0, 'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded StreamingMovies column.
print(df['StreamingMovies'].value_counts())

We will One-Hot Encode the Contract Column later.

In [None]:
# Counts per Contract category (left as text here).
df['Contract'].value_counts()

We are modifying the Paperless Billing column to be 1 if the customer has Paperless Billing and 0 otherwise.

In [None]:
# Show the raw counts, then recode PaperlessBilling as 1 (Yes) / 0 (No).
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['PaperlessBilling'].value_counts())
df['PaperlessBilling'] = df['PaperlessBilling'].map({'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded PaperlessBilling column.
print(df['PaperlessBilling'].value_counts())

We will One-Hot Encode the Payment Method Column later.

In [None]:
# Counts per PaymentMethod category (left as text here).
df['PaymentMethod'].value_counts()

We are modifying the Churn column to be 1 if the customer has Churned and 0 otherwise.

In [None]:
# Show the raw counts, then recode Churn as 1 (Yes) / 0 (No).
# NOTE: running this a second time maps the already-recoded values to NaN.
print(df['Churn'].value_counts())
df['Churn'] = df['Churn'].map({'No': 0, 'Yes': 1})

In [None]:
# Counts of the recoded Churn column.
print(df['Churn'].value_counts())

Taking care of a few missing values in the Total Charges column: blank entries are converted to NaN and then replaced with the column mean.

In [None]:
# TotalCharges is read as text because some entries are a single blank.
# Turn those blanks into NaN and convert the column to float.
df['TotalCharges'] = df['TotalCharges'].replace(" ", np.nan).astype(float)
# Impute the missing values with the column mean. The result is assigned back
# rather than using fillna(inplace=True) on the selected column: that form is
# chained assignment, which pandas deprecates and which leaves `df` unchanged
# under Copy-on-Write.
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].mean())

In [None]:
# Re-check dtypes and non-null counts after the recoding steps above.
df.info()

In [None]:
# Preview the preprocessed frame.
df.head()

## Training multiple Kaplan Meier Fitter Models

In [None]:
# Survival-analysis inputs: T is the duration (tenure), E the event flag (Churn).
T, E = df['tenure'], df['Churn']

In [None]:
# from lifelines import KaplanMeierFitter
kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E)

In [None]:
# Plot the cumulative density of the fitted estimator
# (the underlying table is available as kmf.cumulative_density_).
kmf.plot_cumulative_density()

In [None]:
# Plot the survival function of the fitted estimator.
kmf.plot_survival_function() 

In [None]:
# Estimated median time to event (np.inf if it does not exist).
median_ = kmf.median_survival_time_
print(f'Median Value: {median_}')

### Survival Analysis of Churning Customer Groups (Senior Citizens Vs. Normal Citizens)

In [None]:
# Kaplan-Meier curves for non-senior vs. senior customers on shared axes.
seniorCitizen = (df['SeniorCitizen'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~seniorCitizen, 'Not Senior Citizens'),
                    (seniorCitizen, 'Senior Citizens')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Partners Vs. Without Partners)

In [None]:
# Kaplan-Meier curves for customers without vs. with a partner on shared axes.
Partner = (df['Partner'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~Partner, 'Without Partner'),
                    (Partner, 'With Partner')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Dependents Vs. Without Dependents)

In [None]:
# Kaplan-Meier curves for customers without vs. with dependents on shared axes.
Dependents = (df['Dependents'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~Dependents, 'Without Dependents'),
                    (Dependents, 'With Dependents')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Phone Service Vs. Without Phone Service)

In [None]:
# Kaplan-Meier curves for customers without vs. with phone service on shared axes.
PhoneService = (df['PhoneService'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~PhoneService, 'Without Phone Service'),
                    (PhoneService, 'With Phone Service')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Multiple Lines Vs. Without Multiple Lines)

In [None]:
# Kaplan-Meier curves for customers without vs. with multiple lines on shared axes.
MultipleLines = (df['MultipleLines'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~MultipleLines, 'Without MultipleLines'),
                    (MultipleLines, 'With MultipleLines')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With DSL Vs. With Fiber Optics Vs. No Internet Service)

In [None]:
# Kaplan-Meier curves per internet service type (DSL, fiber optic, none) on shared axes.
InternetServiceDSL = (df['InternetService'] == 'DSL')
InternetServiceFiberOptic = (df['InternetService'] == 'Fiber optic')
NoInternetService = (df['InternetService'] == 'No')

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((InternetServiceDSL, 'DSL'),
                    (InternetServiceFiberOptic, 'Fiber Optics'),
                    (NoInternetService, 'No Internet Services')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Online Security Vs. Without Online Security)

In [None]:
# Kaplan-Meier curves for customers without vs. with online security on shared axes.
OnlineSecurity = (df['OnlineSecurity'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~OnlineSecurity, 'Without Online Security'),
                    (OnlineSecurity, 'With Online Security')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Online Backup Vs. Without Online Backup)

In [None]:
# Kaplan-Meier curves for customers without vs. with online backup on shared axes.
OnlineBackup = (df['OnlineBackup'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~OnlineBackup, 'Without Online Backup'),
                    (OnlineBackup, 'With Online Backup')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Device Protection Vs. Without Device Protection)

In [None]:
# Kaplan-Meier curves for customers without vs. with device protection on shared axes.
DeviceProtection = (df['DeviceProtection'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~DeviceProtection, 'Without Device Protection'),
                    (DeviceProtection, 'With Device Protection')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Tech Support Vs. Without Tech Support)

In [None]:
# Kaplan-Meier curves for customers without vs. with tech support on shared axes.
TechSupport = (df['TechSupport'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~TechSupport, 'Without Tech Support'),
                    (TechSupport, 'With Tech Support')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Streaming TV Vs. Without Streaming TV)

In [None]:
# Kaplan-Meier curves for customers without vs. with streaming TV on shared axes.
StreamingTV = (df['StreamingTV'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~StreamingTV, 'Without Streaming TV'),
                    (StreamingTV, 'With Streaming TV')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Streaming Movies Vs. Without Streaming Movies)

In [None]:
# Kaplan-Meier curves for customers without vs. with streaming movies on shared axes.
StreamingMovies = (df['StreamingMovies'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~StreamingMovies, 'Without Streaming Movies'),
                    (StreamingMovies, 'With Streaming Movies')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Paperless Billing Vs. Without Paperless Billing)

In [None]:
# Kaplan-Meier curves for customers without vs. with paperless billing on shared axes.
PaperlessBilling = (df['PaperlessBilling'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~PaperlessBilling, 'Without Paperless Billing'),
                    (PaperlessBilling, 'With Paperless Billing')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (With Electronic Check Vs. With Mailed Check Vs. Bank Transfer Vs. Credit Card)

In [None]:
# Kaplan-Meier curves per payment method on shared axes.
ElectronicCheck = (df['PaymentMethod'] == 'Electronic check')
MailedCheck = (df['PaymentMethod'] == 'Mailed check')
BankTransfer = (df['PaymentMethod'] == 'Bank transfer (automatic)')
CreditCard = (df['PaymentMethod'] == 'Credit card (automatic)')

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((ElectronicCheck, 'Electronic Check'),
                    (MailedCheck, 'Mailed Check'),
                    (BankTransfer, 'Bank Transfer'),
                    (CreditCard, 'Credit Card')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

### Survival Analysis of Churning Customer Groups (Male Vs. Female)

In [None]:
# Kaplan-Meier curves for male vs. female customers on shared axes.
Female = (df['Female'] == 1)

ax = None  # first plot() call gets ax=None, i.e. the same as a bare plot()
for mask, label in ((~Female, 'Male Customers'),
                    (Female, 'Female Customers')):
    kmf.fit(T[mask], E[mask], label=label)
    ax = kmf.plot(ax=ax)

In [None]:
# Confidence interval of the median survival time for the whole customer base.
# `kmf` was last fitted on the female subgroup only, so reusing it here would
# report that subgroup's interval; a dedicated estimator is fitted on all
# customers instead. (median_survival_times is imported at the top of the notebook.)
kmf_all = KaplanMeierFitter()
kmf_all.fit(T, event_observed=E)
median_ci = median_survival_times(kmf_all.confidence_interval_)
median_ci

## Training a Cox Proportional Hazard Regression Model

In [None]:
# Columns fed to the Cox model: the covariates plus the duration (tenure)
# and the event flag (Churn). customerID is deliberately left out.
cols_of_interest = [
    'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
    'PhoneService', 'MultipleLines', 'InternetService',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
    'TechSupport', 'StreamingTV', 'StreamingMovies',
    'Contract', 'PaperlessBilling', 'PaymentMethod',
    'MonthlyCharges', 'TotalCharges', 'Female', 'Churn',
]
data = df[cols_of_interest]
data.head()

In [None]:
# One-hot encode the remaining text columns (InternetService, Contract,
# PaymentMethod), which were kept as text earlier because the Kaplan-Meier
# comparisons needed the raw categories.
data = pd.get_dummies(data)
# Drop one dummy per encoded column to avoid a 'Matrix Singularity Error'
# when training the model.
data.drop(
    columns=['Contract_Two year', 'PaymentMethod_Mailed check', 'InternetService_Fiber optic'],
    inplace=True,
)
data.head()

In [None]:
# Fit the Cox proportional hazards model: tenure is the duration, Churn the event.
cph = CoxPHFitter()
cph.fit(data, duration_col='tenure', event_col='Churn', show_progress=True)

In [None]:
# Print the fitted model's summary table.
cph.print_summary()

In [None]:
cph.plot() 
# Visual representation of the fitted coefficients (i.e. log hazard ratios),
# including their standard errors and magnitudes: read it as the size and
# direction of each covariate's impact on churn.
# Dummy-encoded columns are read relative to the category that was dropped
# before fitting (Fiber optic, Two year, Mailed check).
# Examples (NOTE(review): confirm each against the rendered figure):
# 1. Customers with no internet service are less likely to churn than fiber-optic customers.
# 2. A one-year contract carries a higher chance of churning than a two-year contract.
# 3. A month-to-month contract carries a higher chance of churning than both
#    two-year and one-year contracts.

In [None]:
# Coefficient table of the fitted model, one entry per covariate.
cph.params_ # The estimated coefficients.

In [None]:
# Survival curves with Contract_Month-to-month set to 0 and to 1.
cph.plot_covariate_groups('Contract_Month-to-month', [0, 1], cmap='coolwarm')

# Quoted description of plot_covariate_groups():
# "We can plot what the survival curves look like as we vary a single covariate
# while holding everything else equal. This is useful to understand the impact
# of a covariate, given the model. To do this, we use the
# plot_covariate_groups() method and give it the covariate of interest, and
# the values to display."

Read More about Predict Expectation Here: https://lifelines.readthedocs.io/en/latest/fitters/regression/CoxPHFitter.html?highlight=predict_expectation#lifelines.fitters.coxph_fitter.CoxPHFitter.predict_expectation

In [None]:
# Predicted expectation for every row of `data` (see the link above for the definition).
cph.predict_expectation(data)

Read more about Predict Log Partial Hazard Here: https://lifelines.readthedocs.io/en/latest/fitters/regression/CoxPHFitter.html?highlight=cph.predict_log_partial_hazard#lifelines.fitters.coxph_fitter.CoxPHFitter.predict_log_partial_hazard

In [None]:
# Predicted log partial hazard for every row of `data` (see the link above).
cph.predict_log_partial_hazard(data)

In [None]:
# The event_observed values the model was fitted with
# (the Churn column passed as event_col).
cph.event_observed

In [None]:
# Baseline cumulative hazard table of the fitted model.
cph.baseline_cumulative_hazard_

In [None]:
# Line plot of the baseline cumulative hazard.
sns.lineplot(data=cph.baseline_cumulative_hazard_)

### Internet Service vs. No Internet Service
- InternetService_No = 1 means no internet service.
- InternetService_No = 0 means internet service is available.

In [None]:
# Survival curves with InternetService_No set to 0 and to 1.
cph.plot_covariate_groups('InternetService_No', [0, 1], cmap='coolwarm')

### Online Security vs. No Online Security

In [None]:
# Survival curves with OnlineSecurity set to 0 and to 1.
cph.plot_covariate_groups('OnlineSecurity', [0, 1], cmap='coolwarm')

### DSL Internet Service vs. Other Internet Service or No Internet Service

In [None]:
# Survival curves with InternetService_DSL set to 0 and to 1.
cph.plot_covariate_groups('InternetService_DSL', [0, 1], cmap='coolwarm')

### One Year Contract vs. Two Year and Monthly Contract

In [None]:
# Survival curves with Contract_One year set to 0 and to 1.
cph.plot_covariate_groups('Contract_One year', [0, 1], cmap='coolwarm')

### Phone Service vs. No Phone Service

In [None]:
# Survival curves with PhoneService set to 0 and to 1.
cph.plot_covariate_groups('PhoneService', [0, 1], cmap='coolwarm')

### Senior Citizen vs. Normal Citizen 

In [None]:
# Survival curves with SeniorCitizen set to 0 and to 1.
cph.plot_covariate_groups('SeniorCitizen', [0, 1], cmap='coolwarm')

### Dependents vs. No Dependents

In [None]:
# Survival curves with Dependents set to 0 and to 1.
cph.plot_covariate_groups('Dependents', [0, 1], cmap='coolwarm')

### Senior Citizen and Partner

In [None]:
# Survival curves for every 0/1 combination of SeniorCitizen and Partner:
# [0, 0], [0, 1], [1, 0], [1, 1].
flag_combinations = [[first, second] for first in (0, 1) for second in (0, 1)]
cph.plot_covariate_groups(['SeniorCitizen', 'Partner'], flag_combinations, cmap='coolwarm')
# To save the figure: plt.savefig('SeniorCitizen with Partner.png')
plt.title("SeniorCitizen with Partner");

### Senior Citizen and Dependents

In [None]:
# Survival curves for every 0/1 combination of SeniorCitizen and Dependents:
# [0, 0], [0, 1], [1, 0], [1, 1].
flag_combinations = [[first, second] for first in (0, 1) for second in (0, 1)]
cph.plot_covariate_groups(['SeniorCitizen', 'Dependents'], flag_combinations, cmap='coolwarm')
# To save the figure: plt.savefig('Media/SeniorCitizen with Dependents.png')
plt.title("SeniorCitizen with Dependents");

### Phone Service and Internet Service
- InternetService_No = 1 means no internet service.
- InternetService_No = 0 means internet service is available.

In [None]:
# Survival curves for every 0/1 combination of PhoneService and
# InternetService_No: [0, 0], [0, 1], [1, 0], [1, 1].
flag_combinations = [[first, second] for first in (0, 1) for second in (0, 1)]
cph.plot_covariate_groups(['PhoneService', 'InternetService_No'], flag_combinations, cmap='coolwarm')
# To save the figure: plt.savefig('Media/Internet Services and Phone Services.png')
plt.title("Internet Services and Phone Services");

### Predicting Survival of all the Customers

In [None]:
# Attach the customer identifier (aligned on the index) so individual
# customers can be looked up.
data = data.assign(ID=df['customerID'])
data.head()

In [None]:
# Predicted survival function for every customer (ID column excluded).
cph.predict_survival_function(data.drop(columns='ID'))

In [None]:
# Predicted median for every customer (ID column excluded).
cph.predict_median(data.drop(columns='ID'))

Read more about Predict Partial Hazard Here: https://lifelines.readthedocs.io/en/latest/fitters/regression/CoxPHFitter.html#lifelines.fitters.coxph_fitter.CoxPHFitter.predict_partial_hazard

In [None]:
# Predicted partial hazard for every customer (ID column excluded).
cph.predict_partial_hazard(data.drop(columns='ID'))

Read more about Predict Median Here: https://lifelines.readthedocs.io/en/latest/fitters/regression/CoxPHFitter.html#lifelines.fitters.coxph_fitter.CoxPHFitter.predict_median

In [None]:
# Predicted median for every customer.
# NOTE(review): unlike the earlier predict_median call, `data` is passed with
# its string 'ID' column still attached, and the call otherwise repeats that
# earlier cell — confirm whether this cell is still needed.
cph.predict_median(data)

### Predicting Survival of a specific Customer

In [None]:
# Pick one customer by ID and keep that row as the prediction input.
customer_id = '3668-QPYBK'
input_ = data[data['ID'].eq(customer_id)]
input_.head()

In [None]:
# Validate: how many rows carry this customer ID?
match_count = len(data.loc[data['ID'] == customer_id])
print(f'Total number of Customers with the id {customer_id}: {match_count}')

In [None]:
# Index label(s) of the selected customer
customer = data.index[data['ID'] == customer_id]

# Remove that customer's row from `data` (in place)
data.drop(index=customer, inplace=True)

# Validate: the customer should no longer be present
remaining = len(data.loc[data['ID'] == customer_id])
print(f'Total number of Customers with the id {customer_id}: {remaining}')

In [None]:
# Remove the identifier column again before refitting.
# NOTE: this is in place, so re-running the cell fails once 'ID' is gone.
data.drop(columns='ID', inplace=True)
data.head()

In [None]:
# Refit the Cox model on `data`, which no longer contains the selected customer.
cph = CoxPHFitter()
cph.fit(data, duration_col='tenure', event_col='Churn', show_progress=True)

In [None]:
# Predict and plot the survival function of the held-out customer.
results = cph.predict_survival_function(input_.drop(columns='ID'))
ax = sns.lineplot(data=results, legend=False)
ax.set_title(f'Survival of the Customer: {customer_id}')

Read more about Predict Partial Hazard Here: https://lifelines.readthedocs.io/en/latest/fitters/regression/CoxPHFitter.html#lifelines.fitters.coxph_fitter.CoxPHFitter.predict_partial_hazard

In [None]:
# Predicted partial hazard of the held-out customer (ID column excluded).
cph.predict_partial_hazard(input_.drop(columns='ID'))

### Using Cross-Validation Scheme
- This allows us to be Confident that the predictions will work well in practice.
- This also allows us to choose between multiple models.
- An example is as follows
- Read More Here: https://lifelines.readthedocs.io/en/latest/lifelines.utils.html?highlight=k_fold_cross_validation#lifelines.utils.k_fold_cross_validation

In [None]:
from lifelines.utils import k_fold_cross_validation

# Seed NumPy so the reported score is repeatable from run to run.
# NOTE(review): assumes lifelines shuffles the rows through NumPy's global
# random state — confirm against the installed lifelines version.
np.random.seed(42)

cph = CoxPHFitter()

# `data` no longer contains the 'ID' column (it was dropped above), so it is
# passed as-is. The mean of the per-fold scores is printed.
fold_scores = k_fold_cross_validation(cph, data, duration_col='tenure', event_col='Churn')
print(np.mean(fold_scores))