In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from causalinference import CausalModel

In [2]:
df_telco = pd.read_csv('Telco_customer_churn_clean.csv')

In [3]:
df_telco_internet = df_telco[df_telco['Internet Service'] != 'No']

# Based On Additional Internet Service

## Online Security

### Churn Rate

In [4]:
pd.crosstab(df_telco_internet['Online Security'], df_telco_internet['Churn Label'], normalize = 0)

Churn Label,No,Yes
Online Security,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.582333,0.417667
Yes,0.853888,0.146112


### Identify Comfound

In [5]:
df_telco_internet[['Online Security','Longitude','Latitude','Tenure Months']].groupby('Online Security').mean()

Unnamed: 0_level_0,Longitude,Latitude,Tenure Months
Online Security,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
No,-119.728928,36.236179,25.850772
Yes,-119.819043,36.282124,45.046558


In [6]:
pd.crosstab(df_telco_internet['Online Security'], df_telco_internet['Gender'], normalize = 0)

Gender,Female,Male
Online Security,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.489994,0.510006
Yes,0.508668,0.491332


In [7]:
pd.crosstab(df_telco_internet['Online Security'], df_telco_internet['Senior Citizen'], normalize = 0)

Senior Citizen,No,Yes
Online Security,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.769011,0.230989
Yes,0.860327,0.139673


In [8]:
pd.crosstab(df_telco_internet['Online Security'], df_telco_internet['Partner'], normalize = 0)

Partner,No,Yes
Online Security,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.582333,0.417667
Yes,0.40416,0.59584


In [9]:
pd.crosstab(df_telco_internet['Online Security'], df_telco_internet['Dependents'], normalize = 0)

Dependents,No,Yes
Online Security,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.848485,0.151515
Yes,0.735513,0.264487


In [10]:
pd.crosstab(df_telco_internet['Online Security'], df_telco_internet['Internet Service'], normalize = 0)

Internet Service,DSL,Fiber optic
Online Security,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.354774,0.645226
Yes,0.584448,0.415552


we can see the different in 

1. Tenure, online security user has higher tenure
1. Senior citizen, online security user has less senior citizen
1. Partner, Online Security user has more user with partner
1. Dependents, Online Security user has more user with dependents
1. Internet Service, Online Security user has less user using fiber optic

we will include these variables as comfound because it affect churn rate and chronologicaly can affect Online Security usage

### Causal Inference

In [11]:
comfound = [
    'Tenure Months',
    'Senior Citizen',
    'Partner',
    'Dependents',
    'Internet Service'
]

df_telco_internet['Online Security t'] = np.where(df_telco_internet['Online Security']=='Yes',1,0)

y = df_telco_internet['Churn Value'].values
t = df_telco_internet['Online Security t'].values
X = pd.get_dummies(df_telco_internet[comfound], drop_first = True).astype(int).values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_telco_internet['Online Security t'] = np.where(df_telco_internet['Online Security']=='Yes',1,0)


In [12]:
model = CausalModel(y, t, X)
model.est_via_matching()
print(model.estimates)


Treatment Effect Estimates: Matching

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE     -0.089      0.016     -5.526      0.000     -0.121     -0.058
           ATC     -0.106      0.018     -5.764      0.000     -0.142     -0.070
           ATT     -0.060      0.021     -2.882      0.004     -0.100     -0.019



## Online Backup

### Churn Rate

In [13]:
pd.crosstab(df_telco_internet['Online Backup'], df_telco_internet['Churn Label'], normalize = 0)

Churn Label,No,Yes
Online Backup,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.600712,0.399288
Yes,0.784685,0.215315


### Identify comfound

In [14]:
df_telco_internet[['Online Backup','Latitude','Longitude','Tenure Months']].groupby('Online Backup').mean()

Unnamed: 0_level_0,Latitude,Longitude,Tenure Months
Online Backup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
No,36.238894,-119.762156,23.680699
Yes,36.270917,-119.761589,44.565253


In [15]:
pd.crosstab(df_telco_internet['Online Backup'], df_telco_internet['Gender'], normalize = 0)

Gender,Female,Male
Online Backup,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.490609,0.509391
Yes,0.504734,0.495266


In [16]:
pd.crosstab(df_telco_internet['Online Backup'], df_telco_internet['Senior Citizen'], normalize = 0)

Senior Citizen,No,Yes
Online Backup,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.801166,0.198834
Yes,0.804035,0.195965


In [17]:
pd.crosstab(df_telco_internet['Online Backup'], df_telco_internet['Partner'], normalize = 0)

Partner,No,Yes
Online Backup,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.593912,0.406088
Yes,0.419514,0.580486


In [18]:
pd.crosstab(df_telco_internet['Online Backup'], df_telco_internet['Dependents'], normalize = 0)

Dependents,No,Yes
Online Backup,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.838731,0.161269
Yes,0.766982,0.233018


In [19]:
pd.crosstab(df_telco_internet['Online Backup'], df_telco_internet['Internet Service'], normalize = 0)

Internet Service,DSL,Fiber optic
Online Backup,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.432319,0.567681
Yes,0.447098,0.552902


hetergenus variable
1. partner
2. dependent
3. tenure

### Causal Inference

In [20]:
comfound = [
    'Partner', 
    'Dependents',
    'Tenure Months',
    'Internet Service']

df_telco_internet['Online Backup t'] = np.where(df_telco_internet['Online Backup']=='Yes',1,0)

y = df_telco_internet['Churn Value'].values
t = df_telco_internet['Online Backup t'].values
X = pd.get_dummies(df_telco_internet[comfound], drop_first = True).astype(int).values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_telco_internet['Online Backup t'] = np.where(df_telco_internet['Online Backup']=='Yes',1,0)


In [21]:
model = CausalModel(y, t, X)
model.est_via_matching()
print(model.estimates)


Treatment Effect Estimates: Matching

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE     -0.017      0.014     -1.233      0.218     -0.045      0.010
           ATC     -0.024      0.016     -1.514      0.130     -0.055      0.007
           ATT     -0.009      0.016     -0.527      0.598     -0.041      0.023



## Tech Support

### Churn Rate

In [22]:
pd.crosstab(df_telco_internet['Tech Support'], df_telco_internet['Churn Value'], normalize = 0)

Churn Value,0,1
Tech Support,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.583645,0.416355
Yes,0.848337,0.151663


### Comfound

In [23]:
df_telco_internet[['Tech Support','Latitude','Longitude','Tenure Months']].groupby('Tech Support').mean()

Unnamed: 0_level_0,Latitude,Longitude,Tenure Months
Tech Support,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
No,36.22248,-119.720084,25.844227
Yes,36.304838,-119.832967,44.822896


In [24]:
pd.crosstab(df_telco_internet['Tech Support'], df_telco_internet['Gender'], normalize = 0)

Gender,Female,Male
Tech Support,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.493521,0.506479
Yes,0.502446,0.497554


In [25]:
pd.crosstab(df_telco_internet['Tech Support'], df_telco_internet['Senior Citizen'], normalize = 0)

Senior Citizen,No,Yes
Tech Support,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.761014,0.238986
Yes,0.872798,0.127202


In [26]:
pd.crosstab(df_telco_internet['Tech Support'], df_telco_internet['Partner'], normalize = 0)

Partner,No,Yes
Tech Support,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.572416,0.427584
Yes,0.42319,0.57681


In [27]:
pd.crosstab(df_telco_internet['Tech Support'], df_telco_internet['Dependents'], normalize = 0)

Dependents,No,Yes
Tech Support,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.840772,0.159228
Yes,0.75,0.25


In [28]:
pd.crosstab(df_telco_internet['Tech Support'], df_telco_internet['Internet Service'], normalize = 0)

Internet Service,DSL,Fiber optic
Tech Support,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.357904,0.642096
Yes,0.576321,0.423679


hetergenus variable
1. senior citizen
2. partner
3. dependent
4. internet service
5. tenure

### causal inference

In [29]:
comfound = [
    'Senior Citizen',
    'Partner', 
    'Dependents',
    'Tenure Months',
    'Internet Service'
]

df_telco_internet['Tech Support t'] = np.where(df_telco_internet['Tech Support']=='Yes',1,0)

y = df_telco_internet['Churn Value'].values
t = df_telco_internet['Tech Support t'].values
X = pd.get_dummies(df_telco_internet[comfound], drop_first = True).astype(int).values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_telco_internet['Tech Support t'] = np.where(df_telco_internet['Tech Support']=='Yes',1,0)


In [30]:
model = CausalModel(y, t, X)
model.est_via_matching()
print(model.estimates)


Treatment Effect Estimates: Matching

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE     -0.089      0.017     -5.145      0.000     -0.123     -0.055
           ATC     -0.098      0.021     -4.796      0.000     -0.139     -0.058
           ATT     -0.074      0.021     -3.484      0.000     -0.116     -0.032



## Device Protection

### Churn Rate

In [31]:
pd.crosstab(df_telco_internet['Device Protection'], df_telco_internet['Churn Value'], normalize = 0)

Churn Value,0,1
Device Protection,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.608724,0.391276
Yes,0.774979,0.225021


### Comfound

In [32]:
df_telco_internet[['Device Protection','Latitude','Longitude','Tenure Months']].groupby('Device Protection').mean()

Unnamed: 0_level_0,Latitude,Longitude,Tenure Months
Device Protection,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
No,36.230683,-119.743417,23.696931
Yes,36.281501,-119.785533,44.604872


In [33]:
pd.crosstab(df_telco_internet['Device Protection'], df_telco_internet['Gender'], normalize = 0)

Gender,Female,Male
Device Protection,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.496931,0.503069
Yes,0.496697,0.503303


In [34]:
pd.crosstab(df_telco_internet['Device Protection'], df_telco_internet['Senior Citizen'], normalize = 0)

Senior Citizen,No,Yes
Device Protection,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.798384,0.201616
Yes,0.807597,0.192403


In [35]:
pd.crosstab(df_telco_internet['Device Protection'], df_telco_internet['Partner'], normalize = 0)

Partner,No,Yes
Device Protection,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.600323,0.399677
Yes,0.410818,0.589182


In [36]:
pd.crosstab(df_telco_internet['Device Protection'], df_telco_internet['Dependents'], normalize = 0)

Dependents,No,Yes
Device Protection,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.826817,0.173183
Yes,0.781998,0.218002


In [37]:
pd.crosstab(df_telco_internet['Device Protection'], df_telco_internet['Internet Service'], normalize = 0)

Internet Service,DSL,Fiber optic
Device Protection,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.438126,0.561874
Yes,0.439719,0.560281


hetergenous variable
1. tenure
2. senior citizen
3. partner
4. dependent

### Causal Inference

In [38]:
comfound = [
    'Tenure Months',
    'Senior Citizen',
    'Partner', 
    'Dependents'
]

df_telco_internet['Device Protection t'] = np.where(df_telco_internet['Device Protection']=='Yes',1,0)

y = df_telco_internet['Churn Value'].values
t = df_telco_internet['Device Protection t'].values
X = pd.get_dummies(df_telco_internet[comfound], drop_first = True).astype(int).values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_telco_internet['Device Protection t'] = np.where(df_telco_internet['Device Protection']=='Yes',1,0)


In [39]:
model = CausalModel(y, t, X)
model.est_via_matching()
print(model.estimates)


Treatment Effect Estimates: Matching

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      0.005      0.014      0.356      0.722     -0.023      0.033
           ATC      0.014      0.016      0.880      0.379     -0.018      0.046
           ATT     -0.007      0.017     -0.388      0.698     -0.040      0.027

