# Note 


#### This code is designed for Churn Analysis 
#### The written function is generic and can be used to run data that has the exact format as below, required 4 input parameters: 

*****1.dataframe: input dataframe 

*****2.df_groups_col: the name of the column that identifys 'control' and 'treatment'

*****3.df_groups_split_value: the actual value of the column that identifys 'control' and 'treatment'

*****4.df_months_churn: months for churn analysis, i.e.['OneMonthChurn_HSI', 'ThreeMonthChurn_HSI', 'FourMonthChurn_HSI', 'EightMonthChurn_HSI']


# Import Packages

In [1]:
# Pandas, Numpy and SQL Connection
import pyodbc
import pandas as pd
import warnings
import missingno as msno
import numpy as np
import os

# Hypothesis Testing
from scipy.stats import shapiro
from scipy.stats import mannwhitneyu
import scipy.stats as stats
from statsmodels.stats.proportion import proportions_ztest, proportion_confint
from scipy.stats import ttest_ind
import statsmodels.stats.api as sms
from math import ceil
import glob

# Visualization
import plotly.express as px
import plotly.figure_factory as ff
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

pd.set_option('display.max_columns', 200)
pd.set_option('display.max_colwidth', 400)

from matplotlib import rcParams
sns.set(context='notebook', style='whitegrid', rc={'figure.figsize': (18,4)})
rcParams['figure.figsize'] = 18,4

%matplotlib inline
%config InlineBackend.figure_format = 'retina'


# Configuration
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)

# Single Line Product

In [2]:
# Read Data from SQL Servers

conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=CTOMASQL1SQL2,7116;'
                      'Database=USER_DB;'
                      'Trusted_Connection=yes;')

df = pd.read_sql_query('SELECT * FROM [dbo].[JJ_combined_single_line_churn_analysis_August22Update]', conn)
df.head(5)

Unnamed: 0,LINE_ID,SERVICE_PRODUCT,DSLAM,PORT,DSLAM_TYPE,CARD_TYPE,INSERTION_DATE,Cleaned_Group,CUST_ACCT_ID,CUST_ACCT_LINK_ID,CONSUMER_ACCT,DSL_SPEED_SUBSCRIBED,DATE_CUST_INSTALL,HSI_INST_DT,STATE_SERVICE,ACCT_CHURN_DATE,HSI_CHURN_DATE,PGM_START_DATE,ACCT_CHURN_RANGE,HSI_CHURN_RANGE,ACTIVE_120,HSI_120,Bill_Sys,OneMonthChurn_HSI,ThreeMonthChurn_HSI,FourMonthChurn_HSI,EightMonthChurn_HSI
0,5208252756,20128K_896K_NET,TCSRAZTJ,1/1/2014,Adtran Control,,,Adtran Control,5208252756386,12329380.0,1.0,20000.0,1995-07-10,1900-01-01,AZ,NaT,NaT,2021-08-31,,,True,,CRS,0,0,0,0
1,2067257347,20128K_896K_NET,STTQWAAA,1/13/1934,Adtran Control,,,Adtran Control,330635169,53116927.0,1.0,20000.0,2014-07-15,1900-01-01,WA,NaT,NaT,2021-08-31,,,True,,CRS,0,0,0,0
2,4804644700,60M_5M_V2,MESDAZHN,1/1/1942,Adtran Control,,,Adtran Control,330003986,71610173.0,1.0,60000.0,2018-03-07,1900-01-01,AZ,NaT,NaT,2021-08-31,,,True,,CRS,0,0,0,0
3,8015445963,20128K_896K_NET,LYTNUTOC,3/1/1944,Adtran Control,,,Adtran Control,330580865,11179915.0,1.0,20000.0,2000-07-27,1900-01-01,UT,NaT,NaT,2021-08-31,,,True,,CRS,0,0,0,0
4,5035855417,20M_1.5M_V2,SALMOR78002,1/2/2002,Adtran Control,,,Adtran Control,5035855417316,66199709.0,1.0,20000.0,2016-12-01,1900-01-01,OR,2022-05-23,2022-05-23,2021-08-31,,,True,,CRS,0,0,0,0


In [3]:
# Filter to include study columns 

#Key independent variables are: Cleaned_Group, Bill_Sys
#Key dependent variables are: OneMonthChurn_HSI, ThreeMonthChurn_HSI, FourMonthChurn_HSI, EightMonthChurn_HSI

df_v1 = df[['Cleaned_Group', 'Bill_Sys', 'OneMonthChurn_HSI', 'ThreeMonthChurn_HSI', 'FourMonthChurn_HSI', 'EightMonthChurn_HSI']]
#df_v1.shape
#df_v1.head(10)

In [4]:
# Churn Rate 

def churn_rate(control, treament): 

    # calculate churn % between treatment and control
    churn = ((treament - control) / treament)
    
    return str(round(churn*100, 0)) + '%'

In [5]:
def sub_groups(row):
    if row['Cleaned_Group'] == 'Adtran Treatment':
        value='Treatment'
    elif row['Cleaned_Group'] == 'Calix Treatment':
        value='Treatment'
    elif row['Cleaned_Group'] =='Adtran Control':
        value='Control'
    else:
        value = 'Control'
    return value
    
df_v1['Groups'] = df_v1.apply(sub_groups, axis=1)
df_v1.head()

Unnamed: 0,Cleaned_Group,Bill_Sys,OneMonthChurn_HSI,ThreeMonthChurn_HSI,FourMonthChurn_HSI,EightMonthChurn_HSI,Groups
0,Adtran Control,CRS,0,0,0,0,Control
1,Adtran Control,CRS,0,0,0,0,Control
2,Adtran Control,CRS,0,0,0,0,Control
3,Adtran Control,CRS,0,0,0,0,Control
4,Adtran Control,CRS,0,0,0,0,Control


# Function

In [26]:
# Functions for running statistical testing

def New_ControlTreatmentTesting(dataframe, df_groups_col, df_groups_split_value, df_months_churn):
        
    """
    Functions for calculating Churn for Control and Treatment and Test Statistical Significance
    """
    
    for df_months_churn in churnmonth_cols:
        print (df_months_churn)
        
        # Identify Control and Treatment Group Per Months_Churn
        control = dataframe[dataframe[df_groups_col] == df_groups_split_value[0]][df_months_churn]
        treatment = dataframe[dataframe[df_groups_col] == df_groups_split_value[1]][df_months_churn]
        
        # Calculate total control and treatment records and total churn
        control_counts = dataframe[dataframe[df_groups_col] == df_groups_split_value[0]][df_months_churn].count()
        treatment_counts = dataframe[dataframe[df_groups_col] == df_groups_split_value[1]][df_months_churn].count()
        control_results = dataframe[dataframe[df_groups_col] == df_groups_split_value[0]][df_months_churn].sum()
        treatment_results = dataframe[dataframe[df_groups_col] == df_groups_split_value[1]][df_months_churn].sum()  
        print(f'total control churn :  {control_counts, control_results}')
        print(f'total treatment churn :  {treatment_counts, treatment_results}')
        
        # Calculate churn rate between control and treatment per Months_Churn
        control_churn = round((control_results /control_counts)*100,2) 
        treatment_churn = round((treatment_results / treatment_counts)*100,2)
        print('Control Churn Rate :' , control_churn, '%')
        print('Treatment Churn Rate :',  treatment_churn,'%')
        print('Churn Rate % : ', churn_rate(control_churn, treatment_churn))
        
        # Test for Statistical Significance: first check for normality using Shapiro-Wilk Test (calculates whether a random sample of data comes from a normal distribution, When the p-value is less than or equal to 0.05 (assuming a 95% confidence level) the data is not normal.
        # If Normal Distribution, then check homogeneity of variances
        #perform Shapiro-Wilk test
        nt_control = shapiro(dataframe[dataframe[df_groups_col] == df_groups_split_value[0]][df_months_churn])[1] < 0.05 # 0 - test statistic, 1 - pvalues
        nt_treatment = shapiro(dataframe[dataframe[df_groups_col] == df_groups_split_value[1]][df_months_churn])[1] < 0.05
        print('ShapiroResult Control : ', nt_control)
        print('ShapiroResult Treament : ', nt_treatment)
        # H0: There is no statistically significant difference between the sample distribution and normal distribution, meanining Distribution is Normal! - False 
        # H1: There is a statistically significant difference between the sample distribution and normal distribution, meanining Distribution is not Normal! - True
        
        if (nt_control == False) & (nt_treatment == False): # meaning normal distribution

        # Since the distribution is normal, we need to go ahead and test the assumption of variances in our distribution
        # Homogeneity of variances using Levene's Test 
            leveneTest_results = stats.levene(control, treatment)[1] < 0.05
            print('Levene Variance test p-value:', leveneTest_results)     
        # H0: The variances are equal between control and treatment -- Homogeneity: False
        # H1: The variances are not equal between control and treatment -- Heterogeneous: True
            
            if leveneTest_results == False:
                # The results is showing equal variances -- Homogeneity
                equalvar_ttest = stats.ttest_ind(control, treatment, equal_var=True)[1]
                print('T test with Equal Variance p-value', equalvar_ttest) 
                # H0: M1 == M2 - False
                # H1: M1 != M2 - True
            
            else:
                # The results is showing not equal variances -- Heterogeneous
                unequalvar_ttest = stats.ttest_ind(control, treatment, equal_var=False)[1]
                print('T test with Unequal Variance p-value', unequalvar_ttest) 
                # H0: M1 == M2 - False
                # H1: M1 != M2 - True        
        
        else:
            # Since the distribution is not normal, we should use Non-Parametric Test
            # H0: The distribution of the values between control and treament are equal -- M1 == M2 - False
            # H1: The distribution of the values between control and treament are not equal -- M1 != M2 - True
            u_test_results = stats.mannwhitneyu(control, treatment)[1]
            print('U test p-value', u_test_results)
        
        # In addition to above t and u test, we could also use z proportion test(Parametric Test) to compare with the results above 
        # Use Z Proportion Tests (%) --z-test 95% CI 
        n_control = control_counts
        n_result = treatment_counts
        successes = [control_results, treatment_results]
        nobs = [n_control, n_result]
        
        z_stat, pval = proportions_ztest(successes, nobs=nobs)
        (lower_off, lower_on), (upper_off, upper_on) = proportion_confint(successes, nobs=nobs, alpha=0.05)
        print(f'Z statistic: {z_stat:.2f}')
        print(f'Z Proportion test p-value: {pval:.3f}','\n\n')

# Predefined variables list

In [7]:
group_cols = ['Control', 'Treatment']
churnmonth_cols = ['OneMonthChurn_HSI', 'ThreeMonthChurn_HSI', 'FourMonthChurn_HSI', 'EightMonthChurn_HSI']
adtran_groups= ['Adtran Control', 'Adtran Treatment']
calix_groups = ['Calix Control', 'Calix Treatment']
df_ENS = df_v1[df_v1['Bill_Sys'] == 'ENS']
df_CRS = df_v1[df_v1['Bill_Sys'] == 'CRS']

# Result

# CRS

In [8]:
New_ControlTreatmentTesting(dataframe = df_CRS, df_groups_col = 'Groups', df_groups_split_value=group_cols, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (101921, 1192)
total treatment churn :  (228679, 410)
Control Churn Rate : 1.17 %
Treatment Churn Rate : 0.18 %
Churn Rate % :  -550.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.0
Z statistic: 37.86
Z Proportion test p-value: 0.000 


ThreeMonthChurn_HSI
total control churn :  (101921, 3621)
total treatment churn :  (228679, 5240)
Control Churn Rate : 3.55 %
Treatment Churn Rate : 2.29 %
Churn Rate % :  -55.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 1.6299167728727153e-95
Z statistic: 20.74
Z Proportion test p-value: 0.000 


FourMonthChurn_HSI
total control churn :  (101921, 4797)
total treatment churn :  (228679, 8003)
Control Churn Rate : 4.71 %
Treatment Churn Rate : 3.5 %
Churn Rate % :  -35.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 5.813337091012074e-62
Z statistic: 16.61
Z Proportion test p-value: 0.000 


EightMonthChurn_HSI
total

# ENS

In [9]:
New_ControlTreatmentTesting(dataframe = df_ENS, df_groups_col = 'Groups', df_groups_split_value=group_cols, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (44745, 607)
total treatment churn :  (198120, 1379)
Control Churn Rate : 1.36 %
Treatment Churn Rate : 0.7 %
Churn Rate % :  -94.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 1.3024587071444994e-44
Z statistic: 14.01
Z Proportion test p-value: 0.000 


ThreeMonthChurn_HSI
total control churn :  (44745, 1666)
total treatment churn :  (198120, 5779)
Control Churn Rate : 3.72 %
Treatment Churn Rate : 2.92 %
Churn Rate % :  -27.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 3.9811108695609723e-19
Z statistic: 8.94
Z Proportion test p-value: 0.000 


FourMonthChurn_HSI
total control churn :  (44745, 2184)
total treatment churn :  (198120, 8085)
Control Churn Rate : 4.88 %
Treatment Churn Rate : 4.08 %
Churn Rate % :  -20.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 3.042065103849148e-14
Z statistic: 7.60
Z Proportion test p-value: 0.000 


EightMonthCh

#  CRS & Adtran

In [10]:
New_ControlTreatmentTesting(dataframe = df_CRS, df_groups_col = 'Cleaned_Group', df_groups_split_value=adtran_groups, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (96717, 1130)
total treatment churn :  (216628, 282)
Control Churn Rate : 1.17 %
Treatment Churn Rate : 0.13 %
Churn Rate % :  -800.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.0
Z statistic: 40.08
Z Proportion test p-value: 0.000 


ThreeMonthChurn_HSI
total control churn :  (96717, 3451)
total treatment churn :  (216628, 4845)
Control Churn Rate : 3.57 %
Treatment Churn Rate : 2.24 %
Churn Rate % :  -59.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 4.850545450534444e-102
Z statistic: 21.45
Z Proportion test p-value: 0.000 


FourMonthChurn_HSI
total control churn :  (96717, 4565)
total treatment churn :  (216628, 7486)
Control Churn Rate : 4.72 %
Treatment Churn Rate : 3.46 %
Churn Rate % :  -36.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 8.206764936753545e-65
Z statistic: 17.00
Z Proportion test p-value: 0.000 


EightMonthChurn_HSI
total c

# CRS & Calix

In [11]:
New_ControlTreatmentTesting(dataframe = df_CRS, df_groups_col = 'Cleaned_Group', df_groups_split_value=calix_groups, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (5204, 62)
total treatment churn :  (12051, 128)
Control Churn Rate : 1.19 %
Treatment Churn Rate : 1.06 %
Churn Rate % :  -12.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.45531082173182924
Z statistic: 0.75
Z Proportion test p-value: 0.455 


ThreeMonthChurn_HSI
total control churn :  (5204, 170)
total treatment churn :  (12051, 395)
Control Churn Rate : 3.27 %
Treatment Churn Rate : 3.28 %
Churn Rate % :  0.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.9702308266825217
Z statistic: -0.04
Z Proportion test p-value: 0.970 


FourMonthChurn_HSI
total control churn :  (5204, 232)
total treatment churn :  (12051, 517)
Control Churn Rate : 4.46 %
Treatment Churn Rate : 4.29 %
Churn Rate % :  -4.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.6191611124658332
Z statistic: 0.50
Z Proportion test p-value: 0.619 


EightMonthChurn_HSI
total control ch

# ENS & Adtran

In [12]:
New_ControlTreatmentTesting(dataframe= df_ENS, df_groups_col = 'Cleaned_Group', df_groups_split_value=adtran_groups, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (6455, 74)
total treatment churn :  (26551, 78)
Control Churn Rate : 1.15 %
Treatment Churn Rate : 0.29 %
Churn Rate % :  -297.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 1.1426613984363677e-19
Z statistic: 9.07
Z Proportion test p-value: 0.000 


ThreeMonthChurn_HSI
total control churn :  (6455, 243)
total treatment churn :  (26551, 731)
Control Churn Rate : 3.76 %
Treatment Churn Rate : 2.75 %
Churn Rate % :  -37.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 1.6603766981214576e-05
Z statistic: 4.31
Z Proportion test p-value: 0.000 


FourMonthChurn_HSI
total control churn :  (6455, 336)
total treatment churn :  (26551, 1095)
Control Churn Rate : 5.21 %
Treatment Churn Rate : 4.12 %
Churn Rate % :  -26.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.00013061932274460324
Z statistic: 3.83
Z Proportion test p-value: 0.000 


EightMonthChurn_HSI
to

# ENS & Calix

In [13]:
New_ControlTreatmentTesting(dataframe= df_ENS, df_groups_col = 'Cleaned_Group', df_groups_split_value=calix_groups, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (38290, 533)
total treatment churn :  (171569, 1301)
Control Churn Rate : 1.39 %
Treatment Churn Rate : 0.76 %
Churn Rate % :  -83.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 2.024259726326891e-33
Z statistic: 12.05
Z Proportion test p-value: 0.000 


ThreeMonthChurn_HSI
total control churn :  (38290, 1423)
total treatment churn :  (171569, 5048)
Control Churn Rate : 3.72 %
Treatment Churn Rate : 2.94 %
Churn Rate % :  -27.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 2.3204117515313954e-15
Z statistic: 7.92
Z Proportion test p-value: 0.000 


FourMonthChurn_HSI
total control churn :  (38290, 1848)
total treatment churn :  (171569, 6990)
Control Churn Rate : 4.83 %
Treatment Churn Rate : 4.07 %
Churn Rate % :  -19.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 3.453482346586673e-11
Z statistic: 6.63
Z Proportion test p-value: 0.000 


EightMonthCh

# Pair Bond Product

In [14]:
# Read Data from SQL Servers

conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=CTOMASQL1SQL2,7116;'
                      'Database=USER_DB;'
                      'Trusted_Connection=yes;')

df_pb = pd.read_sql_query('SELECT * FROM [dbo].[JJ_combined_PB_churn_analysis_August22Update_wCuts]', conn)
#df_pb.head(5)
#df_pb.shape

In [15]:
df_pb['Groups'] = df_pb.apply(sub_groups, axis=1)

In [16]:
df_pb_CRS = df_pb[df_pb['Bill_Sys'] == 'CRS']
df_pb_ENS = df_pb[df_pb['Bill_Sys'] == 'ENS']

# CRS

In [17]:
New_ControlTreatmentTesting(dataframe = df_pb_CRS, df_groups_col = 'Groups', df_groups_split_value=group_cols, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (96652, 297)
total treatment churn :  (416502, 1137)
Control Churn Rate : 0.31 %
Treatment Churn Rate : 0.27 %
Churn Rate % :  -15.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.06877762773195509
Z statistic: 1.82
Z Proportion test p-value: 0.069 


ThreeMonthChurn_HSI
total control churn :  (96652, 909)
total treatment churn :  (416502, 3887)
Control Churn Rate : 0.94 %
Treatment Churn Rate : 0.93 %
Churn Rate % :  -1.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.8331179603405823
Z statistic: 0.21
Z Proportion test p-value: 0.833 


FourMonthChurn_HSI
total control churn :  (96652, 1283)
total treatment churn :  (416502, 5603)
Control Churn Rate : 1.33 %
Treatment Churn Rate : 1.35 %
Churn Rate % :  1.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.66464388820008
Z statistic: -0.43
Z Proportion test p-value: 0.665 


EightMonthChurn_HSI
total c

# ENS

In [18]:
New_ControlTreatmentTesting(dataframe = df_pb_ENS, df_groups_col = 'Groups', df_groups_split_value=group_cols, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (20786, 116)
total treatment churn :  (76612, 384)
Control Churn Rate : 0.56 %
Treatment Churn Rate : 0.5 %
Churn Rate % :  -12.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.30914866239447036
Z statistic: 1.02
Z Proportion test p-value: 0.309 


ThreeMonthChurn_HSI
total control churn :  (20786, 407)
total treatment churn :  (76612, 1457)
Control Churn Rate : 1.96 %
Treatment Churn Rate : 1.9 %
Churn Rate % :  -3.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.5995596467483153
Z statistic: 0.53
Z Proportion test p-value: 0.600 


FourMonthChurn_HSI
total control churn :  (20786, 555)
total treatment churn :  (76612, 2129)
Control Churn Rate : 2.67 %
Treatment Churn Rate : 2.78 %
Churn Rate % :  4.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.39510322827142785
Z statistic: -0.85
Z Proportion test p-value: 0.395 


EightMonthChurn_HSI
total contr

# CRS & Adtran

In [19]:
New_ControlTreatmentTesting(dataframe= df_pb_CRS, df_groups_col = 'Cleaned_Group', df_groups_split_value=adtran_groups, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (92920, 288)
total treatment churn :  (400214, 1097)
Control Churn Rate : 0.31 %
Treatment Churn Rate : 0.27 %
Churn Rate % :  -15.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.06291616331568664
Z statistic: 1.86
Z Proportion test p-value: 0.063 


ThreeMonthChurn_HSI
total control churn :  (92920, 875)
total treatment churn :  (400214, 3731)
Control Churn Rate : 0.94 %
Treatment Churn Rate : 0.93 %
Churn Rate % :  -1.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.7880112431092945
Z statistic: 0.27
Z Proportion test p-value: 0.788 


FourMonthChurn_HSI
total control churn :  (92920, 1235)
total treatment churn :  (400214, 5363)
Control Churn Rate : 1.33 %
Treatment Churn Rate : 1.34 %
Churn Rate % :  1.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.7938564078253739
Z statistic: -0.26
Z Proportion test p-value: 0.794 


EightMonthChurn_HSI
total

# CRS & Calix

In [20]:
New_ControlTreatmentTesting(dataframe = df_pb_CRS, df_groups_col = 'Cleaned_Group', df_groups_split_value=calix_groups, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (3732, 9)
total treatment churn :  (16288, 40)
Control Churn Rate : 0.24 %
Treatment Churn Rate : 0.25 %
Churn Rate % :  4.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.9606857109029044
Z statistic: -0.05
Z Proportion test p-value: 0.961 


ThreeMonthChurn_HSI
total control churn :  (3732, 34)
total treatment churn :  (16288, 156)
Control Churn Rate : 0.91 %
Treatment Churn Rate : 0.96 %
Churn Rate % :  5.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.7906163721976553
Z statistic: -0.27
Z Proportion test p-value: 0.791 


FourMonthChurn_HSI
total control churn :  (3732, 48)
total treatment churn :  (16288, 240)
Control Churn Rate : 1.29 %
Treatment Churn Rate : 1.47 %
Churn Rate % :  12.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.38608671195182154
Z statistic: -0.87
Z Proportion test p-value: 0.386 


EightMonthChurn_HSI
total control churn 

# ENS & Adtran

In [21]:
New_ControlTreatmentTesting(dataframe= df_pb_ENS, df_groups_col = 'Cleaned_Group', df_groups_split_value=adtran_groups, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (2682, 20)
total treatment churn :  (9752, 59)
Control Churn Rate : 0.75 %
Treatment Churn Rate : 0.61 %
Churn Rate % :  -23.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.4167053677310353
Z statistic: 0.81
Z Proportion test p-value: 0.417 


ThreeMonthChurn_HSI
total control churn :  (2682, 63)
total treatment churn :  (9752, 189)
Control Churn Rate : 2.35 %
Treatment Churn Rate : 1.94 %
Churn Rate % :  -21.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.18108569234181082
Z statistic: 1.34
Z Proportion test p-value: 0.181 


FourMonthChurn_HSI
total control churn :  (2682, 90)
total treatment churn :  (9752, 273)
Control Churn Rate : 3.36 %
Treatment Churn Rate : 2.8 %
Churn Rate % :  -20.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.12966971356952983
Z statistic: 1.52
Z Proportion test p-value: 0.130 


EightMonthChurn_HSI
total control churn 

# ENS & Calix

In [22]:
New_ControlTreatmentTesting(dataframe = df_pb_ENS, df_groups_col = 'Cleaned_Group', df_groups_split_value=calix_groups, df_months_churn = churnmonth_cols)

OneMonthChurn_HSI
total control churn :  (18104, 96)
total treatment churn :  (66860, 325)
Control Churn Rate : 0.53 %
Treatment Churn Rate : 0.49 %
Churn Rate % :  -8.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.4526706045447585
Z statistic: 0.75
Z Proportion test p-value: 0.453 


ThreeMonthChurn_HSI
total control churn :  (18104, 344)
total treatment churn :  (66860, 1268)
Control Churn Rate : 1.9 %
Treatment Churn Rate : 1.9 %
Churn Rate % :  0.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.9746488459004813
Z statistic: 0.03
Z Proportion test p-value: 0.975 


FourMonthChurn_HSI
total control churn :  (18104, 465)
total treatment churn :  (66860, 1856)
Control Churn Rate : 2.57 %
Treatment Churn Rate : 2.78 %
Churn Rate % :  8.0%
ShapiroResult Control :  True
ShapiroResult Treament :  True
U test p-value 0.12875011127290167
Z statistic: -1.52
Z Proportion test p-value: 0.129 


EightMonthChurn_HSI
total control c