# Cell2Cell game assignment

---

# 

# Imports

#### Standard library imports

In [1]:
import sys
sys.path.append("../")

import os

#### Third party imports

In [2]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 15)
import plotly.express as px
import matplotlib.pyplot as plt
# from sklearn import metrics
import plotly.graph_objects as go
import numpy as np
import statsmodels.api as sm
from sklearn.impute import SimpleImputer

#### Local application imports

In [3]:
%load_ext autoreload
%autoreload 2

from pkg_dir.config import *
from pkg_dir.src.utils import *
from pkg_dir.src.functions import *
from pkg_dir.src.parameters import *

# 

# Loading data

In [4]:
data_path = '../pkg_dir/data/cell2cell.csv'

In [5]:
dfr = pd.read_csv(data_path)

# 

# Parameters definition

In [6]:
disc_rate = 0.09
sim_periods = 12*5
staff_cost_per_transaction = 0.02

# 

# Initial data wrangling

##### Adding RFM variables

In [7]:
## RFM variables
## Number of buckets used for each of the three RFM scores
num_tiles = 10
## Recency: bucket of `eqpdays`, used as returned (no reversal)
dfr['rec_ntile'] = mba263.ntile(dfr['eqpdays'], num_tiles)
## Frequency and monetary: the bucket of `mou` / `recchrge` is reversed through `num_tiles - 1 - bucket`
## NOTE(review): presumably `mba263.ntile` returns 0-based buckets, which would keep the reversed scores in 0..num_tiles-1 -- confirm
## NOTE(review): `mba263` is not imported explicitly in this notebook; presumably it comes from the star imports -- confirm
dfr['freq_ntile'] = num_tiles - 1 - mba263.ntile(dfr['mou'], num_tiles)
dfr['mon_ntile'] = num_tiles - 1 - mba263.ntile(dfr['recchrge'], num_tiles)

##### Setting the client ID as index

In [8]:
dfr.set_index('customer', inplace=True)

##### Filling missing values

In [9]:
## Replacing every NaN with the mean of its column
## NOTE(review): the imputer is fitted on all rows and all columns of `dfr`, i.e. before the `calibrat`
## split below and including the `churndep` label, so test rows contribute to the fill values -- confirm this is intended
## NOTE(review): assumes every column of `dfr` is numeric and none is entirely NaN -- confirm
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(dfr)
## Assigning through `dfr[:]` overwrites the values while keeping the existing index and column labels
dfr[:] = imputer.transform(dfr)

##### Splitting train and test data

In [10]:
## Rows flagged with `calibrat == 1` form the training set and rows flagged with 0 the test set;
## the flag column itself is removed from both resulting dataframes
df_train = dfr.loc[dfr['calibrat'] == 1].drop(columns=['calibrat'])
df_test = dfr.loc[dfr['calibrat'] == 0].drop(columns=['calibrat'])

# 

# Data exploration

##### Saving summary statistics about the data as dataframe

In [11]:
## One row per variable with its summary statistics, without the quartile columns
dfi = dfr.describe().T.drop(columns=['25%', '50%', '75%'])

##### Evaluating number of labels in train and test set

##### Understanding the `churndep` variable

In [None]:
dfr['calibrat'].value_counts()

In [None]:
dfr['churndep'].value_counts()

In [None]:
dfr.groupby(
    [
        'calibrat',
        'churndep',
    ]
).agg(
    count=('calibrat', 'count')
)

# 

# Manual models

##### 

## Model 1
### Logistic regression without RFM variables

##### Model dataset

In [None]:
dfm1_train = df_train.copy()
dfm1_test = df_test.copy()

##### Model label and features

In [None]:
## Label
m1l = 'churndep'

## Features
m1f = [
    # 'calibrat',
    # 'churn',
    # 'churndep',
    'revenue',
    'mou',
    'recchrge',
    'directas',
    'overage',
    'roam',
    'changem',
    'changer',
    'dropvce',
    'blckvce',
    'unansvce',
    'custcare',
    'threeway',
    'mourec',
    'outcalls',
    'incalls',
    'peakvce',
    'opeakvce',
    'dropblk',
    'callfwdv',
    'callwait',
    'months',
    'uniqsubs',
    'actvsubs',
    'phones',
    'models',
    'eqpdays',
    'age1',
    'age2',
    'children',
    'credita',
    'creditaa',
    'prizmrur',
    'prizmub',
    'prizmtwn',
    'refurb',
    'webcap',
    'truck',
    'rv',
    'occprof',
    'occcler',
    'occcrft',
    'occstud',
    'occhmkr',
    'occret',
    'occself',
    'ownrent',
    'marryun',
    'marryyes',
    'mailord',
    'mailres',
    'mailflag',
    'travel',
    'pcown',
    'creditcd',
    'retcalls',
    'retaccpt',
    'newcelly',
    'newcelln',
    'refer',
    'incmiss',
    'income',
    'mcycle',
    'setprcm',
    'setprc',
    'retcall',
    # 'rec_ntile',
    # 'freq_ntile',
    # 'mon_ntile',
]

##### Model training

In [None]:
m1 = logit_reg(
    a=dfm1_train[m1l],
    b=dfm1_train[m1f],
    alpha=0,
)

##### Model coefficients

In [None]:
m1.summary()

In [None]:
dfm1_or = odds_ratios(m1)
dfm1_or

In [None]:
## Adding the standard deviation column to the odds ratios dataframe
dfm1_or['std'] = dfm1_test[m1f].std()

## Operation between resulting odds ratios and standard deviation
dfm1_or['norm_or'] = np.power(dfm1_or['Odds ratios'], dfm1_or['std'])

## Correcting odds ratios that are lower than one
dfm1_or['norm_or'] = dfm1_or['norm_or'].apply(lambda x: 1/x if x < 1 else x)

## Ranking results
dfm1_or['rank'] = dfm1_or['norm_or'].rank(ascending=False)
dfm1_or.sort_values(by='rank', inplace=True, ascending=True)

##### Predictions

In [None]:
## Churn predicted probability
dfm1_test['m1_pred_prob'] = m1.predict(dfm1_test[m1f])

## Classifying churners in groups
dfm1_test['m1_pred_prob_grade'] = 10 - ntile(dfm1_test['m1_pred_prob'], 10)

### 

## Model 2
### Logistic regression without RFM variables (fitted directly with statsmodels `Logit`)

##### Model dataset

In [12]:
dfm2_train = df_train.copy()
dfm2_test = df_test.copy()

##### Model label and features

In [13]:
## Label
m2l = 'churndep'

## Features
m2f = [
    # 'calibrat',
    # 'churn',
    # 'churndep',
    'revenue',
    'mou',
    'recchrge',
    'directas',
    'overage',
    'roam',
    'changem',
    'changer',
    'dropvce',
    'blckvce',
    'unansvce',
    'custcare',
    'threeway',
    'mourec',
    'outcalls',
    'incalls',
    'peakvce',
    'opeakvce',
    'dropblk',
    'callfwdv',
    'callwait',
    'months',
    'uniqsubs',
    'actvsubs',
    'phones',
    'models',
    'eqpdays',
    'age1',
    'age2',
    'children',
    'credita',
    'creditaa',
    'prizmrur',
    'prizmub',
    'prizmtwn',
    'refurb',
    'webcap',
    'truck',
    'rv',
    'occprof',
    'occcler',
    'occcrft',
    'occstud',
    'occhmkr',
    'occret',
    'occself',
    'ownrent',
    'marryun',
    'marryyes',
    'mailord',
    'mailres',
    'mailflag',
    'travel',
    'pcown',
    'creditcd',
    'retcalls',
    'retaccpt',
    'newcelly',
    'newcelln',
    'refer',
    'incmiss',
    'income',
    'mcycle',
    'setprcm',
    'setprc',
    'retcall',
    # 'rec_ntile',
    # 'freq_ntile',
    # 'mon_ntile',
]

##### Model training

In [14]:
## Fitting the logit of the label on the raw feature columns
## NOTE(review): no constant column is added to the design matrix (there is no `sm.add_constant` call) and the
## first coefficient row of the recorded summary is `revenue`, so the model is estimated without an intercept -- confirm this is intended
m2 = sm.Logit(
    dfm2_train[m2l],
    dfm2_train[m2f],
).fit()

Optimization terminated successfully.
         Current function value: 0.670879
         Iterations 5


##### Model coefficients

In [15]:
m2.summary()

0,1,2,3
Dep. Variable:,churndep,No. Observations:,40000.0
Model:,Logit,Df Residuals:,39934.0
Method:,MLE,Df Model:,65.0
Date:,"Mon, 17 Apr 2023",Pseudo R-squ.:,0.03213
Time:,10:40:29,Log-Likelihood:,-26835.0
converged:,True,LL-Null:,-27726.0
Covariance Type:,nonrobust,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
revenue,0.0022,0.001,2.792,0.005,0.001,0.004
mou,-0.0003,4.87e-05,-5.262,0.000,-0.000,-0.000
recchrge,-0.0033,0.001,-3.845,0.000,-0.005,-0.002
directas,-0.0005,0.006,-0.092,0.927,-0.012,0.011
overage,0.0007,0.000,2.635,0.008,0.000,0.001
roam,0.0063,0.002,3.163,0.002,0.002,0.010
changem,-0.0005,5.28e-05,-9.159,0.000,-0.001,-0.000
changer,0.0022,0.000,5.939,0.000,0.001,0.003
dropvce,0.0135,0.006,2.338,0.019,0.002,0.025


In [16]:
## Odds ratios table built from the fitted model
## NOTE(review): the recorded output starts at `mou`; `revenue`, the first feature of the model, is not listed --
## check whether `odds_ratios` drops the first coefficient row
dfm2_or = odds_ratios(m2)
dfm2_or

Unnamed: 0,Odds ratios,std err,z,P>|z|,[0.025,0.975]
mou,0.999744,0.000049,5.262881,0.000,0.999650,0.999838
recchrge,0.996679,0.000862,3.851049,0.000,0.995006,0.998352
directas,0.999468,0.005814,0.091540,0.927,0.988189,1.010747
overage,1.000721,0.000274,2.634221,0.008,1.000190,1.001252
roam,1.006304,0.001999,3.153384,0.002,1.002426,1.010183
...,...,...,...,...,...,...
income,0.988955,0.005731,1.927230,0.054,0.977838,1.000073
mcycle,1.130841,0.100268,1.304919,0.192,0.936322,1.325361
setprcm,0.924232,0.033626,2.253277,0.024,0.858998,0.989466
setprc,1.000757,0.000270,2.803526,0.005,1.000233,1.001281


In [17]:
## Standard deviation of every feature on the test set, matched to the odds ratios table by feature name
dfm2_or['std'] = dfm2_test[m2f].std()

## Odds ratio raised to the standard deviation of its feature
dfm2_or['norm_or'] = dfm2_or['Odds ratios'] ** dfm2_or['std']

## Ratios below one are replaced by their reciprocal so that effects in both directions share one scale
dfm2_or['norm_or'] = dfm2_or['norm_or'].map(
    lambda ratio: ratio if not (ratio < 1) else 1/ratio
)

## Rank 1 is the largest normalized odds ratio; the table is then ordered by rank
dfm2_or['rank'] = dfm2_or['norm_or'].rank(ascending=False)
dfm2_or.sort_values('rank', inplace=True)

##### Predictions

In [18]:
## Churn predicted probability
## The test set is scored with the same feature columns that were used to fit the model
dfm2_test['m2_pred_prob'] = m2.predict(dfm2_test[m2f])

## Classifying churners in groups
## The grade is 10 minus the bucket returned by `ntile`, so a higher bucket means a lower grade number
## NOTE(review): presumably `ntile` returns 0-based buckets in ascending order of probability, which would make
## grade 1 the highest-risk decile -- confirm
dfm2_test['m2_pred_prob_grade'] = 10 - ntile(dfm2_test['m2_pred_prob'], 10)

## 

## Manual models compilation

In [19]:
## Models objects
## Registry keyed by model name; every entry bundles the test dataframe, the label name,
## the feature list and the fitted model of that model
models = {
    
    # ## Model 1 (currently disabled)
    # 'm1': { 
    #     'df_test': dfm1_test,
    #     'label': m1l,
    #     'features': m1f,
    #     'model': m1,
    # },
    
    ## Model 2
    'm2': { 
        'df_test': dfm2_test,
        'label': m2l,
        'features': m2f,
        'model': m2,
    },
    
}

# 

# Evaluating models

##### Lift

# 

# Identifying customers that will receive promotion

## Generating dataset of selected customers

In [20]:
## Keeping only the customers graded 1 by model 2; the grade column is constant after the filter, so it is removed
dfc = (
    dfm2_test
    .loc[dfm2_test['m2_pred_prob_grade'] == 1]
    .drop(columns=['m2_pred_prob_grade'])
)

## Understanding customer selection

##### Do customers with a refurbished handset own a cheaper handset?
- It seems that refurbished handsets are actually more expensive

##### How does the `eqpdays` variable behave?

# 

# Profit calculations

## Profit calculations without intervention

In [21]:
## Function to calculate status quo expected value
def status_quo_exp_profit(periods, revenue, cost, churn_p, disc):
    """
    Present value of the margin stream of a customer that receives no promotion

    param periods (int): index of the last period; periods 0, 1, ..., periods are all included
    param revenue (float): revenue obtained in every period
    param cost (float): cost incurred in every period
    param churn_p (float): churn probability per period
    param disc (float): discount per period, applied as (1 - disc)**p

    return: sum over all periods of (revenue - cost)*(1 - churn_p)**p*(1 - disc)**p, rounded to 2 decimals
    """

    ### Quantities that do not change from one period to the next
    margin = revenue - cost
    stay_p = 1 - churn_p
    disc_factor = 1 - disc

    ### Period indexes as floats: 0.0, 1.0, ..., periods
    period_idx = np.arange(periods + 1, dtype=float)

    ### Adding and rounding the discounted, survival-weighted margin of every period
    return round(
        sum((margin*stay_p**p)*disc_factor**p for p in period_idx),
        2,
    )

In [22]:
## Expected profits (present value) for each customer if we don't do anything
## Evaluated row by row over `sim_periods` periods with the customer's own revenue and
## model-predicted churn probability; the per-period cost argument is fixed at 0
dfc['exp_pv_profit_no_inc'] = dfc.apply(
    lambda x: status_quo_exp_profit(
        sim_periods, 
        x['revenue'], 
        0,
        x['m2_pred_prob'],
        disc_rate,
    ), 
    axis=1
)

## 

## Profit calculations with intervention

### Function to estimate the new churn probability and cost of an applied promotion

In [23]:
## Function to calculate the churn probability for each incentive
def inc_pp_and_cost(dfc, ftr, prom, model=None, features=None, staff_cost=None):
    """
    Function to calculate the churn probability and the cost of an incentive

    The rows of `dfc` selected by `ftr` are copied, the features that the promotion
    changes are overwritten on the copy, and the model predicts a new churn probability
    from it. `dfc` is modified in place: for the selected rows the columns `prom_cost`
    and `prom_pred_prob` are overwritten; no other column of `dfc` is touched.
    A promotion name that is not handled below leaves the features and `prom_cost`
    as they are, so `prom_pred_prob` receives the prediction for the unmodified features.

    param dfc (dataframe): dataframe with all customers that are in 1st decile top churners
    param ftr (series): series with booleans related to filtering conditions for a given promotion
    param prom (string): name of the promotion applied
    param model (object, optional): fitted model exposing `predict`; the notebook-level `m2` when omitted
    param features (list, optional): columns passed to `model.predict`; the notebook-level `m2f` when omitted
    param staff_cost (float, optional): cost per pruned subscriber; the notebook-level
        `staff_cost_per_transaction` when omitted

    return: None
    """

    ## Falling back to the notebook-level model when none is passed explicitly
    ## (explicit arguments protect the result from later cells that rebind `m2` / `m2f`)
    if model is None:
        model = m2
    if features is None:
        features = m2f


    ## Creating copy of the selected rows; the promotions only alter this copy
    dfx = dfc.loc[ftr, :].copy()


    ## Adjusting features based on the promotions applied

    ### Monthly Usage Discount
    if prom == 'Monthly Usage Discount':

        #### Usage (`mou`, `peakvce`) is scaled up by `inc_perc` and `changem` is set to that increase in percent
        inc_perc = 0.5
        dfx['mou'] = dfx['mou']*(1 + inc_perc)
        dfx['changem'] = inc_perc*100
        dfx['peakvce'] = dfx['peakvce']*(1 + inc_perc)

        #### Registering the cost of the incentive: 5% of the recurring charge
        dfc.loc[ftr, 'prom_cost'] = dfc.loc[ftr, 'recchrge']*0.05

    ### New Phone
    elif prom == 'New Phone':

        #### Brand new, non-refurbished, web-capable handset priced at 81.85 or more
        dfx['eqpdays'] = 0
        dfx['refurb'] = 0
        dfx['webcap'] = 1
        dfx['setprc'] = np.maximum(dfx['setprc'], 81.85)

        #### Registering the cost of the incentive: 10% of that handset price
        dfc.loc[ftr, 'prom_cost'] = np.maximum(dfc.loc[ftr, 'setprc'], 81.85)*0.1

    ### Prune Inactive Users
    elif prom == 'Prune Inactive Users':

        #### The global staff cost is only needed (and only looked up) for this promotion
        if staff_cost is None:
            staff_cost = staff_cost_per_transaction

        #### After pruning, the unique subscribers equal the active ones
        dfx['uniqsubs'] = dfx['actvsubs']

        #### Registering the cost of the incentive: staff cost per pruned subscriber
        dfc.loc[ftr, 'prom_cost'] = (dfc.loc[ftr, 'uniqsubs'] - dfc.loc[ftr, 'actvsubs'])*staff_cost


    ## Estimating the new churn probability with the new parameters
    pp = model.predict(dfx[features])

    ## Adding the predicted probabilities to the original dataframe
    dfc.loc[ftr, 'prom_pred_prob'] = pp


    return

### Customer selection criteria for each promotion

In [24]:
## Promotions dictionary
## Every entry maps a promotion name to a boolean `selection` series evaluated on `dfc` when this cell runs.
## The tagging loop walks the entries in this order, so a customer matched by several
## selections keeps the last matching promotion.
proms = {
    
    
    ## Selecting out clients whose minutes of use are below the 30th percentile of `dfc`
    'Monthly Usage Discount': { 
        'selection': (dfc['mou'] < dfc['mou'].quantile(0.3))
    },
    
    ## Selecting out clients who might have lower churn if they get a new device
    'New Phone': { 
        'selection': ((dfc['refurb'] == 1) | (dfc['eqpdays'] >= dfc['eqpdays'].quantile(0.90)) | (dfc['webcap'] == 0))
    },
    
    ## Selecting out clients who might have lower churn if they get inactive users removed
    'Prune Inactive Users': {
        'selection': (dfc['uniqsubs'] - dfc['actvsubs'] > 0)
    },
    
}

In [25]:
## Creating the columns that will then be overwritten by the different promotions
dfc['prom_sel'] = 'no_promotion'
dfc['prom_pred_prob'] = dfc['m2_pred_prob']
## Float default: the promotion costs written below are floats, so an integer column
## would have to be silently upcast on assignment (deprecated in recent pandas versions)
dfc['prom_cost'] = 0.0

## Tagging all customers that fall under a certain promotion selection criteria and calculating new churn probability
## Promotions are applied in dictionary order, so a customer matched by several selections keeps the last one
for prom in proms:

    dfc.loc[proms[prom]['selection'], 'prom_sel'] = prom

    inc_pp_and_cost(dfc, proms[prom]['selection'], prom)

In [26]:
dfc['prom_sel'].value_counts(normalize=True)

New Phone                 0.409340
Prune Inactive Users      0.286957
no_promotion              0.237359
Monthly Usage Discount    0.066345
Name: prom_sel, dtype: float64

### Calculating expected profits related to each intervention

In [27]:
## Function to calculate expected value for each promotion
def prom_exp_profit(periods, revenue, cost, churn_p_sq, churn_p_prom, disc, prom_name=None):
    """
    Present value of the margin stream of a customer that receives a promotion

    The promotion churn probability only applies to the first transition: a customer
    reaches period 1 with probability (1 - churn_p_prom) and every later period
    with an additional factor (1 - churn_p_sq) per period.

    param periods (int): index of the last period; periods 0, 1, ..., periods are all included
    param revenue (float): revenue obtained in every period
    param cost (float): cost of the promotion; charged in every period for recurring
        promotions and subtracted once from the total otherwise
    param churn_p_sq (float): churn probability per period without promotion (status quo)
    param churn_p_prom (float): churn probability under the promotion
    param disc (float): discount per period, applied as (1 - disc)**p
    param prom_name (string, optional): name of the promotion; 'Monthly Usage Discount' is treated
        as a recurring cost, any other name (or None) as a one-off cost

    return: for recurring promotions the sum of the period results rounded to 2 decimals;
        otherwise that rounded sum minus `cost`
    """

    ### Promotions whose cost is paid again in every period
    recurring = prom_name in ['Monthly Usage Discount']

    ### Recurring costs reduce the margin of every period; one-off costs are subtracted at the end
    margin = (revenue - cost) if recurring else revenue

    ### List where all the results will be stored
    res_lst = []

    ### Iterating over all periods (as floats: 0.0, 1.0, ..., periods) and calculating individual results
    for p in np.arange(periods + 1, dtype=float):

        ## Entry for t and t+1
        if p <= 1:
            res_lst.append(
                (margin*(1 - churn_p_prom)**p)*(1 - disc)**p
            )

        ## Entries from t+2 onwards
        else:
            res_lst.append(
                margin*((1 - churn_p_prom)*(1 - churn_p_sq)**(p - 1))*(1 - disc)**p
            )

    ### Adding and rounding all results stored
    res = round(sum(res_lst), 2)

    ### One-off cost is charged once, after rounding the revenue stream
    if not recurring:
        res = res - cost

    return res

In [28]:
## Expected profits (present value) for each customer if we offer a promotion assuming the cost is zero
## Row-wise evaluation with the status quo and the promotion churn probabilities of each customer; `cost` is fixed at 0
dfc['exp_pv_prom_profit_cost0'] = dfc.apply(
    lambda x: prom_exp_profit(
        periods=sim_periods, 
        revenue=x['revenue'], 
        cost=0,
        churn_p_sq=x['m2_pred_prob'],
        churn_p_prom=x['prom_pred_prob'],
        disc=disc_rate,
        prom_name=x['prom_sel'],
    ), 
    axis=1
)

In [29]:
## Expected profits (present value) for each customer if we offer a promotion and considering the cost of the promotion
## `prom_sel` decides inside `prom_exp_profit` whether `prom_cost` is charged in every period or only once
dfc['exp_pv_prom_profit'] = dfc.apply(
    lambda x: prom_exp_profit(
        periods=sim_periods, 
        revenue=x['revenue'], 
        cost=x['prom_cost'],
        churn_p_sq=x['m2_pred_prob'],
        churn_p_prom=x['prom_pred_prob'],
        disc=disc_rate,
        prom_name=x['prom_sel'],
    ), 
    axis=1
)

### Identify which clients are not profitable with the promotion

In [30]:
## Calculating the profitability of the LTV with the promotion
dfc['final_profit'] = dfc['exp_pv_prom_profit'] - dfc['exp_pv_profit_no_inc']

In [31]:
## Reallocating all customers that are not profitable to the 'no_promotion' bucket
## Mask of the customers whose promotion yields a lower present value than doing nothing
mr1 = dfc['final_profit'] < 0
## For those customers every promotion-related column goes back to its status quo value
dfc.loc[mr1, 'prom_sel'] = 'no_promotion'
dfc.loc[mr1, 'prom_pred_prob'] = dfc.loc[mr1, 'm2_pred_prob']
dfc.loc[mr1, 'prom_cost'] = 0
dfc.loc[mr1, 'exp_pv_prom_profit_cost0'] = dfc.loc[mr1, 'exp_pv_profit_no_inc']
dfc.loc[mr1, 'exp_pv_prom_profit'] = dfc.loc[mr1, 'exp_pv_profit_no_inc']
## Without promotion there is no gain nor loss with respect to the status quo
dfc.loc[mr1, 'final_profit'] = 0

In [39]:
dfc['prom_sel'].value_counts(normalize=True)

no_promotion              0.395813
New Phone                 0.315942
Prune Inactive Users      0.286957
Monthly Usage Discount    0.001288
Name: prom_sel, dtype: float64

In [32]:
## Number of customers per promotion with a non-negative final profit and with a negative one
dfc.groupby(['prom_sel']).agg(
    profitables=('final_profit', lambda profit: (profit >= 0).sum()),
    non_profitables=('final_profit', lambda profit: (profit < 0).sum()),
)

Unnamed: 0_level_0,profitables,non_profitables
prom_sel,Unnamed: 1_level_1,Unnamed: 2_level_1
Monthly Usage Discount,4,0
New Phone,981,0
Prune Inactive Users,891,0
no_promotion,1229,0


### Understanding the profitability of each promotion

In [None]:
## Distribution of profits for each promotion
px.box(
    x=dfc['prom_sel'],
    y=dfc['final_profit'],
)

In [38]:
## Summary profits per promotion
dfc.groupby(
    [
        'prom_sel',
    ]
).agg(
    mean_profit=('final_profit', 'mean'),
    total_profit=('final_profit', 'sum'),
)

Unnamed: 0_level_0,mean_profit,total_profit
prom_sel,Unnamed: 1_level_1,Unnamed: 2_level_1
Monthly Usage Discount,0.2175,0.87
New Phone,11.659216,11437.691066
Prune Inactive Users,5.119158,4561.17
no_promotion,0.0,0.0


In [33]:
dfc['final_profit'].sum()

15999.731065999997

In [None]:
dfc

# 

# Header

# *Notes*

### Testing profit calculation algorithm

In [None]:
## Sanity check of the promotion profit calculation on a small case that can be verified by hand
## (one-off cost and no discounting: 432.04 of expected revenue minus the cost of 40 gives 392.04)
prom_exp_profit(
    periods=6,
    revenue=100,
    cost=40,
    churn_p_sq=0.2,
    churn_p_prom=0.1,
    disc=0.0,
    prom_name='New Phone',
)

### Testing weird behavior calculating predictions

#### Model 1

In [None]:
## Works
x = dfm1_test.loc[1099785:1099979, m1f]
m1.predict(x)

## Doesn't work
x = dfm1_test.loc[1099971:1099979, m1f]
m1.predict(x)

## Doesn't work
x = dfm1_test.loc[1099971:1099979, m1f].values
m1.predict(x)

## Doesn't work
x_matrix = sm.add_constant(dfm1_test.loc[1099971:1099979, m1f])
m1.predict(x_matrix)

#### Model 2

In [None]:
## Works
x = dfm2_test.loc[1099785:1099979, m2f]
m2.predict(x)

## Doesn't work
x = dfm2_test.loc[1099971:1099979, m2f]
m2.predict(x)

## Doesn't work
x = dfm2_test.loc[1099971:1099979, m2f].values
m2.predict(x)

## Doesn't work
x_matrix = sm.add_constant(dfm2_test.loc[1099971:1099979, m2f])
m2.predict(x_matrix)

### Old dict

In [None]:
## Promotions dictionary
## NOTE(review): every selection below reads `dfc['m1_pred_prob_grade']`, while `dfc` is built in this notebook
## from `dfm2_test`, where no such column is created; running this cell also rebinds `proms`, the dictionary
## iterated by the tagging loop -- keep it as reference only or delete it
proms = {
    
    ## Selecting out clients who might have lower churn if they get a customer service call asking them about their service and notifying them of either/both "block calls from unknown numbers" setting and referral promotion.
    'Customer Service Call': {
        'selection': (dfc['m1_pred_prob_grade'] == 1),
    },
    
    ## Selecting out clients who might have lower churn if they get inactive users removed
    'Prune Inactive Users': {
        'selection': (dfc['m1_pred_prob_grade'] == 1) & (dfc['uniqsubs'] - dfc['actvsubs'] > 0)
    },
    
    ## Selecting out clients who might have lower churn if they get a new device
    'New Phone': { # These people are grade 1 churn risks with a refurbished, old, or non-web device
        'selection': (dfc['m1_pred_prob_grade'] == 1) & ((dfc['refurb'] == 1) | (dfc['eqpdays'] >= dfc['eqpdays'].quantile(0.75)) | (dfc['webcap'] == 0)),
    },
}

## Model 2
### Logistic regression with regularization

##### Model dataset

In [None]:
dfm2_train = df_train.copy()
dfm2_test = df_test.copy()

##### Model label and features

In [None]:
## Label
m2l = 'churndep'

## Features
m2f = [
    # 'calibrat',
    # 'churn',
    # 'churndep',
    'revenue',
    'mou',
    'recchrge',
    'directas',
    'overage',
    'roam',
    'changem',
    'changer',
    'dropvce',
    'blckvce',
    'unansvce',
    'custcare',
    'threeway',
    'mourec',
    'outcalls',
    'incalls',
    'peakvce',
    'opeakvce',
    'dropblk',
    'callfwdv',
    'callwait',
    'months',
    'uniqsubs',
    'actvsubs',
    'phones',
    'models',
    'eqpdays',
    'age1',
    'age2',
    'children',
    'credita',
    'creditaa',
    'prizmrur',
    'prizmub',
    'prizmtwn',
    'refurb',
    'webcap',
    'truck',
    'rv',
    'occprof',
    'occcler',
    'occcrft',
    'occstud',
    'occhmkr',
    'occret',
    'occself',
    'ownrent',
    'marryun',
    'marryyes',
    'mailord',
    'mailres',
    'mailflag',
    'travel',
    'pcown',
    'creditcd',
    'retcalls',
    'retaccpt',
    'newcelly',
    'newcelln',
    'refer',
    'incmiss',
    'income',
    'mcycle',
    'setprcm',
    'setprc',
    'retcall',
    'rec_ntile',
    'freq_ntile',
    'mon_ntile',
]

##### Model training

In [None]:
m2 = logit_reg(
    a=dfm2_train[m2l],
    b=dfm2_train[m2f],
    alpha=3,
)

##### Model coefficients

In [None]:
m2.summary()

In [None]:
dfm2_or = odds_ratios(m2)
dfm2_or

##### Features selection

In [None]:
## Adding the standard deviation column to the odds ratios dataframe
dfm2_or = dfm2_or.join(
    dfi.loc[:, 'std']
)

## Operation between resulting odds ratios and standard deviation
dfm2_or['or**std'] = dfm2_or['Odds ratios']**dfm2_or['std']

## Calculating absolute difference between resulting column and 1
dfm2_or['diff_or**std'] = abs(1 - dfm2_or['or**std'])

## Ranking results
dfm2_or['rank'] = dfm2_or['diff_or**std'].rank(ascending=False)
dfm2_or.sort_values(by='rank', inplace=True, ascending=True)

##### Predictions

In [None]:
## Churn predicted probability
dfm2_test['m2_pred_prob'] = m2.predict(dfm2_test[m2f])

## Classifying churners in groups
dfm2_test['m2_pred_prob_grade'] = 10 - ntile(dfm2_test['m2_pred_prob'], 10)

##### 

## Model 3
### Logistic regression with features selected from model 1

##### Model dataset

In [None]:
dfm3_train = df_train.copy()
dfm3_test = df_test.copy()

##### Model label and features

In [None]:
## Label
m3l = 'churndep'

## Features
m3f = [
    # 'calibrat',
    # 'churn',
    # 'churndep',
    # 'revenue',
    'mou',
    # 'recchrge',
    # 'directas',
    # 'overage',
    # 'roam',
    'changem',
    'changer',
    'dropvce',
    # 'blckvce',
    # 'unansvce',
    # 'custcare',
    # 'threeway',
    # 'mourec',
    # 'outcalls',
    # 'incalls',
    # 'peakvce',
    # 'opeakvce',
    # 'dropblk',
    # 'callfwdv',
    # 'callwait',
    'months',
    'uniqsubs',
    'actvsubs',
    # 'phones',
    # 'models',
    'eqpdays',
    # 'age1',
    # 'age2',
    # 'children',
    # 'credita',
    'creditaa',
    # 'prizmrur',
    # 'prizmub',
    # 'prizmtwn',
    # 'refurb',
    # 'webcap',
    # 'truck',
    # 'rv',
    # 'occprof',
    # 'occcler',
    # 'occcrft',
    # 'occstud',
    # 'occhmkr',
    # 'occret',
    # 'occself',
    # 'ownrent',
    # 'marryun',
    # 'marryyes',
    # 'mailord',
    # 'mailres',
    # 'mailflag',
    # 'travel',
    # 'pcown',
    # 'creditcd',
    # 'retcalls',
    # 'retaccpt',
    # 'newcelly',
    # 'newcelln',
    # 'refer',
    # 'incmiss',
    # 'income',
    # 'mcycle',
    # 'setprcm',
    # 'setprc',
    'retcall',
    # 'rec_ntile',
    # 'freq_ntile',
    # 'mon_ntile',
]

##### Model training

In [None]:
m3 = logit_reg(
    a=dfm3_train[m3l],
    b=dfm3_train[m3f],
    alpha=0,
)

##### Model coefficients

In [None]:
m3.summary()

In [None]:
odds_ratios(m3)

##### Model predictions

In [None]:
## Churn predicted probability
dfm3_test['m3_pred_prob'] = m3.predict(dfm3_test[m3f])

## Classifying churners in groups
dfm3_test['m3_pred_prob_grade'] = 10 - ntile(dfm3_test['m3_pred_prob'], 10)

## 

---

---