# Cell2Cell game assignment

---

# 

# Imports

#### Standard library imports

In [19]:
import sys
sys.path.append("../")

import os

#### Third party imports

In [20]:
import pandas as pd
# Notebook-wide display settings: show every column of wide frames,
# but cap printed rows at 70.
pd.options.display.max_columns = None
pd.options.display.max_rows = 70

#### Local application imports

In [21]:
# Reload edited project modules automatically, so changes made under `pkg_dir`
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Wildcard imports from the local package. Names used later in this notebook
# that are never imported explicitly (`mba263`, `logit_reg`, `odds_ratios`)
# presumably come from these modules.
# NOTE(review): confirm which module provides each of them.
from pkg_dir.config import *
from pkg_dir.src.utils import *
from pkg_dir.src.functions import *
from pkg_dir.src.parameters import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# 

# Loading data

In [4]:
# Location of the raw Cell2Cell CSV, relative to the notebook's working directory.
data_path = '../pkg_dir/data/cell2cell.csv'

In [5]:
# Load the raw dataset into `dfr`; the train/test and summary frames below are derived from it.
dfr = pd.read_csv(data_path)

# 

# Initial data wrangling

##### Adding RFM variables

In [6]:
## RFM tile features built from `eqpdays`, `mou` and `recchrge`
num_tiles = 10

# The frequency and monetary tiles are flipped (`num_tiles - 1 - tile`), while the
# recency tile is used exactly as returned.
# NOTE(review): assumes `mba263.ntile` yields 0-based tile indices in
# [0, num_tiles - 1] — confirm against the helper.
dfr = dfr.assign(
    rec_ntile=mba263.ntile(dfr['eqpdays'], num_tiles),
    freq_ntile=num_tiles - 1 - mba263.ntile(dfr['mou'], num_tiles),
    mon_ntile=num_tiles - 1 - mba263.ntile(dfr['recchrge'], num_tiles),
)

##### Setting the client ID as index

In [7]:
# Index the rows by customer ID.
# The guard makes this cell safe to re-run: once `customer` has become the index
# it is no longer a column, so a second `set_index('customer')` would raise a
# KeyError. The result is rebound rather than mutated in place, so `dfr` always
# refers to a frame in a known state.
if dfr.index.name != 'customer':
    dfr = dfr.set_index('customer')

##### Splitting train and test data

In [8]:
## Split on the `calibrat` flag (1 -> train, 0 -> test) and drop both the flag
## itself and `churndep` from each split; `churn` is kept in both frames.
## `drop` returns a new frame, so neither split shares state with `dfr`.
df_train = dfr.loc[dfr['calibrat'] == 1].drop(columns=['calibrat', 'churndep'])
df_test = dfr.loc[dfr['calibrat'] == 0].drop(columns=['calibrat', 'churndep'])

# 

# Data exploration

##### Saving summary statistics about the data as dataframe

In [34]:
# One row per numeric column of `dfr`, with the quartile columns removed
# from the transposed `describe()` table.
dfi = dfr.describe().T.drop(columns=['25%', '50%', '75%'])

##### Evaluating number of labels in train and test set

In [None]:
# Label balance per split, side by side: absolute row counts for every
# (`calibrat`, `churn`) pair next to the share of each `churn` value
# within its `calibrat` group.
pd.concat(
    [
        dfr.groupby(['calibrat', 'churn']).agg(count=('calibrat', 'count')),
        dfr.groupby(['calibrat'])['churn'].value_counts(normalize=True).to_frame(),
    ],
    axis=1,
)


##### Understanding the `churndep` variable

In [None]:
# Number of rows per `calibrat` value (the flag used earlier to split train (1) from test (0)).
dfr['calibrat'].value_counts()

In [None]:
# Number of rows per `churndep` value over the whole dataset.
dfr['churndep'].value_counts()

In [None]:
# Cross-tabulate `churndep` against the train/test flag:
# row count for every (`calibrat`, `churndep`) pair.
dfr.groupby(['calibrat', 'churndep']).agg(count=('calibrat', 'count'))

##### Seeking correlations among variables

# 

# Manual models

## Model 1

##### Model dataset

In [10]:
# Work on private copies so Model 1 preparation never touches the shared
# train/test frames.
dfm1_train, dfm1_test = df_train.copy(), df_test.copy()

##### Model label and features

In [11]:
## Label: the column Model 1 predicts
m1l = 'churn'

## Features, in the order they are passed to the model.
## `customer`, `calibrat` and `churndep` are deliberately left out.
m1f = [
    'revenue', 'mou', 'recchrge', 'directas', 'overage', 'roam',
    'changem', 'changer',
    'dropvce', 'blckvce', 'unansvce', 'custcare', 'threeway', 'mourec',
    'outcalls', 'incalls', 'peakvce', 'opeakvce', 'dropblk', 'callfwdv', 'callwait',
    'months', 'uniqsubs', 'actvsubs', 'phones', 'models', 'eqpdays',
    'age1', 'age2', 'children', 'credita', 'creditaa',
    'prizmrur', 'prizmub', 'prizmtwn', 'refurb', 'webcap', 'truck', 'rv',
    'occprof', 'occcler', 'occcrft', 'occstud', 'occhmkr', 'occret', 'occself',
    'ownrent', 'marryun', 'marryyes', 'mailord', 'mailres', 'mailflag',
    'travel', 'pcown', 'creditcd',
    'retcalls', 'retaccpt', 'newcelly', 'newcelln', 'refer',
    'incmiss', 'income', 'mcycle', 'setprcm', 'setprc', 'retcall',
]

##### Model training

In [12]:
# Fit Model 1 on the training split: the `churn` label column against the
# columns listed in `m1f`.
# `logit_reg` is not defined in this notebook (presumably provided by the
# `pkg_dir` wildcard imports).
# NOTE(review): confirm that `a` is the label, `b` the regressors, and what
# `alpha=0` controls (looks like a regularization strength).
m1 = logit_reg(
    a=dfm1_train[m1l],
    b=dfm1_train[m1f],
    alpha=0,
)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.6715330588481454
            Iterations: 201
            Function evaluations: 256
            Gradient evaluations: 201


##### Model coefficients

In [13]:
# Display the fitted model's summary table (fit statistics and per-coefficient estimates).
m1.summary()

0,1,2,3
Dep. Variable:,churn,No. Observations:,38941.0
Model:,Mba263Logit,Df Residuals:,38874.0
Method:,MLE,Df Model:,66.0
Date:,"Mon, 10 Apr 2023",Pseudo R-squ.:,0.03117
Time:,18:47:47,Log-Likelihood:,-26150.0
converged:,True,LL-Null:,-26992.0
Covariance Type:,nonrobust,LLR p-value:,6.012e-308

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.1499,0.095,1.573,0.116,-0.037,0.337
revenue,0.0020,0.001,2.460,0.014,0.000,0.004
mou,-0.0003,4.96e-05,-5.657,0.000,-0.000,-0.000
recchrge,-0.0031,0.001,-3.514,0.000,-0.005,-0.001
directas,-0.0012,0.006,-0.201,0.840,-0.013,0.010
overage,0.0008,0.000,2.711,0.007,0.000,0.001
roam,0.0071,0.002,3.436,0.001,0.003,0.011
changem,-0.0005,5.35e-05,-9.194,0.000,-0.001,-0.000
changer,0.0023,0.000,6.247,0.000,0.002,0.003


In [14]:
# Display the Model 1 results as odds ratios. `odds_ratios` is not defined in
# this notebook (presumably a helper from the `pkg_dir` wildcard imports — confirm).
odds_ratios(m1)

Unnamed: 0,Odds ratios,std err,z,P>|z|,[0.025,0.975]
revenue,1.001965,0.0008,2.457656,0.014,1.000414,1.003517
mou,0.999719,5e-05,5.657852,0.0,0.999623,0.999815
recchrge,0.996882,0.000886,3.519019,0.0,0.995163,0.998601
directas,0.998804,0.005932,0.201561,0.84,0.987297,1.010312
overage,1.000761,0.000281,2.710142,0.007,1.000216,1.001305
roam,1.007116,0.002078,3.424167,0.001,1.003084,1.011148
changem,0.999508,5.3e-05,9.196449,0.0,0.999404,0.999612
changer,1.002306,0.00037,6.239389,0.0,1.001589,1.003023
dropvce,1.011403,0.007337,1.554212,0.12,0.99717,1.025635
blckvce,1.006423,0.007203,0.891614,0.373,0.992448,1.020397


##### Model predictions

#### x

## 

## Manual models compilation

# 

# Evaluating models

# 

# Building models

# Header

# *Notes*

---

---