In [1]:
import pandas as pd
import numpy as np
import sklearn.preprocessing
from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import StandardScaler

# reference: https://github.com/pandas-dev/sklearn-pandas

In [2]:
dfTrial = pd.read_csv('../data/trialPromoResults.csv', index_col='index')
dfCustdb = pd.read_csv('../data/custdatabase.csv', index_col='index')
dfActual = pd.read_csv('../data/Cust_Actual.csv', index_col='index')
dfTrain = pd.read_csv('../data/Train.csv', index_col='index')

In [3]:
dfTrial.shape

(1000, 10)

In [4]:
# map columns to transformations
# outputting a dataframe
# reference: https://github.com/pandas-dev/sklearn-pandas

dfMapper = DataFrameMapper([
    ('sex',sklearn.preprocessing.LabelBinarizer()),
    ('mstatus',sklearn.preprocessing.LabelBinarizer()),
    (['age'],sklearn.preprocessing.StandardScaler()),
    ('occupation',sklearn.preprocessing.LabelBinarizer()),
    ('education',sklearn.preprocessing.LabelBinarizer()),
    (['income'],sklearn.preprocessing.StandardScaler()),
    (['avbal'],sklearn.preprocessing.StandardScaler()),
    (['avtrans'],sklearn.preprocessing.StandardScaler()),
    ('decision', None)
], df_out=True)

In [5]:
dfm = np.round(dfMapper.fit_transform(dfTrial.copy()),2)
dfm.head()

Unnamed: 0,sex,mstatus_divorced,mstatus_married,mstatus_single,mstatus_widowed,age,occupation_IT,occupation_construct,occupation_education,occupation_finance,...,occupation_medicine,occupation_retired,education_postgrad,education_professional,education_secondary,education_tertiary,income,avbal,avtrans,decision
0,0,0,1,0,0,1.05141,0,0,0,0,...,0,0,0,0,1,0,-0.614625,0.986559,-0.0622867,
1,1,0,0,0,1,3.18537,0,0,0,0,...,0,1,0,0,0,1,-0.0749526,-0.249461,-0.780053,
2,1,0,0,1,0,-0.910599,0,0,0,0,...,0,0,0,1,0,0,2.7805,0.723721,1.08074,B
3,0,0,1,0,0,-0.424116,0,0,1,0,...,0,0,1,0,0,0,-0.581202,-0.558764,0.110056,
4,1,0,0,1,0,-1.48515,0,1,0,0,...,0,0,0,0,0,1,-0.581202,-0.558764,0.110056,


In [6]:
# list all columns with transformed names
dfMapper.transformed_names_

['sex',
 'mstatus_divorced',
 'mstatus_married',
 'mstatus_single',
 'mstatus_widowed',
 'age',
 'occupation_IT',
 'occupation_construct',
 'occupation_education',
 'occupation_finance',
 'occupation_government',
 'occupation_legal',
 'occupation_manuf',
 'occupation_medicine',
 'occupation_retired',
 'education_postgrad',
 'education_professional',
 'education_secondary',
 'education_tertiary',
 'income',
 'avbal',
 'avtrans',
 'decision']

In [7]:
dfm.columns

Index(['sex', 'mstatus_divorced', 'mstatus_married', 'mstatus_single',
       'mstatus_widowed', 'age', 'occupation_IT', 'occupation_construct',
       'occupation_education', 'occupation_finance', 'occupation_government',
       'occupation_legal', 'occupation_manuf', 'occupation_medicine',
       'occupation_retired', 'education_postgrad', 'education_professional',
       'education_secondary', 'education_tertiary', 'income', 'avbal',
       'avtrans', 'decision'],
      dtype='object')

In [8]:
# Below onwards is on feeding data to NN
# reference: https://www.springboard.com/blog/beginners-guide-neural-network-in-python-scikit-learn-0-18/

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import train_test_split

In [9]:
mlp = MLPClassifier(hidden_layer_sizes=(13,13,13),max_iter=500)

In [10]:
x = dfm.drop('decision',axis=1)
y = dfm['decision']

In [11]:
x_train, x_test, y_train, y_test = train_test_split(x, y)

In [12]:
mlp.fit(x_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(13, 13, 13), learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [13]:
predictions = mlp.predict(x_test)
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          A       0.27      0.24      0.25        29
          B       0.00      0.00      0.00         8
       None       0.86      0.90      0.88       213

avg / total       0.77      0.80      0.78       250



In [21]:
mlp.fit(x, y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(13, 13, 13), learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [22]:
predictions = mlp.predict(x_test)
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          A       0.85      0.59      0.69        29
          B       0.67      0.25      0.36         8
       None       0.92      0.98      0.95       213

avg / total       0.90      0.91      0.90       250



# fit model to customer base

In [16]:
dfCustdb.shape

(4000, 9)

In [17]:
dfCustMapper = DataFrameMapper([
    ('sex',sklearn.preprocessing.LabelBinarizer()),
    ('mstatus',sklearn.preprocessing.LabelBinarizer()),
    (['age'],sklearn.preprocessing.StandardScaler()),
    ('occupation',sklearn.preprocessing.LabelBinarizer()),
    ('education',sklearn.preprocessing.LabelBinarizer()),
    (['income'],sklearn.preprocessing.StandardScaler()),
    (['avbal'],sklearn.preprocessing.StandardScaler()),
    (['avtrans'],sklearn.preprocessing.StandardScaler())
], df_out=True)

In [18]:
dfmCustdb = np.round(dfCustMapper.fit_transform(dfCustdb.copy()),2)
dfmCustdb.head()

Unnamed: 0,sex,mstatus_divorced,mstatus_married,mstatus_single,mstatus_widowed,age,occupation_IT,occupation_construct,occupation_education,occupation_finance,...,occupation_manuf,occupation_medicine,occupation_retired,education_postgrad,education_professional,education_secondary,education_tertiary,income,avbal,avtrans
0,1.0,1.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.49,2.6,1.62
1,1.0,0.0,1.0,0.0,0.0,1.51,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.02,-0.36,-0.31
2,1.0,1.0,0.0,0.0,0.0,-0.24,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.39,0.18,0.6
3,0.0,1.0,0.0,0.0,0.0,-1.11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.46,0.37,1.68
4,0.0,1.0,0.0,0.0,0.0,0.54,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-0.84,-0.75,-1.28


In [23]:
outputDf = mlp.predict(dfmCustdb)
dfCustdb['decision'] = outputDf
dfCustdb['decision'].unique()

array(['None', 'B', 'A'], dtype=object)

In [24]:
targetCust = dfCustdb[dfCustdb['decision'] != 'None']
targetCust.shape

(444, 10)

In [None]:
targetCust.to_csv(../data/.csv)