In [1]:
pip install dmba

Note: you may need to restart the kernel to use updated packages.


In [43]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from dmba import classificationSummary

In [3]:
# Load the TinyData example used for the first network.
example_df = pd.read_csv('TinyData.csv')

In [4]:
# Preview the first rows to confirm which columns were read.
example_df.head()

Unnamed: 0,Obs.,Fat,Salt,Acceptance
0,1,0.2,0.9,like
1,2,0.1,0.1,dislike
2,3,0.2,0.4,dislike
3,4,0.2,0.5,dislike
4,5,0.4,0.5,like


In [5]:
# Label column to predict, and the two feature columns fed to the network.
outcome = 'Acceptance'
predictors = ['Fat', 'Salt']

In [6]:
# Split the frame into the feature matrix (X) and the label vector (y).
X, y = example_df[predictors], example_df[outcome]

In [10]:
# Distinct labels in alphabetical order; reused below as column / class names.
classes = sorted(y.unique())
classes

['dislike', 'like']

In [11]:
# One hidden layer with 3 units. `(3)` is just the int 3; the trailing comma
# makes it the one-element tuple that `hidden_layer_sizes` is documented to take.
# random_state is fixed so the fitted weights are reproducible.
clf = MLPClassifier(hidden_layer_sizes=(3,), activation='logistic', solver='lbfgs',
                    random_state=1)


In [12]:
# Train on the full example set, then show the fitted labels for the same rows
# (fit returns the estimator itself, so the two calls can be chained).
clf.fit(X, y).predict(X)

array(['like', 'dislike', 'dislike', 'dislike', 'like', 'like'],
      dtype='<U7')

In [13]:
# Bias terms of the fitted network, printed under a heading line.
print('Intercepts', clf.intercepts_, sep='\n')

Intercepts
[array([0.13368045, 4.07247552, 7.00768104]), array([14.30748676])]


In [14]:
# Connection weights of the fitted network, printed under a heading line.
print('Weights', clf.coefs_, sep='\n')


Weights
[array([[ -1.30656481,  -4.20427792, -13.29587332],
       [ -0.04399727,  -4.91606924,  -6.03356987]]), array([[ -0.27348313],
       [ -9.01211573],
       [-17.63504694]])]


In [15]:
# Show each observation next to its predicted class probabilities.
# predict_proba orders its columns like clf.classes_, so the labels are taken
# from the fitted model rather than from a separately computed list, and the
# probabilities reuse X's index so that concat lines the rows up by label.
proba_df = pd.DataFrame(clf.predict_proba(X), columns=clf.classes_, index=X.index)
print(pd.concat([example_df, proba_df], axis=1))

   Obs.  Fat  Salt Acceptance   dislike      like
0     1  0.2   0.9       like  0.000490  0.999510
1     2  0.1   0.1    dislike  0.999994  0.000006
2     3  0.2   0.4    dislike  0.999741  0.000259
3     4  0.2   0.5    dislike  0.997368  0.002632
4     5  0.4   0.5       like  0.002133  0.997867
5     6  0.3   0.8       like  0.000075  0.999925


In [16]:
# Confusion matrix and accuracy on the same rows the model was trained on.
classificationSummary(y, clf.predict(X), class_names=classes)

Confusion Matrix (Accuracy 1.0000)

        Prediction
 Actual dislike    like
dislike       3       0
   like       0       3


### Example 2

In [17]:
# Load the accident records used for the second example.
accidents_df = pd.read_csv('accidentsnn.csv')

In [18]:
# NOTE(review): input_vars is not referenced by any later cell shown here; the
# predictors are derived from `processed` instead — confirm it is still needed.
input_vars = ['ALCHL_I', 'PROFIL_I_R', 'VEH_INVL']

In [19]:
# Store SUR_COND and MAX_SEV_IR as pandas categoricals (columns are replaced in place).
accidents_df['SUR_COND'] = accidents_df['SUR_COND'].astype('category')
accidents_df['MAX_SEV_IR'] = accidents_df['MAX_SEV_IR'].astype('category')

In [20]:
# Label column for the second example.
outcome = 'MAX_SEV_IR'
# One-hot encode SUR_COND, then discard the SUR_COND_9 indicator column.
dummies = pd.get_dummies(accidents_df, columns=['SUR_COND'])
processed = dummies.drop(columns=['SUR_COND_9'])
# Every remaining column except the label is used as a predictor.
predictors = [name for name in processed.columns if name != outcome]

In [21]:
# Row / column count of the raw accidents frame (before dummy encoding).
print(accidents_df.shape)

(999, 5)


In [23]:
# Feature matrix and label vector for the accidents model.
X, y = processed[predictors], processed[outcome]

# Hold out 40% of the rows for validation; the seed keeps the split repeatable.
train_X, valid_X, train_y, valid_y = train_test_split(
    X, y, test_size=0.4, random_state=1
)

In [24]:
# Single hidden layer with 2 units; (2,) is an actual tuple, whereas (2) is the int 2.
# random_state is fixed so the fitted weights are reproducible.
clf = MLPClassifier(hidden_layer_sizes=(2,), activation='logistic', solver='lbfgs',
                    random_state=1)
# Labels are passed as a plain array (.values) rather than as the categorical Series.
clf.fit(train_X, train_y.values)

MLPClassifier(activation='logistic', hidden_layer_sizes=2, random_state=1,
              solver='lbfgs')

In [25]:
# NOTE(review): this builds a second, unfitted MLPClassifier and discards it; it
# has no effect on `clf`. It looks like the previous cell's output pasted into a
# code cell — consider deleting this cell.
MLPClassifier(activation='logistic', hidden_layer_sizes=2, random_state=1, solver='lbfgs')

MLPClassifier(activation='logistic', hidden_layer_sizes=2, random_state=1,
              solver='lbfgs')

In [26]:
# Confusion matrix and accuracy on the training partition.
classificationSummary(train_y, clf.predict(train_X))

Confusion Matrix (Accuracy 0.8664)

       Prediction
Actual   0   1   2
     0 331   0   1
     1   0 180   0
     2  30  49   8


In [27]:
# Confusion matrix and accuracy on the held-out validation partition.
classificationSummary(valid_y, clf.predict(valid_X))

Confusion Matrix (Accuracy 0.8550)

       Prediction
Actual   0   1   2
     0 218   0   1
     1   0 119   0
     2  24  33   5


### Example 3

In [99]:
# Load the purchase-order data for the third example.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so a column cannot end up holding a mix of types.
# NOTE(review): the stored output of this cell looks like the tail of a warning
# raised during the read — confirm it disappears with this setting.
Amazon2018_df = pd.read_csv('Amazon2018.csv', low_memory=False)
Amazon2018_df.head()

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,AMZN_YR_WK,Department,VendorName,Purchase Order,DC,Original Delivery Date,Actual Delivery Date,Gate Time,Ordercases,Rcvd Cases,Non Compliant Qty,Compliant %,Status
0,11828,85,KODAK ALARIS OPERATIONS CANADA,6250080999,6002,8/6/2018,7/9/2018,8:56:04 AM,1719,0,,,
1,11828,85,KODAK ALARIS OPERATIONS CANADA,6350090995,6063,8/6/2018,7/11/2018,7:42:51 PM,1273,0,,,
2,11828,85,KODAK ALARIS OPERATIONS CANADA,6450090985,6093,8/6/2018,7/12/2018,10:55:39 AM,897,0,,,
3,11828,72,APPLE CANADA INC,8450361239,6063,8/9/2018,7/27/2018,7:02:56 AM,772,0,,,
4,11828,98,TREASURE MILLS INC.,3300401778,6098,8/5/2018,7/29/2018,1:19:54 PM,572,0,,,


In [100]:
# Row / column count of the loaded frame.
Amazon2018_df.shape

(196798, 13)

In [101]:
# Normalise the headers: trim surrounding whitespace and turn every space into
# an underscore (consecutive spaces therefore give '__', as in 'Compliant__%').
Amazon2018_df.columns = Amazon2018_df.columns.str.strip().str.replace(' ', '_', regex=False)

In [102]:
# Display the frame to check the renamed columns.
Amazon2018_df

Unnamed: 0,AMZN_YR_WK,Department,VendorName,Purchase_Order,DC,Original_Delivery_Date,Actual_Delivery_Date,Gate_Time,Ordercases,Rcvd_Cases,Non_Compliant_Qty,Compliant__%,Status
0,11828,85,KODAK ALARIS OPERATIONS CANADA,6250080999,6002,8/6/2018,7/9/2018,8:56:04 AM,1719,0,,,
1,11828,85,KODAK ALARIS OPERATIONS CANADA,6350090995,6063,8/6/2018,7/11/2018,7:42:51 PM,1273,0,,,
2,11828,85,KODAK ALARIS OPERATIONS CANADA,6450090985,6093,8/6/2018,7/12/2018,10:55:39 AM,897,0,,,
3,11828,72,APPLE CANADA INC,8450361239,6063,8/9/2018,7/27/2018,7:02:56 AM,772,0,,,
4,11828,98,TREASURE MILLS INC.,3300401778,6098,8/5/2018,7/29/2018,1:19:54 PM,572,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
196793,11849,72,LEXAR INTERNATIONAL,8100365713,6063,12/31/2018,2/4/2019,9:44:00 AM,128,0,,,
196794,11849,72,LEXAR INTERNATIONAL,8250365644,6093,12/29/2018,2/6/2019,4:55:58 AM,148,0,,,
196795,11849,6,LEXAR INTERNATIONAL,650371602,6093,12/29/2018,2/6/2019,4:55:58 AM,211,0,,,
196796,11849,16,JAY TRENDS MERCHANDISING INC.,450431790,6093,12/29/2018,2/13/2019,9:46:12 PM,34,0,,,


In [103]:
# Status is 1 when the number of received cases equals the number ordered, else 0.
# Both columns are coerced to numbers first: if either one was read as text,
# `==` would compare str with int and silently give 0 for every row. Values that
# cannot be parsed become NaN, which never compares equal and so also maps to 0.
# NOTE(review): the stored confusion matrices further down contain only class 0,
# i.e. Status was 0 for every row in that run — verify these two columns.
ordered_cases = pd.to_numeric(Amazon2018_df["Ordercases"], errors="coerce")
received_cases = pd.to_numeric(Amazon2018_df["Rcvd_Cases"], errors="coerce")
Amazon2018_df["Status"] = np.where(ordered_cases == received_cases, 1, 0)

In [89]:
# Display the frame to inspect the new Status column.
# NOTE(review): this cell's execution count (89) is out of sequence with its
# neighbours (103, 104), so its stored output may come from an earlier run.
Amazon2018_df

Unnamed: 0,AMZN_YR_WK,Department,VendorName,Purchase_Order,DC,Original_Delivery_Date,Actual_Delivery_Date,Gate_Time,Ordercases,Rcvd_Cases,Non_Compliant_Qty,Compliant__%,Status
0,11828,85,KODAK ALARIS OPERATIONS CANADA,6250080999,6002,8/6/2018,7/9/2018,8:56:04 AM,1719,0,,,0
1,11828,85,KODAK ALARIS OPERATIONS CANADA,6350090995,6063,8/6/2018,7/11/2018,7:42:51 PM,1273,0,,,0
2,11828,85,KODAK ALARIS OPERATIONS CANADA,6450090985,6093,8/6/2018,7/12/2018,10:55:39 AM,897,0,,,0
3,11828,72,APPLE CANADA INC,8450361239,6063,8/9/2018,7/27/2018,7:02:56 AM,772,0,,,0
4,11828,98,TREASURE MILLS INC.,3300401778,6098,8/5/2018,7/29/2018,1:19:54 PM,572,0,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
196793,11849,72,LEXAR INTERNATIONAL,8100365713,6063,12/31/2018,2/4/2019,9:44:00 AM,128,0,,,0
196794,11849,72,LEXAR INTERNATIONAL,8250365644,6093,12/29/2018,2/6/2019,4:55:58 AM,148,0,,,0
196795,11849,6,LEXAR INTERNATIONAL,650371602,6093,12/29/2018,2/6/2019,4:55:58 AM,211,0,,,0
196796,11849,16,JAY TRENDS MERCHANDISING INC.,450431790,6093,12/29/2018,2/13/2019,9:46:12 PM,34,0,,,0


In [104]:
# Label column, then the three columns used as network inputs.
# NOTE(review): Department and DC look like identifier codes in the preview
# (e.g. 85, 6002) but are passed as plain numbers — confirm that is intended.
outcome = 'Status'
predictors = ['Department', 'DC', 'Ordercases']

In [105]:
# Feature matrix and label vector for the purchase-order model.
X, y = Amazon2018_df[predictors], Amazon2018_df[outcome]

# Hold out 40% of the rows for validation; the seed keeps the split repeatable.
train_X, valid_X, train_y, valid_y = train_test_split(
    X, y, test_size=0.4, random_state=0
)

In [106]:
# One hidden layer with 10 units. `(10)` is just the int 10; the trailing comma
# makes it the one-element tuple that `hidden_layer_sizes` is documented to take.
# random_state is fixed so the fitted weights are reproducible.
clf = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic', solver='lbfgs',
                    random_state=1)

In [107]:
# Train on the training partition; labels are passed as a plain array (.values).
clf.fit(train_X, train_y.values)

MLPClassifier(activation='logistic', hidden_layer_sizes=10, random_state=1,
              solver='lbfgs')

In [108]:
# Predict Status for every row of X (training and validation rows together).
clf.predict(X)


array([0, 0, 0, ..., 0, 0, 0])

In [109]:
#Network structure
print('Intercepts')
print(clf.intercepts_)


Intercepts
[array([-0.31517711, -0.06188827,  0.3591981 ,  0.02601699,  0.15052219,
       -0.14472145,  0.14630336,  0.26255797, -0.37788577,  0.19635934]), array([-2.09673765])]


In [110]:
# Connection weights of the fitted model, printed under a heading line.
print('Weights', clf.coefs_, sep='\n')

Weights
[array([[-0.07074045,  0.17283636, -0.39214165, -0.15506273, -0.27704498,
        -0.31979519, -0.24611718, -0.11846758, -0.08098207,  0.03790973],
       [-0.65393464,  0.1452978 , -0.2318462 ,  0.29661905, -0.36170323,
         0.13372541, -0.0648713 ,  0.39561255, -0.28210305,  0.57463906],
       [ 0.07550565,  0.36733376, -0.14636178,  0.15086993,  0.30612527,
         0.30955423, -0.32551736, -0.28380764, -0.25900574,  0.48492966]]), array([[ 0.35567273],
       [-1.47521312],
       [-0.18723757],
       [-1.44015134],
       [-0.34891837],
       [-1.73128547],
       [ 0.34845062],
       [-1.71999296],
       [-0.18098539],
       [-0.97898303]])]


In [111]:
# Confusion matrix and accuracy on the training partition.
# NOTE(review): the stored output lists only class 0, so the reported accuracy
# of 1.0 says nothing about the model — check how Status was built.
classificationSummary(train_y, clf.predict(train_X))


Confusion Matrix (Accuracy 1.0000)

       Prediction
Actual      0
     0 118078


In [112]:
# Confusion matrix and accuracy on the held-out validation partition.
# NOTE(review): the stored output lists only class 0, so the reported accuracy
# of 1.0 says nothing about the model — check how Status was built.
classificationSummary(valid_y, clf.predict(valid_X))

Confusion Matrix (Accuracy 1.0000)

       Prediction
Actual     0
     0 78720
