# ML II: AMAZON CASE

### IMPORT PACKAGES

In [35]:
%matplotlib inline

from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from dmba import classificationSummary

### IMPORT FILE

In [36]:
# Read the whole CSV in a single pass (low_memory=False) so each column's dtype
# is inferred from all rows at once; chunked inference can leave a column with
# mixed types and makes pandas emit a DtypeWarning.
amazon_df = pd.read_csv('Amazon2024.csv', low_memory=False)

  amazon_df = pd.read_csv('Amazon2024.csv')


### DETERMINE DATA TYPES

In [37]:
# Show the dtype pandas assigned to each column, to spot columns that still need conversion.
amazon_df.dtypes

AMZN_YR_WK                  int64
Department                  int64
VendorName                 object
Purchase Order              int64
DC                          int64
Original Delivery Date     object
Actual Delivery Date       object
Gate Time                  object
Ordercases                  int64
Rcvd Cases                 object
Non Compliant Qty         float64
Compliant  %              float64
Status                    float64
dtype: object

### TRANSFORM COLUMNS TO NUMERIC FORMAT

In [38]:
# Coerce both case-count columns to numbers. Anything that cannot be parsed
# becomes NaN and is then replaced by 0.
# NOTE(review): a 0 produced here is indistinguishable from a genuine
# "nothing received"; confirm unparseable "Rcvd Cases" entries should count as 0.
for case_col in ("Ordercases", "Rcvd Cases"):
    parsed = pd.to_numeric(amazon_df[case_col], errors="coerce")
    amazon_df[case_col] = parsed.fillna(0)

### CALCULATE NON-COMPLIANT QUANTITY

In [39]:
# Shortfall for each row: cases ordered minus cases received.
amazon_df["Non Compliant Qty"] = amazon_df["Ordercases"].sub(amazon_df["Rcvd Cases"])

### CALCULATE COMPLIANCE PERCENTAGE (%)

In [40]:
# Fraction of the ordered cases that were received.
# Rows without a positive order quantity get 0 instead of a division result.
amazon_df["Compliant  %"] = np.where(
    amazon_df["Ordercases"].gt(0),
    amazon_df["Rcvd Cases"].div(amazon_df["Ordercases"]),
    0,
)

### CALCULATE STATUS

In [41]:
# Binary outcome: 1 when the compliance fraction is at least 0.8, otherwise 0.
amazon_df["Status"] = amazon_df["Compliant  %"].ge(0.8).astype(int)

In [48]:
# Preview the first five rows, including the derived columns.
amazon_df.head(n=5)

Unnamed: 0,AMZN_YR_WK,Department,VendorName,Purchase Order,DC,Original Delivery Date,Actual Delivery Date,Gate Time,Ordercases,Rcvd Cases,Non Compliant Qty,Compliant %,Status
0,11828,85,KODAK ALARIS OPERATIONS CANADA,6250080999,6002,8/6/2024,7/9/2024,8:56:04 AM,1719,0.0,1719.0,0.0,0
1,11828,85,KODAK ALARIS OPERATIONS CANADA,6350090995,6063,8/6/2024,7/11/2024,7:42:51 PM,1273,0.0,1273.0,0.0,0
2,11828,85,KODAK ALARIS OPERATIONS CANADA,6450090985,6093,8/6/2024,7/12/2024,10:55:39 AM,897,0.0,897.0,0.0,0
3,11828,72,APPLE CANADA INC,8450361239,6063,8/9/2024,7/27/2024,7:02:56 AM,772,0.0,772.0,0.0,0
4,11828,98,TREASURE MILLS INC.,3300401778,6098,8/5/2024,7/29/2024,1:19:54 PM,572,0.0,572.0,0.0,0


### PREDICTIVE MODEL 

In [49]:
# Target column and the input columns used to predict it.
# NOTE(review): "Status" is computed directly from "Rcvd Cases" / "Ordercases",
# which are exactly these two predictors, so the target is a deterministic
# function of the inputs (target leakage). Confirm whether predictors that are
# known before delivery should be used instead.
outcome = "Status"
predictors = ['Ordercases', 'Rcvd Cases']

y = amazon_df[outcome]
X = amazon_df.loc[:, predictors]

# Sorted class labels; used later as column names for the predicted probabilities.
classes = sorted(y.unique())

In [50]:
# Hold out 40% of the rows for validation; the fixed seed keeps the split reproducible.
train_X, valid_X, train_y, valid_y = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=1,
)

In [51]:
# Train a neural network with a single hidden layer of 2 nodes.
# hidden_layer_sizes is written as a one-element tuple: `(2)` is just the int 2,
# not a tuple, which hides the "one entry per hidden layer" meaning.
# NOTE(review): the predictors are passed in unscaled; consider standardizing
# them before fitting.
clf = MLPClassifier(
    hidden_layer_sizes=(2,),
    activation='logistic',
    solver='lbfgs',
    random_state=1,
)
clf.fit(train_X, train_y.values)

0,1,2
,hidden_layer_sizes,2
,activation,'logistic'
,solver,'lbfgs'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,200
,shuffle,True


In [52]:
# Network structure: the fitted model's intercept arrays, then its weight matrices.
print('Intercepts')
print(clf.intercepts_)

print('Weights')
print(clf.coefs_)

# Prediction: append the per-class probabilities to the original rows.
# The probability frame reuses X's index because concat(axis=1) aligns on index
# labels; a fresh RangeIndex would silently misalign the probabilities if the
# rows were ever filtered or re-indexed.
print(pd.concat([
    amazon_df,
    pd.DataFrame(clf.predict_proba(X), columns=classes, index=X.index)
], axis=1))

Intercepts
[array([-1.52364878, -1.91708386]), array([24.7222578])]
Weights
[array([[ 8.83423855,  7.13032007],
       [ 0.9117601 , -8.91296123]]), array([[-11.17612215],
       [-46.98226295]])]
        AMZN_YR_WK  Department                      VendorName  \
0            11828          85  KODAK ALARIS OPERATIONS CANADA   
1            11828          85  KODAK ALARIS OPERATIONS CANADA   
2            11828          85  KODAK ALARIS OPERATIONS CANADA   
3            11828          72  APPLE CANADA INC                 
4            11828          98  TREASURE MILLS INC.              
...            ...         ...                             ...   
196793       11849          72  LEXAR INTERNATIONAL              
196794       11849          72  LEXAR INTERNATIONAL              
196795       11849           6  LEXAR INTERNATIONAL              
196796       11849          16  JAY TRENDS MERCHANDISING INC.    
196797       11849          16  JAY TRENDS MERCHANDISING INC.    

        Pu

In [54]:
# Confusion matrix and accuracy, first on the training partition and then on
# the validation partition.
for actual, features in ((train_y, train_X), (valid_y, valid_X)):
    classificationSummary(actual, clf.predict(features))

Confusion Matrix (Accuracy 1.0000)

       Prediction
Actual     0     1
     0 74733     0
     1     0 43345
Confusion Matrix (Accuracy 1.0000)

       Prediction
Actual     0     1
     0 49942     1
     1     0 28777
