In [1]:
import pickle as pickle
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from tensorflow import keras

In [2]:
#Set number of predictions to make
n = 25

#Seed the stdlib RNG so the randomly generated input data is identical on
#every Restart & Run All (all random inputs come from the `random` module)
SEED = 42
random.seed(SEED)

In [3]:
#load in needed column info
with open('BinColNames.txt', 'r') as f:
    raw_names = f.read()

#One column name per line. splitlines() yields no trailing empty entry for a
#file that ends with a newline, so nothing has to be popped afterwards -- and
#the last real name is kept when the file has no final newline.
col_names = raw_names.splitlines()

''

In [4]:
#Create random continuous input data
import random

def CreateRandList(n=10):
    """Return a list of ``n`` floats, each drawn with ``random.random()``."""
    return [random.random() for _ in range(n)]

In [5]:
#Create rand cont vars
#Eleven independent lists of n random floats, generated in the order the
#names are listed
(QTY_TRACKED,
 MS1_Delta, MS2_Delta, MS3_Delta,
 PO_Delta, Schd_Rng,
 SOP_MS1_Delta, SOP_MS2_Delta,
 RAS_MS1_Delta, RAS_MS2_Delta,
 ETA_SCP_Delta) = [CreateRandList(n=n) for _ in range(11)]

In [6]:
#Get rand cat vars
#Category counts for country / destination / type. Each one is the exclusive
#upper bound for the random indices below and has to equal the width of the
#matching one-hot segment built in MakeArray (12, 8 and 6).
num_countries, num_dests, num_types = 12, 8, 6

def GetRandInts(ub, lb=0, n=10):
    """Return ``n`` random integers between ``lb`` and ``ub - 1`` inclusive.

    ``ub`` acts as an exclusive upper bound, so passing a category count
    produces values that can be used directly as zero-based indices.
    """
    highest = ub - 1  # random.randint includes both end points
    return [random.randint(lb, highest) for _ in range(n)]

#One list of n random category indices per categorical feature, drawn in the
#order country, destination, type
country_index, dest_index, type_index = [
    GetRandInts(ub=count, n=n)
    for count in (num_countries, num_dests, num_types)
]

In [7]:
#Column-oriented input for MakeArray; the position of each list matters
raw_data = [
    # positions 0-10: continuous features
    QTY_TRACKED,
    MS1_Delta,
    MS2_Delta,
    MS3_Delta,
    PO_Delta,
    Schd_Rng,
    SOP_MS1_Delta,
    SOP_MS2_Delta,
    RAS_MS1_Delta,
    RAS_MS2_Delta,
    ETA_SCP_Delta,
    # positions 11-13: categorical indices (country, destination, type)
    country_index,
    dest_index,
    type_index,
]

In [8]:
#Format data into proper nparray
def MakeArray(data):
    """Build the model input matrix from column-oriented raw data.

    Parameters
    ----------
    data : sequence of 14 equal-length sequences
        Positions 0-10 hold the continuous features. Positions 11, 12 and
        13 hold zero-based integer category indices for country
        (12 categories), destination (8) and material type (6).

    Returns
    -------
    numpy.ndarray
        One row per sample: the 11 continuous values followed by the
        one-hot encoded country, destination and material-type segments,
        i.e. 11 + 12 + 8 + 6 = 37 columns. Empty input gives shape (0, 37).

    Raises
    ------
    IndexError
        If a category index lies outside ``0 .. width - 1``. Negative
        indices are rejected explicitly because plain list indexing would
        wrap around and silently mark the wrong category.
    """
    n_cont = 11
    # One-hot widths in the order the categorical segments are emitted.
    # Label order of each segment (index 0 first):
    #   country:  AU, BE, CA, DE, GB, IT, KP, KR, NL, NO, RU, US
    #   dest:     ABB, CHY, CTC, FLD, HOU, MY1, SH2, SHP
    #   material: B, I, K, M, S, T
    cat_widths = (12, 8, 6)
    total_width = n_cont + sum(cat_widths)

    array = []
    for line in range(len(data[0])):
        #continuous values are copied through unchanged
        newline = [data[col][line] for col in range(n_cont)]

        #each categorical index becomes its own one-hot segment
        for offset, width in enumerate(cat_widths):
            index = data[n_cont + offset][line]
            if not 0 <= index < width:
                raise IndexError(
                    'category index {} out of range for one-hot width {}'
                    .format(index, width))
            one_hot = [0] * width
            one_hot[index] = 1
            newline.extend(one_hot)

        #append line to array
        array.append(newline)

    #convert to np array; the reshape keeps the result 2-D even with no rows
    return np.array(array).reshape(len(array), total_width)

In [9]:
#Build the feature matrix: one row per sample, the 11 continuous columns
#followed by the one-hot country / destination / type segments
data = MakeArray(raw_data)

In [10]:
#Sanity check: expect (n, 37) -- 11 continuous + 12 + 8 + 6 one-hot columns
data.shape

(25, 37)

In [11]:
#Load in all models
#NOTE(review): pickle.load can execute arbitrary code embedded in the file;
#only load model files that come from a trusted source.
#Each file is opened in a `with` block so its handle is closed right after
#the model has been read.

#DT
with open('DT_full.pickle','rb') as mypickle:
    DT_full = pickle.load(mypickle)
with open('DT_mini.pickle','rb') as mypickle:
    DT_mini = pickle.load(mypickle)

#RF
with open('RF_full.pickle','rb') as mypickle:
    RF_full = pickle.load(mypickle)
with open('RF_mini.pickle','rb') as mypickle:
    RF_mini = pickle.load(mypickle)

#SVM
with open('SVM_full.pickle','rb') as mypickle:
    SVM_full = pickle.load(mypickle)
with open('SVM_mini.pickle','rb') as mypickle:
    SVM_mini = pickle.load(mypickle)

#Load tensorflow models
NN_full = keras.models.load_model('NN_full.h5')
NN_mini = keras.models.load_model('NN_mini.h5')

#Function to convert TF probability array into 1D prediction array
def Probs2Preds(labels, prob_array):
    """Map each row of class scores to the label of its highest score.

    Parameters
    ----------
    labels : sequence
        Label for each column position of ``prob_array``.
    prob_array : iterable of sequences of numbers
        One row of per-class scores per sample.

    Returns
    -------
    numpy.ndarray
        1-D array holding the chosen label for every row. When several
        columns tie for the maximum, the first one wins.
    """
    # argmax picks a winner for every row independently, including rows whose
    # scores are all zero or negative. A running maximum that starts at 0
    # finds no winner for such a row, leaving the label undefined or carried
    # over from the previous row.
    preds = [labels[int(np.argmax(row))] for row in prob_array]
    return np.array(preds)

#Class label for each column of the NN output; passed to Probs2Preds below.
#NOTE(review): assumes this order matches the label encoding used when the
#networks were trained -- confirm against the training code.
labels = ['on-time', '1-7dayL', '7-30dayL', '30-90dayL', '>90dayL']

In [12]:
#Make Predictions
#All eight models score the same feature matrix `data`

#Decision tree predictions
DT_full_preds = DT_full.predict(data)
DT_mini_preds = DT_mini.predict(data)

#Random forest predictions
RF_full_preds = RF_full.predict(data)
RF_mini_preds = RF_mini.predict(data)

#Support vector machine predictions
SVM_full_preds = SVM_full.predict(data)
SVM_mini_preds = SVM_mini.predict(data)

#Neural network predictions: the probability array returned by predict()
#is converted straight into a 1D array of labels
NN_full_preds = Probs2Preds(labels=labels,
                            prob_array=NN_full.predict(data))
NN_mini_preds = Probs2Preds(labels=labels,
                            prob_array=NN_mini.predict(data))


In [13]:
#One row per model, one column per sample; the row order has to match the
#labels in preds_index
stacked_preds = np.stack(
    [
        DT_full_preds, DT_mini_preds,
        RF_full_preds, RF_mini_preds,
        SVM_full_preds, SVM_mini_preds,
        NN_full_preds, NN_mini_preds,
    ],
    axis=0,
)

In [14]:
def MakeColList(n=10):
    """Return the column headers 'prediction 0' through 'prediction n-1'."""
    return ['prediction {}'.format(i) for i in range(n)]

#One column header per generated sample
col_list = MakeColList(n=n)

In [15]:
#Row labels for the prediction table, one per model
preds_index = [
    'DT full', 'DT mini',
    'RF full', 'RF mini',
    'SVM full', 'SVM mini',
    'NN full', 'NN mini',
]

In [16]:
#Prediction table: rows are models, columns are the individual samples
pred_df = pd.DataFrame(stacked_preds, index=preds_index, columns=col_list)

In [19]:
#Show the predicted label of every model (rows) for every sample (columns)
display(pred_df)

Unnamed: 0,prediction 0,prediction 1,prediction 2,prediction 3,prediction 4,prediction 5,prediction 6,prediction 7,prediction 8,prediction 9,...,prediction 15,prediction 16,prediction 17,prediction 18,prediction 19,prediction 20,prediction 21,prediction 22,prediction 23,prediction 24
DT full,30-90dayL,>90dayL,>90dayL,30-90dayL,7-30dayL,7-30dayL,30-90dayL,30-90dayL,30-90dayL,>90dayL,...,30-90dayL,>90dayL,30-90dayL,>90dayL,7-30dayL,30-90dayL,>90dayL,on-time,>90dayL,1-7dayL
DT mini,30-90dayL,>90dayL,30-90dayL,7-30dayL,30-90dayL,7-30dayL,30-90dayL,>90dayL,30-90dayL,>90dayL,...,30-90dayL,30-90dayL,30-90dayL,>90dayL,7-30dayL,7-30dayL,30-90dayL,30-90dayL,7-30dayL,30-90dayL
RF full,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,...,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL
RF mini,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,...,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL
SVM full,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,30-90dayL,1-7dayL,>90dayL,...,>90dayL,>90dayL,1-7dayL,>90dayL,>90dayL,>90dayL,>90dayL,1-7dayL,30-90dayL,>90dayL
SVM mini,30-90dayL,>90dayL,>90dayL,30-90dayL,30-90dayL,30-90dayL,30-90dayL,30-90dayL,30-90dayL,>90dayL,...,>90dayL,30-90dayL,>90dayL,>90dayL,30-90dayL,>90dayL,30-90dayL,30-90dayL,>90dayL,>90dayL
NN full,30-90dayL,>90dayL,>90dayL,30-90dayL,30-90dayL,30-90dayL,30-90dayL,on-time,>90dayL,30-90dayL,...,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,30-90dayL,>90dayL,30-90dayL
NN mini,30-90dayL,>90dayL,>90dayL,>90dayL,30-90dayL,30-90dayL,>90dayL,7-30dayL,>90dayL,>90dayL,...,>90dayL,>90dayL,>90dayL,>90dayL,30-90dayL,7-30dayL,>90dayL,7-30dayL,>90dayL,on-time


In [20]:
#Per-sample agreement summary: 'unique' is the number of distinct labels
#predicted, 'top' the most common label and 'freq' how many models chose it
display(pred_df.describe())

Unnamed: 0,prediction 0,prediction 1,prediction 2,prediction 3,prediction 4,prediction 5,prediction 6,prediction 7,prediction 8,prediction 9,...,prediction 15,prediction 16,prediction 17,prediction 18,prediction 19,prediction 20,prediction 21,prediction 22,prediction 23,prediction 24
count,8,8,8,8,8,8,8,8,8,8,...,8,8,8,8,8,8,8,8,8,8
unique,2,1,2,3,3,3,2,4,3,2,...,2,2,3,1,3,3,2,5,3,4
top,30-90dayL,>90dayL,>90dayL,>90dayL,30-90dayL,30-90dayL,30-90dayL,30-90dayL,>90dayL,>90dayL,...,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,>90dayL,30-90dayL,>90dayL,>90dayL
freq,5,8,7,4,4,3,4,3,4,7,...,6,6,5,8,4,5,6,3,6,4
