In [15]:
import pandas as pd 
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense , BatchNormalization , LeakyReLU
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint ,TensorBoard
import datetime 
from pickle import load
import pdb
from sklearn.metrics import recall_score

In [6]:
# Colab-only setup: mount Google Drive at /content/drive. The data and model
# artifact paths used by the other cells in this notebook live under
# /content/drive/MyDrive, so this cell has to run before them.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
# Directory holding the persisted preprocessing / model artifacts.
_DEFAULT_ARTIFACT_DIR = '/content/drive/MyDrive/Colab Notebooks/Assignments/Self Case study -1'

# Columns stored as 'Yes'/'No' strings that are encoded as 1/0.
_YES_NO_COLUMNS = ['potential_issue', 'deck_risk', 'oe_constraint', 'ppap_risk', 'stop_auto_buy', 'rev_stop']

# Columns passed through the persisted scaler, in the order the scaler expects.
_SCALED_COLUMNS = ['national_inv', 'lead_time', 'in_transit_qty', 'forecast_3_month', 'forecast_6_month', 'forecast_9_month', 'sales_1_month', 'sales_3_month', 'sales_6_month', 'sales_9_month', 'min_bank', 'pieces_past_due', 'perf_6_month_avg', 'perf_12_month_avg', 'local_bo_qty', 'net_quantity', 'safe_quantity', 'max_fore_cast_1_month', 'min_fore_cast_1_month', 'safe_max_diff', 'safe_min_diff']

# Width of the autoencoder input/output and of its hidden encoder layers.
_N_FEATURES = 30
_ENCODER_UNITS = (25, 20, 15, 10)


def _add_custom_features(X):
    '''Append the nine engineered columns to X (modified in place) and return it.'''
    X['net_quantity'] = X['national_inv'] + X['in_transit_qty']
    X['safe_quantity'] = X['net_quantity'] - X['min_bank']
    X['safe_quantity_pos'] = np.where(X['safe_quantity'] >= 0, 1, 0)

    # Per-month forecast increments over the 6->9 and 3->6 month windows.
    late_step = (X['forecast_9_month'] - X['forecast_6_month']) / 3
    early_step = (X['forecast_6_month'] - X['forecast_3_month']) / 3
    # np.where(b > a, b, a) mirrors the built-in max(a, b) (and likewise for
    # min), including how a NaN operand is ordered, so results match the
    # previous row-wise implementation.
    X['max_fore_cast_1_month'] = np.where(early_step > late_step, early_step, late_step)
    X['min_fore_cast_1_month'] = np.where(early_step < late_step, early_step, late_step)

    X['safe_max_diff'] = X['safe_quantity'] - X['max_fore_cast_1_month']
    X['safe_min_diff'] = X['safe_quantity'] - X['min_fore_cast_1_month']
    X['safe_max_diff_pos'] = np.where(X['safe_max_diff'] >= 0, 1, 0)
    X['safe_min_diff_pos'] = np.where(X['safe_min_diff'] >= 0, 1, 0)
    return X


def _build_autoencoder():
    '''Rebuild the autoencoder graph and return (input_layer, bottleneck, autoencoder).'''
    input_layer = Input(shape=(_N_FEATURES,))

    # Encoder: Dense -> BatchNormalization -> LeakyReLU per hidden width.
    encoded = input_layer
    for units in _ENCODER_UNITS:
        encoded = Dense(units)(encoded)
        encoded = BatchNormalization()(encoded)
        encoded = LeakyReLU()(encoded)

    # Decoder: mirrors the encoder widths, excluding the bottleneck itself.
    decoded = encoded
    for units in reversed(_ENCODER_UNITS[:-1]):
        decoded = Dense(units)(decoded)
        decoded = BatchNormalization()(decoded)
        decoded = LeakyReLU()(decoded)

    output_layer = Dense(_N_FEATURES, activation='relu')(decoded)
    return input_layer, encoded, Model(input_layer, output_layer)


def final_fun_1(X, artifact_dir=_DEFAULT_ARTIFACT_DIR) :
    '''Predict whether each order in X goes on backorder.

    Steps: impute lead_time, encode the Yes/No columns as 1/0, drop
    'sku' / 'went_on_backorder' when present, add nine engineered columns,
    scale the numeric columns, append the 10 autoencoder bottleneck features
    and run the persisted final model.

    Parameters
    ----------
    X : pandas.DataFrame
        Raw order rows. The caller's frame is not modified.
    artifact_dir : str, optional
        Directory containing 'impute_leadtime.npy', 'scaler_new.pkl',
        'autoencoder_save/weights.hdf5' and 'final_rf_model.sav'.

    Returns
    -------
    Whatever final_model.predict returns for the assembled feature frame
    (presumably one label per input row -- confirm against the saved model).
    '''
    # Work on a copy: the imputation and Yes/No mapping below used to write
    # into the caller's frame, so a second call on the same frame mapped the
    # already-encoded 1/0 values to NaN.
    X = X.copy()

    impute_leadtime = np.load(os.path.join(artifact_dir, 'impute_leadtime.npy'))
    X['lead_time'] = X['lead_time'].fillna(impute_leadtime)  #1st preprocessing

    # Replacing the boolean columns which have Yes with 1 and No with 0
    for col in _YES_NO_COLUMNS:
        X[col] = X[col].map({'Yes': 1 , 'No' : 0})

    # Identifier and target are not model inputs; they may be absent already.
    X = X.drop(columns=['sku', 'went_on_backorder'], errors='ignore')

    # The custom nine fields
    X = _add_custom_features(X)

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point artifact_dir at trusted artifacts.
    scaled_X = X.copy(deep = True)
    with open(os.path.join(artifact_dir, 'scaler_new.pkl'), 'rb') as scaler_file:
        scaler = load(scaler_file)
    scaled_X[_SCALED_COLUMNS] = scaler.transform(scaled_X[_SCALED_COLUMNS].values)

    # Auto encoder features: rebuild the network, restore the trained weights,
    # then read the activations of the 10-unit bottleneck.
    tf.keras.backend.clear_session()
    input_layer, bottleneck, autoencoder = _build_autoencoder()
    autoencoder.load_weights(os.path.join(artifact_dir, 'autoencoder_save', 'weights.hdf5'))
    # Shares the autoencoder's (already loaded) layers up to the bottleneck.
    encode = Model(input_layer, bottleneck)

    auto_encode = encode.predict(scaled_X)
    auto_encode_columns = ['auto_encode_'+str(i) for i in range(1,11)]
    auto_encode = pd.DataFrame(data = auto_encode , columns = auto_encode_columns )

    # The final model receives the unscaled features plus the encoded ones.
    auto_encode = pd.concat([X.reset_index(drop=True), auto_encode.reset_index(drop=True)], axis=1)

    with open(os.path.join(artifact_dir, 'final_rf_model.sav'), 'rb') as model_file:
        final_model = load(model_file)
    return final_model.predict(auto_encode)

In [None]:
# Load the Kaggle test split and run the full prediction pipeline on it.
# low_memory=False parses each column in a single pass, which avoids the
# mixed-dtype warning that the default chunked parser appears to emit for
# this file (its last row is not a real record).
test_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Assignments/Self Case study -1/Kaggle_Test_Dataset_v2.csv', low_memory=False)
# Last row is invalid; .copy() detaches the slice from the frame read above.
test_data = test_data.iloc[:-1,:].copy()
final_fun_1(test_data)

In [31]:
def final_fun_2(X,y) :
    '''Score final_fun_1 on labelled data.

    X is the feature frame passed to final_fun_1 and y holds the true labels;
    the recall_score of the predictions against y is returned, rounded to
    three decimal places.
    '''
    predictions = final_fun_1(X)
    recall = recall_score(y, predictions)
    return round(recall, 3)

In [32]:
# Load the Kaggle test split, encode the target as 1/0 and report the recall.
# low_memory=False parses each column in a single pass, which avoids the
# mixed-dtype warning that the default chunked parser appears to emit for
# this file (its last row is not a real record).
test_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Assignments/Self Case study -1/Kaggle_Test_Dataset_v2.csv', low_memory=False)
# Last row is invalid; .copy() makes the column assignment below operate on
# an independent frame instead of a slice of the one read above.
test_data = test_data.iloc[:-1,:].copy()
test_data['went_on_backorder'] = test_data['went_on_backorder'].map({'Yes': 1 , 'No' : 0})
print(final_fun_2(test_data,test_data.went_on_backorder))

  interactivity=interactivity, compiler=compiler, result=result)


0.844
