<a href="https://colab.research.google.com/github/sheensta/retail_products_ensemble_deep_learning/blob/main/model_evaluations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import f1_score

from scipy import stats

import keras
from keras.models import load_model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

import joblib

In [None]:
#Load data
# df_NLP.csv provides the 'categories' label column used for encoding below
# (the same file is re-read later for its 'description_clean' text column).
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/df_NLP.csv')
# Split the saved array along its first axis and store one element per row.
# NOTE(review): assumes the .npy entries are in the same order as the CSV rows - confirm.
df['array'] = list(np.load('/content/drive/MyDrive/Colab Notebooks/np_img_array.npy'))

In [None]:
#Label encoding
# Fit the encoder on the category names (fit returns the encoder itself),
# then turn the resulting integer ids into one-hot rows.
le = LabelEncoder().fit(df['categories'])
y = to_categorical(list(le.transform(df['categories'])))

In [None]:
#Preprocessing for image models (default)
X_img = list(df['array'])
# Same test_size / random_state as the text splits further down, so every
# model family is evaluated on the same rows.
X_train_img, X_test_img, y_train, y_test = train_test_split(X_img, y, test_size=0.3, random_state=42)
# Fix: the training array used to be built from X_test_img, which silently
# replaced the training images with the test images.
X_train_img = np.array(X_train_img)
X_test_img = np.array(X_test_img)

# preprocess_input may overwrite its argument in place when the dtype is
# compatible. Pass copies so the raw arrays (used by the baseline models and
# by the second preprocessing below) are never modified.

#Preprocessing for VGG16
X_train_vgg = keras.applications.vgg16.preprocess_input(X_train_img.copy())
X_test_vgg = keras.applications.vgg16.preprocess_input(X_test_img.copy())

#Preprocessing for ResNet50
X_train_resnet = keras.applications.resnet50.preprocess_input(X_train_img.copy())
X_test_resnet = keras.applications.resnet50.preprocess_input(X_test_img.copy())

In [None]:
#Preprocessing for NLP models with w2v
# Every column of this CSV is passed to the split as a feature.
# NOTE(review): presumably one word2vec feature vector per product row, in the
# same row order as `y` - confirm against the notebook that wrote the file.
X_nlp = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/w2v_feature_array_evaluation.csv')
# Same test_size / random_state as the image split; y_train / y_test are
# re-assigned here from the same `y`.
X_train_nlp, X_test_nlp, y_train, y_test = train_test_split(X_nlp, y, test_size=0.3, random_state=42)

In [None]:
# Column names 'f0' .. 'f99', generated instead of being spelled out by hand.
XGB_features = ['f{}'.format(feature_idx) for feature_idx in range(100)]

In [None]:
#Preprocessing for NLP deep learning models
# Every sequence is padded (at the end) or truncated to this many tokens.
maxlen = 250

X_nlp_dl = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/df_NLP.csv')
# Force text dtype so missing descriptions become strings the tokenizer accepts.
X_nlp_dl['description_clean'] = X_nlp_dl['description_clean'].astype(str)

# The vocabulary is fitted on the full corpus, i.e. before the train/test split.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_nlp_dl['description_clean'])

# Same test_size / random_state as the other splits in this notebook.
X_train_nlp_dl, X_test_nlp_dl, y_train, y_test = train_test_split(X_nlp_dl, y, test_size=0.3, random_state=42)

# Text -> integer token ids -> fixed-length, post-padded matrix.
X_train_nlp_dl = pad_sequences(
    tokenizer.texts_to_sequences(X_train_nlp_dl['description_clean']),
    padding='post',
    maxlen=maxlen,
)
X_test_nlp_dl = pad_sequences(
    tokenizer.texts_to_sequences(X_test_nlp_dl['description_clean']),
    padding='post',
    maxlen=maxlen,
)

Image classification models

In [None]:
# Load the four saved image classifiers from Drive.
# NOTE: the names VGG16 and ResNet50 below are the loaded model objects,
# not the keras.applications constructors of the same name.
#Baseline model (augmentation)
model1 = load_model('/content/drive/MyDrive/models/baseline_model_with_augmentation.h5')
#Baseline model architecture 2(augmentation)
model2 = load_model('/content/drive/MyDrive/models/CV_architecture2.h5')
#VGG16 model (augmentation)
VGG16 = load_model('/content/drive/MyDrive/models/VGG_transfer_with_augmentation.h5')
#ResNet50 model (augmentation)
ResNet50 = load_model('/content/drive/MyDrive/models/resnet50_transfer_aug_15epochs.h5')

NLP models

In [None]:
# Load the saved text classifiers from Drive.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load .pkl files from a trusted source.
#RandomForest
NLP_RF = joblib.load('/content/drive/MyDrive/models/NLP_rf.pkl')
#XGBoost
NLP_XGB = joblib.load('/content/drive/MyDrive/models/NLP_XGB.pkl')
#Architecture 1, custom embedding
NLP_model1 = load_model('/content/drive/MyDrive/models/NLP_custom_trainedsimple DL.h5')
#Architecture 1, GloVe embedding
NLP_model_glove = load_model('/content/drive/MyDrive/models/NLP_GloVeEmbedding.h5')
#CNN, custom embedding
NLP_CNN = load_model('/content/drive/MyDrive/models/NLP_custom_CNN.h5')
#CNN, GloVe embedding
NLP_CNN_glove = load_model('/content/drive/MyDrive/models/NLP_GloVe_CNN.h5')

Get predictions from each model

In [None]:
#Image predictions
# The two baseline models receive the raw test images; the transfer-learning
# models receive the test images run through their matching preprocess_input.
y_preds1 = model1.predict(X_test_img)
y_preds2 = model2.predict(X_test_img)
y_vgg = VGG16.predict(X_test_vgg)
y_resnet = ResNet50.predict(X_test_resnet)

In [None]:
#Text predictions - classic ML
# Predictions are read back from CSV instead of being recomputed; the
# commented-out lines record the calls that presumably produced those files.
#y_preds_rf = NLP_RF.predict(X_test_nlp)
# NOTE(review): unlike the XGBoost file below, no 'Unnamed: 0' column is
# dropped here - confirm y_preds_rf.csv was written without an index column.
y_preds_rf = np.array(pd.read_csv('/content/drive/MyDrive/Colab Notebooks/y_preds_rf.csv'))
#y_preds_XGB = NLP_XGB.predict_proba(X_test_nlp.rename(columns=dict(zip(X_test_nlp.columns, XGB_features))))
y_preds_XGB = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/y_preds_xgb.csv')
# Drop the saved index column so only the prediction columns remain.
y_preds_XGB = np.array(y_preds_XGB.drop(columns='Unnamed: 0'))

In [None]:
#Text predictions - Deep Learning
# All four networks are fed the same padded token-id matrix built from the
# test split of 'description_clean'.
y_NLP1 = NLP_model1.predict(X_test_nlp_dl)
y_NLP1_glove = NLP_model_glove.predict(X_test_nlp_dl)
y_CNN = NLP_CNN.predict(X_test_nlp_dl)
y_CNN_glove = NLP_CNN_glove.predict(X_test_nlp_dl)

Calculate per-class and overall weighted F1-scores

In [None]:
#decode predictions
def max_prediction(prediction):
  """Return, for each row of `prediction`, the index of its largest value.

  `prediction` is any sequence supporting len() and integer indexing (for
  example a 2-D score array with one row per sample). The result is a plain
  list with one argmax index per row.
  """
  return [np.argmax(prediction[row_idx]) for row_idx in range(len(prediction))]

def decode_predictions_f1(predictions_list):
  """Per-class F1 on the test split for one model's score matrix.

  predictions_list: per-sample class scores (one row per test sample); the
    predicted class is the argmax of each row.
  Returns a dict mapping each name in le.classes_ to that class's F1 score.
  """
  # y_test is one-hot (it comes from to_categorical); f1_score cannot mix a
  # one-hot y_true with integer predictions, so collapse it to class ids.
  y_true = np.asarray(y_test)
  if y_true.ndim > 1:
    y_true = y_true.argmax(axis=1)
  # Pin the label order so scores stay aligned with le.classes_ even when a
  # class is absent from both y_true and the predictions.
  class_ids = np.arange(len(le.classes_))
  per_class_f1 = f1_score(y_true, max_prediction(predictions_list), labels=class_ids, average=None)
  return dict(zip(le.classes_, per_class_f1))

def decode_f1_score(prediction_list):
  """Support-weighted F1 on the test split for one model's score matrix.

  prediction_list: per-sample class scores (one row per test sample); the
    predicted class is the argmax of each row.
  Returns the weighted-average F1 as a single float.
  """
  # y_test is one-hot (it comes from to_categorical); f1_score cannot mix a
  # one-hot y_true with integer predictions, so collapse it to class ids.
  y_true = np.asarray(y_test)
  if y_true.ndim > 1:
    y_true = y_true.argmax(axis=1)
  return f1_score(y_true, max_prediction(prediction_list), average='weighted')

def decode_predictions_f1_mode(predictions_list):
  """Per-class F1 on the test split for already-decoded class ids.

  predictions_list: one predicted class id per test sample (for example the
    result of a majority vote), not a score matrix.
  Returns a dict mapping each name in le.classes_ to that class's F1 score.
  """
  # y_test is one-hot (it comes from to_categorical); f1_score cannot mix a
  # one-hot y_true with integer predictions, so collapse it to class ids.
  y_true = np.asarray(y_test)
  if y_true.ndim > 1:
    y_true = y_true.argmax(axis=1)
  # Pin the label order so scores stay aligned with le.classes_ even when a
  # class is absent from both y_true and the predictions.
  class_ids = np.arange(len(le.classes_))
  per_class_f1 = f1_score(y_true, predictions_list, labels=class_ids, average=None)
  return dict(zip(le.classes_, per_class_f1))

def decode_f1_score_mode(prediction_list):
  """Support-weighted F1 on the test split for already-decoded class ids.

  prediction_list: one predicted class id per test sample (for example the
    result of a majority vote), not a score matrix.
  Returns the weighted-average F1 as a single float.
  """
  # y_test is one-hot (it comes from to_categorical); f1_score cannot mix a
  # one-hot y_true with integer predictions, so collapse it to class ids.
  y_true = np.asarray(y_test)
  if y_true.ndim > 1:
    y_true = y_true.argmax(axis=1)
  return f1_score(y_true, prediction_list, average='weighted')

In [None]:
# Model objects and their test-set predictions, kept in matching order
# (image models first, then text models). The order of the two *_predictions
# lists determines the column order of the F1 table built from them.
img_models_list = [model1, model2, VGG16, ResNet50]
NLP_models_list = [NLP_RF, NLP_XGB, NLP_model1, NLP_model_glove, NLP_CNN, NLP_CNN_glove]
img_predictions_list = [y_preds1, y_preds2, y_vgg, y_resnet]
NLP_predictions_list = [y_preds_rf, y_preds_XGB, y_NLP1, y_NLP1_glove, y_CNN, y_CNN_glove]

In [None]:
# One list of per-class F1 values per model: image models first, then text models.
f1_score_imgs = [
    list(decode_predictions_f1(model_scores).values())
    for model_scores in img_predictions_list
]
f1_score_NLP = [
    list(decode_predictions_f1(model_scores).values())
    for model_scores in NLP_predictions_list
]
f1_scores = [*f1_score_imgs, *f1_score_NLP]

In [None]:
def highlight_max(data, color='yellow'):
    '''
    Build CSS background styles that mark the largest value(s).

    data:  Series (row/column-wise Styler.apply) or DataFrame (axis=None).
           Values may be numbers or strings containing '%'.
    color: CSS colour used for the highlighted cells.

    Returns a list of style strings for a Series, or a DataFrame of style
    strings with the same index and columns for a DataFrame. Every cell equal
    to the maximum is highlighted; all other cells get an empty string.
    '''
    css = 'background-color: {}'.format(color)
    # Strip percent signs so formatted strings can be compared numerically.
    numeric = data.replace('%', '', regex=True).astype(float)

    if numeric.ndim != 1:
        # DataFrame: compare every cell against the global maximum.
        peak = numeric.max().max()
        return pd.DataFrame(np.where(numeric == peak, css, ''),
                            index=numeric.index, columns=numeric.columns)

    # Series: compare each entry against the Series maximum.
    peak = numeric.max()
    return [css if hit else '' for hit in numeric == peak]

In [None]:
#create dataframe
# One column per model, in the same order as
# img_predictions_list + NLP_predictions_list.
# Fix: 'CV_VGG16' previously carried a stray leading space (' CV_VGG16').
df_colnames = ['CV_model1','CV_model2','CV_VGG16','CV_ResNet50', 'NLP_RF', 'NLP_XGB', 'NLP_model1', 'NLP_model_glove', 'NLP_CNN', 'NLP_CNN_glove']
# f1_scores holds one row per model; transpose so rows are classes and
# columns are models.
df2 = pd.DataFrame(data = np.transpose(np.array(f1_scores)), columns = df_colnames)
df2.index = le.classes_
# Single summary row with each model's weighted F1.
f1_score_weighted = [decode_f1_score(x) for x in img_predictions_list + NLP_predictions_list]
df3 = pd.DataFrame(data = (f1_score_weighted)).transpose()
df3.columns = df_colnames
df3.index = ['Overall_weighted_f1']
df_concat = pd.concat([df2, df3])
# axis=1 applies highlight_max to each row, marking the best model per class.
df_style = df_concat.style.apply(highlight_max, axis = 1)
df_style = df_style.format("{:,.3f}")
df_style

Unnamed: 0,CV_model1,CV_model2,CV_VGG16,CV_ResNet50,NLP_RF,NLP_XGB,NLP_model1,NLP_model_glove,NLP_CNN,NLP_CNN_glove
All Beauty,0.006,0.109,0.349,0.35,0.612,0.526,0.605,0.504,0.57,0.503
All Electronics,0.003,0.09,0.165,0.217,0.534,0.458,0.56,0.466,0.552,0.495
Appliances,0.136,0.266,0.309,0.376,0.817,0.794,0.853,0.813,0.833,0.749
"Arts, Crafts & Sewing",0.016,0.112,0.183,0.267,0.715,0.674,0.754,0.697,0.757,0.641
Automotive,0.155,0.221,0.276,0.376,0.649,0.606,0.673,0.603,0.612,0.553
Baby,0.149,0.295,0.394,0.481,0.719,0.666,0.728,0.656,0.708,0.607
Baby Products,0.132,0.197,0.218,0.319,0.61,0.528,0.625,0.553,0.645,0.525
Beauty,0.01,0.278,0.153,0.325,0.606,0.524,0.596,0.508,0.608,0.5
Cell Phones & Accessories,0.202,0.276,0.513,0.555,0.855,0.802,0.854,0.819,0.862,0.83
"Clothing, Shoes & Jewelry",0.217,0.388,0.471,0.531,0.73,0.664,0.758,0.682,0.726,0.651


Ensemble modeling

In [None]:
def ensemble_voting(list_predictions):
  """Majority vote across several models' predictions.

  list_predictions: list of per-model score matrices; each is decoded to
    class ids with max_prediction.
  Returns the scipy.stats.mode result computed over the stacked class ids
  (default axis, i.e. across models).
  """
  votes = np.array([max_prediction(model_scores) for model_scores in list_predictions])
  return stats.mode(votes)

In [None]:
#Based on simple average
# Fix: the arrays must be separate list items. Joining them with '+' built a
# one-element list, so n_list was 1 and the "average" was really a sum.
preds_list_avg = [y_NLP1, y_CNN, y_preds_rf, y_resnet, y_preds_XGB]
n_list = len(preds_list_avg)
# Element-wise mean of the score matrices (all must share one shape);
# computed once and reused for both metrics.
ensemble_mean_probs = sum(preds_list_avg)/n_list
ensemble_mean_f1 = decode_f1_score(ensemble_mean_probs)
ensemble_mean_f1_categories = decode_predictions_f1(ensemble_mean_probs)

#Based on mode (majority vote over each model's predicted class)
f1_dict = dict(zip(df_colnames, f1_score_weighted))
preds_list_mode = [y_NLP1, y_CNN, y_preds_rf, y_resnet]
a = ensemble_voting(preds_list_mode)
# a[0] holds the modal class per sample. Depending on the SciPy version it is
# shaped (1, n) or (n,); ravel gives a flat vector in both cases, whereas
# a[0][0] only works for the (1, n) layout.
ensemble_votes = list(np.ravel(a[0]))
ensemble_mode_f1 = decode_f1_score_mode(ensemble_votes)
ensemble_mode_f1_categories = decode_predictions_f1_mode(ensemble_votes)

ValueError: ignored

In [None]:
# Add the two ensemble results as extra columns: per-class F1 values go into
# df2, the overall weighted F1 into the single-row df3. Both frames are
# modified in place, then re-concatenated and re-styled.
df2['ensemble_mean'] = list(ensemble_mean_f1_categories.values())
df2['ensemble_mode'] = list(ensemble_mode_f1_categories.values())
df3['ensemble_mean'] = ensemble_mean_f1
df3['ensemble_mode'] = ensemble_mode_f1 
df_concat = pd.concat([df2, df3])
# axis=1 applies highlight_max to each row, marking the best column per class.
df_style = df_concat.style.apply(highlight_max, axis = 1)
df_style = df_style.format("{:,.3f}")
df_style

In [None]:
# Persist the combined per-class and overall F1 table (unstyled values) to Drive.
df_concat.to_csv('/content/drive/MyDrive/Colab Notebooks/final_models_f1_scores.csv')

In [None]:
#Decode predictions for the full models (scored against the training split)
def decode_predictions_f1_full(predictions_list):
  """Per-class F1 against the training split for one model's score matrix.

  predictions_list: per-sample class scores (one row per training sample);
    the predicted class is the argmax of each row.
  Returns a dict mapping each name in le.classes_ to that class's F1 score.
  """
  # y_train is one-hot (it comes from to_categorical); f1_score cannot mix a
  # one-hot y_true with integer predictions, so collapse it to class ids.
  y_true = np.asarray(y_train)
  if y_true.ndim > 1:
    y_true = y_true.argmax(axis=1)
  # Pin the label order so scores stay aligned with le.classes_ even when a
  # class is absent from both y_true and the predictions.
  class_ids = np.arange(len(le.classes_))
  per_class_f1 = f1_score(y_true, max_prediction(predictions_list), labels=class_ids, average=None)
  return dict(zip(le.classes_, per_class_f1))

def decode_f1_score_full(prediction_list):
  """Support-weighted F1 against the training split for one model's score matrix.

  prediction_list: per-sample class scores (one row per training sample);
    the predicted class is the argmax of each row.
  Returns the weighted-average F1 as a single float.
  """
  # y_train is one-hot (it comes from to_categorical); f1_score cannot mix a
  # one-hot y_true with integer predictions, so collapse it to class ids.
  y_true = np.asarray(y_train)
  if y_true.ndim > 1:
    y_true = y_true.argmax(axis=1)
  return f1_score(y_true, max_prediction(prediction_list), average='weighted')

In [None]:
# using full models
# Load the models stored under models/full_models/.
# NOTE(review): presumably these were retrained on the complete dataset - confirm.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load .pkl files from a trusted source.
FULL_NLP1 = load_model('/content/drive/MyDrive/models/full_models/FULL_NLP1.h5')
FULL_NLP_CNN = load_model('/content/drive/MyDrive/models/full_models/FULL_NLP_CNN.h5')
FULL_NLP_rf = joblib.load('/content/drive/MyDrive/models/full_models/FULL_NLP_rf.pkl')
FULL_NLP_xgb = joblib.load('/content/drive/MyDrive/models/full_models/FULL_NLP_xgb.pkl')
FULL_resnet = load_model('/content/drive/MyDrive/models/full_models/FULL_resnet.h5')

In [None]:
#Image predictions
# NOTE(review): the earlier ResNet50 model is fed X_test_resnet (run through
# resnet50.preprocess_input), while FULL_resnet receives the unpreprocessed
# X_test_img here - confirm this is intended.
y_preds1_img = FULL_resnet.predict(X_test_img)
# Only the last expression of a cell is displayed, so this per-class result
# is computed but not shown.
decode_predictions_f1(y_preds1_img)
decode_f1_score(y_preds1_img)

In [None]:
# Score the full CNN text model on the training split (the *_full helpers
# compare against y_train).
y_full_CNN = FULL_NLP_CNN.predict(X_train_nlp_dl)
# Only the last expression of a cell is displayed, so this per-class result
# is computed but not shown.
decode_predictions_f1_full(y_full_CNN)
decode_f1_score_full(y_full_CNN)

In [None]:
# Score the full architecture-1 text model on the training split and display
# its per-class F1 (the weighted score call is left disabled).
y_full_1 = FULL_NLP1.predict(X_train_nlp_dl)
decode_predictions_f1_full(y_full_1)
#decode_f1_score_full(y_full_1)

In [None]:
# Category name -> integer id, in the order of the fitted encoder's classes_.
mapping = {category: class_id for class_id, category in enumerate(le.classes_)}
mapping