# AVG Model

In this notebook, we take a first look at combining the predictions of several previously trained models.


In [11]:
import os
os.chdir('/home/app/src')
import time
import joblib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import top_k_accuracy_score
from sklearn.base import BaseEstimator, TransformerMixin

from xgboost import XGBClassifier

from utils import evaluation
from utils.build_df import build_df
from utils import tree_utils
from utils.text_normalizer import normalize_corpus
from utils.decoder import decode_id_path

from utils.combined_model_class import Combined_Model
from utils import utils_img
from utils import efficientnet

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. Importing Pre-Trained NLP Models (BL0 and BL1)

In [2]:
# Load the previously trained NLP estimators and their vectorizers.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.

# NLP model (BL0) and vectorizer used on the product title
model_title = joblib.load('/home/app/src/model/model_BL0')
vect_title  = joblib.load('/home/app/src/model/vect_BL0')


# NLP model (BL1) and vectorizer used on title + description
model_title_desc = joblib.load('/home/app/src/model/model_BL1')
vect_title_desc = joblib.load('/home/app/src/model/vect_BL1')


## 2. Importing Pre-Trained Image Model

In [6]:
# Locations of the image-model config, the folder that is passed to
# predict_from_folder, and the weights file.
CONFIG_YML = "/home/app/src/model/exp4.yml"

TEST_FOLDER = "/home/app/src/uploads/"

WEIGHTS = "/home/app/src/model/model.06-2.0593.h5"

config = utils_img.load_config(CONFIG_YML)

# The class list of the title NLP model is reused as the image model's class names.
# NOTE(review): assumes the CNN outputs follow the same class order — confirm.
MODEL_CLASSES = model_title.classes_

cnn_model = efficientnet.create_model(weights=WEIGHTS)


# Run the image model on TEST_FOLDER; `predictions` and `probs` are used by later cells.
predictions, labels, probs = utils_img.predict_from_folder(
    folder=TEST_FOLDER, 
    model=cnn_model, 
    input_size=config["data"]["image_size"], 
    class_names=MODEL_CLASSES,
)

2022-12-26 21:25:13.071600: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-12-26 21:25:13.071650: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-12-26 21:25:13.071679: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (docker-desktop): /proc/driver/nvidia/version does not exist
2022-12-26 21:25:13.073265: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [13]:
# Decode the first image prediction with decode_id_path and display the result.
decode_id_path(predictions[0])

['Computers & Tablets', 'Computer Cards & Components']

## 3. Create combined model to get predictions

In [21]:
# Instance whose predict_best_five method is called in the cells below.
final_model = Combined_Model()

In [18]:
# Combine the title model and the title+description model.
# NOTE(review): name_sample_v and name_descr_sample_v are only defined in
# section 4 further down, so on a fresh kernel this cell must run after those cells.
prediction = final_model.predict_best_five(X_list=[name_sample_v, name_descr_sample_v], 
                                           estimators=[model_title, model_title_desc], 
                                           max_k_feat=5)

In [19]:
# Display the result of the predict_best_five call above.
prediction

{'0': ['Musical Instruments', 'Keyboards'],
 '1': ['Computers & Tablets',
  'Computer Accessories & Peripherals',
  'Mice & Keyboards',
  'Computer Keyboards'],
 '2': ['Musical Instruments', 'Musical Instrument Accessories'],
 '3': ['other'],
 '4': ['Computers & Tablets', 'iPad & Tablet Accessories']}

## 4. Some inputs

In [22]:
# Label recorded alongside the example product.
true_label_sample = 'Keyboards'

# Example product: its title and its description.
name_sample = "Casio - Portable Keyboard with 61 Touch-Sensitive Keys - Black/Silver "
descr_sample = "CASIO Portable Keyboard with 61 Touch-Sensitive Keys: MIDI and USB connectivity; 600 AHL keyboard voices; 180 rhythms; 152 songs; auto accompaniment"

# Title immediately followed by the description (no separator is inserted).
name_descr_sample = "".join((name_sample, descr_sample))

In [23]:
# Normalize the title (passed as a one-element list), then vectorize it with
# the vectorizer loaded for the title model.
name_sample_n = normalize_corpus([name_sample])
name_sample_v= vect_title.transform(name_sample_n)

In [24]:
# Same two steps for title + description, using the vectorizer loaded for the
# title+description model.
name_descr_sample_n = normalize_corpus([name_descr_sample])
name_descr_sample_v= vect_title_desc.transform(name_descr_sample_n)

In [29]:
# Title model only: the same input/estimator pair is passed twice.
# NOTE(review): presumably this makes the combination equal to the single
# model's ranking — confirm against Combined_Model.
pred_model_name = final_model.predict_best_five([name_sample_v, name_sample_v],[model_title, model_title],5 )
pred_model_name

{'0': ['Computers & Tablets',
  'Computer Accessories & Peripherals',
  'Mice & Keyboards',
  'Computer Keyboards'],
 '1': ['Musical Instruments', 'Keyboards'],
 '2': ['Musical Instruments', 'Musical Instrument Accessories'],
 '3': ['other'],
 '4': ['Computers & Tablets', 'iPad & Tablet Accessories']}

In [30]:
# Title+description model only: the same input/estimator pair is passed twice.
# NOTE(review): presumably this makes the combination equal to the single
# model's ranking — confirm against Combined_Model.
pred_desc_name = final_model.predict_best_five([name_descr_sample_v, name_descr_sample_v],[model_title_desc, model_title_desc],5 )
pred_desc_name

{'0': ['Musical Instruments', 'Keyboards'],
 '1': ['Musical Instruments', 'Musical Instrument Accessories'],
 '2': ['Computers & Tablets',
  'Computer Accessories & Peripherals',
  'Mice & Keyboards',
  'Computer Keyboards'],
 '3': ['other'],
 '4': ['Computers & Tablets',
  'iPad & Tablet Accessories',
  'Cases, Covers & Keyboard Folios']}

In [31]:
# Both NLP models together: title features with the title model, and
# title+description features with the title+description model.
pred_nlp_models = final_model.predict_best_five([name_sample_v, name_descr_sample_v],[model_title, model_title_desc],5 )
pred_nlp_models

{'0': ['Musical Instruments', 'Keyboards'],
 '1': ['Computers & Tablets',
  'Computer Accessories & Peripherals',
  'Mice & Keyboards',
  'Computer Keyboards'],
 '2': ['Musical Instruments', 'Musical Instrument Accessories'],
 '3': ['other'],
 '4': ['Computers & Tablets', 'iPad & Tablet Accessories']}

In [32]:
# Run the image model on TEST_FOLDER again (same arguments as the call in section 2).
# NOTE: this rebinds `prediction`, which held the combined NLP result from
# section 3, and also rebinds `probs`; the second return value is discarded.
prediction, _, probs = utils_img.predict_from_folder(
    folder=TEST_FOLDER, 
    model=cnn_model, 
    input_size=config["data"]["image_size"], 
    class_names=MODEL_CLASSES,
)

In [52]:
def get_feat_max(cat_prob, max_k_feat, classes):
    """Return the decoded names of the most probable classes for one product.

    Parameters
    ----------
    cat_prob : array-like
        Predicted probability of each class for a single product. Anything
        ``np.asarray`` accepts (ndarray, list, tuple) is allowed.
    max_k_feat : int
        Number of highest-probability classes to keep.
    classes : sequence
        Class identifiers; ``classes[i]`` is the class scored by ``cat_prob[i]``.

    Returns
    -------
    dict
        Keys are the 1-based rank as a string ("1" is the most probable class),
        values are ``decode_id_path(class_id)`` for the class at that rank.
    """
    # Coerce first: unary minus on a plain list or tuple raises TypeError.
    cat_prob = np.asarray(cat_prob)
    # argsort is ascending, so sort the negated scores to get descending probability.
    top_idx = np.argsort(-cat_prob)[:max_k_feat]

    return {
        str(rank): decode_id_path(classes[idx])
        for rank, idx in enumerate(top_idx, start=1)
    }

In [59]:
# Scores passed to get_feat_max in the next cell.
# NOTE(review): assumes probs[0][0] is the 1-D per-class score vector of the first image — confirm.
y_pred_img = probs[0][0]

In [60]:
# Five highest-scoring classes from the image scores, decoded with the title model's class list.
get_feat_max(y_pred_img, 5, model_title.classes_)

{'1': ['Musical Instruments', 'Keyboards'],
 '2': ['Musical Instruments'],
 '3': ['Musical Instruments', 'Recording Equipment'],
 '4': ['other'],
 '5': ['Musical Instruments', 'DJ & Lighting Equipment']}

## 5. Super Model (NLP + Image)

In [None]:
# NOTE(review): this repeats the image-model setup from section 2 (same paths,
# config loading and model creation); consider reusing those objects instead.
CONFIG_YML = "/home/app/src/model/exp4.yml"

TEST_FOLDER = "/home/app/src/uploads/"

WEIGHTS = "/home/app/src/model/model.06-2.0593.h5"

config = utils_img.load_config(CONFIG_YML)

MODEL_CLASSES = model_title.classes_

cnn_model = efficientnet.create_model(weights=WEIGHTS)


# The direct folder prediction is disabled here; TEST_FOLDER and cnn_model are
# passed to predict_best_five in the next cell instead.
# predictions, labels, probs = utils_img.predict_from_folder(
#     folder=TEST_FOLDER, 
#     model=cnn_model, 
#     input_size=config["data"]["image_size"], 
#     class_names=MODEL_CLASSES,
# )

In [2]:
super_model = Combined_Model()

# Combine both NLP models with the image model.
# The call goes through the instance created above so this cell does not
# depend on `final_model` from section 3.
# NOTE(review): the image input is given as the folder path TEST_FOLDER rather
# than a feature matrix — confirm predict_best_five supports this.
prediction = super_model.predict_best_five(X_list=[name_sample_v, name_descr_sample_v, TEST_FOLDER], 
                                           estimators=[model_title, model_title_desc, cnn_model], 
                                           max_k_feat=5)

NameError: name 'Combined_Model' is not defined

In [65]:
# Display the current value of `prediction`.
prediction

{'0': ['Musical Instruments', 'Keyboards'],
 '1': ['Computers & Tablets',
  'Computer Accessories & Peripherals',
  'Mice & Keyboards',
  'Computer Keyboards'],
 '2': ['Musical Instruments', 'Musical Instrument Accessories'],
 '3': ['other'],
 '4': ['Computers & Tablets', 'iPad & Tablet Accessories']}