# Product Classification

## Evaluate your model


In [12]:

# Setup cell: changes the working directory, imports the project-local helpers
# and the third-party libraries, then changes the working directory again.
# NOTE(review): this cell carries execution count 12 while the cells after it
# are numbered 2-9, i.e. it was re-run out of order. Confirm the notebook still
# works under Restart Kernel -> Run All.
import os
# Working directory in effect while the `utils` imports below are resolved.
# NOTE(review): presumably required for `utils` to be importable - confirm.
os.chdir("/home/app/src/utils")
from sklearn.metrics import accuracy_score, classification_report
from utils import evaluation
from utils import utils
# Binds only `decoder`; the bare name `utils_img` is NOT bound by this line.
from utils.utils_img import decoder

from utils import efficientnet
from utils import resnet_50
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from utils.build_df import build_df
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from utils import tree_utils
from sklearn.metrics import top_k_accuracy_score
from sklearn.base import BaseEstimator, TransformerMixin
from joblib import dump, load
# `os` is already imported at the top of this cell; this re-import is redundant.
import os
# Working directory used by the rest of the notebook.
os.chdir("/home/app/src/")

## Experiment settings

Set the following two variables:

- **CONFIG_YML:** assign the path to the config.yml file used for the experiment you want to evaluate
- **WEIGHTS:** assign the path to the model weights (.h5 file) you want to evaluate


In [2]:
# Modify this: select the experiment to evaluate.

# Path to the config.yml file used for the experiment being evaluated.
CONFIG_YML = "/home/app/src/model_training/CV_models/exp4/exp4.yml"
# NOTE(review): leftover scratch note, apparently an earlier checkpoint name /
# location - confirm whether it can be deleted:
#   3.5181.h5   /home/gianniif/ecommerce-predictor/experiments/exp4/model.06-2.0593.h5
# Path to the model weights (.h5 file) being evaluated.
WEIGHTS = "/home/app/src/model/model.06-2.0593.h5"


In [3]:
# (!) Don't touch this cell, will load the config file specified before
# The result is indexed as a nested mapping by later cells
# (e.g. config['data']['directory'], config['model']['classes']).
config = utils.load_config(CONFIG_YML)
# Bare expression: shows the loaded configuration as the cell output.
config

{'seed': 123,
 'data': {'directory': '/home/app/src/data_splitted/train',
  'labels': 'inferred',
  'label_mode': 'categorical',
  'validation_split': 0.2,
  'image_size': [224, 224],
  'batch_size': 32},
 'model': {'weights': 'imagenet',
  'input_shape': [224, 224, 3],
  'classes': 213,
  'dropout_rate': 0.2,
  'data_aug_layer': {'random_flip': {'mode': 'horizontal'}}},
 'compile': {'optimizer': {'adam': {'learning_rate': 0.001}},
  'loss': 'categorical_crossentropy',
  'metrics': ['accuracy']},
 'fit': {'epochs': 150,
  'callbacks': {'model_checkpoint': {'filepath': '/home/app/src/experiments/exp4/model.{epoch:02d}-{val_loss:.4f}.h5',
    'save_best_only': True},
   'tensor_board': {'log_dir': '/home/app/src/experiments/exp4/logs'}}}}

In [4]:
# (!) Don't touch this cell, will use the config file to infer the class names
#     and also to load the corresponding testing dataset.
#     If you get an error, you must review your data/code.

# Class names obtained from the config; compared below against both the
# configured class count and the sub-folders of the test set.
MODEL_CLASSES = utils.get_class_names(config)

if len(MODEL_CLASSES) != config['model']['classes']:
    raise ValueError(
        "Number of classes doesn't match between your model "
        "and your data!"
    )

# The test split is expected to sit next to the training directory.
# normpath() strips a trailing separator first: without it, a directory
# written as ".../train/" would make the parent resolve to ".../train" itself
# and TEST_FOLDER would wrongly point inside the training data.
_dirname = os.path.dirname(os.path.normpath(config['data']['directory']))
TEST_FOLDER = os.path.join(_dirname, 'test')

# isdir() (rather than exists()) also rejects a plain file named 'test',
# which would otherwise make os.listdir() fail below.
if not os.path.isdir(TEST_FOLDER):
    raise ValueError("'test' folder not found!")

# List the folder once and reuse the result for both checks.
_test_entries = os.listdir(TEST_FOLDER)

if len(_test_entries) != config['model']['classes']:
    raise ValueError(
        "Number of classes doesn't match between your model "
        "and your testing dataset!"
    )

if set(_test_entries) != set(MODEL_CLASSES):
    raise ValueError(
        "The name of the subfolders inside your test set "
        "doesn't match with the model classes!"
    )

## Load your model

Use `efficientnet.create_model()` and remember to set up the model weights properly!

Assign the model to the variable `cnn_model`.


In [6]:
# Build the classifier, passing the weights file selected in the
# "Experiment settings" cell.
cnn_model = efficientnet.create_model(weights=WEIGHTS)

# It should print your model correctly.
# summary() writes the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
cnn_model.summary()

2022-12-20 02:31:24.508556: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-20 02:31:24.517332: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-20 02:31:24.518172: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-20 02:31:24.520194: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 sequential (Sequential)     (None, 224, 224, 3)       0         
                                                                 
 efficientnetv2-b0 (Function  (None, 1280)             5919312   
 al)                                                             
                                                                 
 dropout (Dropout)           (None, 1280)              0         
                                                                 
 dense (Dense)               (None, 213)               272853    
                                                                 
Total params: 6,192,165
Trainable params: 6,131,557
Non-trainable params: 60,608
______________________________________________

## Get predictions from testing dataset

In [7]:
# (!) Don't touch this cell, will use the loaded model and
#     the function utils_img.predict_from_folder() to get
#     model predictions and the corresponding true labels
#     so we can measure the accuracy
from PIL import ImageFile

# The setup cell binds only `decoder` from utils.utils_img; import the module
# itself so `utils_img.predict_from_folder` resolves on a fresh kernel.
from utils import utils_img

# Let PIL load image files that are truncated instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

predictions, labels, probs = utils_img.predict_from_folder(
    folder=TEST_FOLDER,
    model=cnn_model,
    input_size=config["data"]["image_size"],
    class_names=MODEL_CLASSES,
)

if len(predictions) != len(labels):
    raise ValueError(
        "The lenght of predictions and labels lists doesn't match!"
    )

# Guard the [0] look-ups below: an empty result would otherwise surface as an
# opaque IndexError instead of a readable message.
if len(predictions) == 0:
    raise ValueError(
        "No predictions were produced; is the test folder empty?"
    )

if not isinstance(predictions[0], str):
    raise ValueError(
        "Model predictions should be represented as string. E.g: 'Acura RL Sedan 2012'"
    )

if not isinstance(labels[0], str):
    raise ValueError(
        "Ground true labels should be represented as string. E.g: 'Acura RL Sedan 2012'"
    )


2022-12-20 02:31:36.420593: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8200
2022-12-20 02:31:36.709582: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-12-20 02:31:36.710115: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-12-20 02:31:36.710168: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-12-20 02:31:36.710818: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-12-20 02:31:36.710955: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


In [8]:
# Arrange the returned probabilities as a 2-D array with one column per class
# (presumably one row per test image - confirm against predict_from_folder).
# The column count is taken from MODEL_CLASSES instead of a hard-coded 213 so
# the cell keeps working when the label set changes.
probs = np.array(probs).reshape(-1, len(MODEL_CLASSES))


In [9]:
# (!) Don't touch this cell

# Per-class precision / recall / f1-score / support table for the test set,
# built from the true and predicted label strings.
report_text = classification_report(y_pred=predictions, y_true=labels)
print(report_text)


                    precision    recall  f1-score   support

      abcat0100000       0.00      0.00      0.00        21
      abcat0101000       0.52      0.63      0.57        19
      abcat0101001       0.78      0.56      0.65        25
      abcat0106004       0.38      0.75      0.50        44
      abcat0106010       0.19      0.42      0.26        24
      abcat0107000       0.35      0.27      0.31        41
      abcat0107015       0.48      0.55      0.51        78
      abcat0200000       0.44      0.50      0.47        22
      abcat0204000       0.44      0.43      0.44        37
      abcat0205000       1.00      0.14      0.24        59
      abcat0205001       0.12      0.21      0.15        24
      abcat0205006       0.58      0.58      0.58        31
      abcat0205007       0.46      0.65      0.54        20
      abcat0207000       0.52      0.69      0.59        70
      abcat0208007       0.17      0.04      0.06        28
      abcat0208015       0.22      0.12

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
# Build the product dataframe from the raw JSON catalogue and the
# pre-processed CSV.
# NOTE(review): the meaning of threshold=100 is defined inside build_df -
# presumably a minimum number of products per category; confirm.
cat = build_df(
    json_path='/home/app/src/data/products.json',
    preprocessed_csv='/home/app/src/data/normalized_data.csv',
    threshold=100,
)

# Leaf-category column of the dataframe.
y = cat['leaf']

# Build the category tree rooted at 'Categories' (and display it);
# the result is passed to evaluation.get_performance() further down.
tree_dict = tree_utils.make_tree(
    cat,
    cat['category'],
    'Categories',
    display_tree=True,
)

Categories
├── pcmcat312300050015
│   ├── pcmcat248700050021
│   │   ├── pcmcat303600050001
│   │   └── pcmcat179100050006
│   │       ├── pcmcat179200050003
│   │       ├── pcmcat179200050008
│   │       │   └── pcmcat748300322875
│   │       └── pcmcat179200050013
│   ├── abcat0802000
│   │   ├── abcat0811011
│   │   └── abcat0802001
│   │       └── pcmcat159300050002
│   ├── abcat0805000
│   │   └── abcat0511001
│   │       └── pcmcat266500050030
│   ├── pcmcat275600050000
│   │   └── abcat0807000
│   │       ├── abcat0807001
│   │       ├── pcmcat335400050008
│   │       └── abcat0807009
│   ├── abcat0809000
│   │   ├── abcat0809004
│   │   └── abcat0809002
│   ├── pcmcat249700050006
│   │   ├── pcmcat219100050010
│   │   ├── pcmcat286300050020
│   │   └── pcmcat272800050000
│   ├── pcmcat254000050002
│   │   └── pcmcat308100050020
│   │       └── pcmcat340500050007
│   └── pcmcat341100050005
│       └── pcmcat253700050018
│           └── pcmcat248300050003
├── other
├── abcat03000

In [14]:
# Full evaluation report for the CNN: headline metrics, average tree distance
# between predicted and true categories, top-5 score and a per-class report.
# NOTE(review): with average='micro' the recorded Accuracy, Precision, Recall
# and F1 Score are all the same number, so the last three add no information;
# consider 'macro' or 'weighted' if per-class balance matters.
evaluation.get_performance(
    model=cnn_model,
    true_labels=labels,
    pred_labels=predictions,
    probs=probs,
    tree=tree_dict,
    average='micro',
    vectorizer=None,  # no vectorizer is supplied for this model
)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model Performance metrics:
------------------------------
Accuracy: 0.6102338596841895
Precision: 0.6102338596841895
Recall: 0.6102338596841895
F1 Score: 0.6102338596841895
Average distance between nodes categories: 1.0107935238856687
Top 5 Score: 0.8203078153108135

Model Classification report:
------------------------------
                                           precision    recall  f1-score   support

                      3D Printer Filament       0.94      0.96      0.95        47
                  A/V Cables & Connectors       0.48      0.55      0.51        78
                  Action Camcorder Mounts       0.29      0.14      0.19        28
           Activity Trackers & Pedometers       0.38      0.67      0.48        36
              Adapters, Cables & Chargers       0.52      0.71      0.60        68
                         Air Conditioners       0.75      0.67      0.71        27
             Air Purifier Filters & Parts       0.58      0.33      0.42        21
       

In [15]:
# (!) Don't touch this cell

# Overall accuracy of the predicted labels against the true labels.
acc = accuracy_score(labels, predictions)

print(f"Your model accuracy is {acc:.4f}!")

# Fail the notebook when the accuracy is below the 0.3 floor.
accuracy_too_low = acc < .3
if accuracy_too_low:
    raise ValueError("Your model accuracy is too low :(\nYou can do it better! :)")


Your model accuracy is 0.6102!
