In [1]:
nn_model_id = 'retrained_sherlock7'

In [2]:
import pandas as pd
import numpy as np

from sherlock.deploy.train_sherlock import train_sherlock
from sherlock.deploy.predict_sherlock import predict_sherlock
from datetime import datetime
from sklearn.metrics import f1_score

### Load training and validation sets

In [3]:
start = datetime.now()
print(f'Started at {start}')

X_train = pd.read_parquet('train.parquet')
y_train = pd.read_parquet('../data/raw/train_labels.parquet').values.flatten()

print(f'Load data (train) process took {datetime.now() - start} seconds.')

Started at 2021-01-02 11:00:09.972298
Load data (train) process took 0:00:03.346182 seconds.


In [4]:
print('Distinct types for columns in the Dataframe (should be all float32):')
print(set(X_train.dtypes))

Distinct types for columns in the Dataframe (should be all float32):
{dtype('float32')}


In [5]:
start = datetime.now()
print(f'Started at {start}')

X_validation = pd.read_parquet('validation.parquet')
y_validation = pd.read_parquet('../data/raw/val_labels.parquet').values.flatten()

print(f'Load data (validation) process took {datetime.now() - start} seconds.')

Started at 2021-01-02 11:00:13.330179
Load data (validation) process took 0:00:01.307804 seconds.


### Re-train Sherlock

In [6]:
start = datetime.now()
print(f'Started at {start}')

train_sherlock(X_train, y_train, X_validation, y_validation, nn_id=nn_model_id);

print('Trained and saved new model.')
print(f'Finished at {datetime.now()}, took {datetime.now() - start} seconds')

Started at 2021-01-02 11:00:14.643737


W0102 11:00:14.845481 4733758912 deprecation.py:506] From /Users/lowecg/source/private-github/sherlock-project/venv/lib/python3.7/site-packages/tensorflow_core/python/ops/init_ops.py:97: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0102 11:00:14.846883 4733758912 deprecation.py:506] From /Users/lowecg/source/private-github/sherlock-project/venv/lib/python3.7/site-packages/tensorflow_core/python/ops/init_ops.py:97: calling Ones.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0102 11:00:14.851596 4733758912 deprecation.py:506] From /Users/lowecg/source/private-github/sherlock-project/venv/lib/python3.7/site-pa

Successfully loaded and compiled model, now fitting model on data.
Train on 412059 samples, validate on 137353 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100


Epoch 47/100
Retrained Sherlock.
Trained and saved new model.
Finished at 2021-01-02 11:50:23.381405, took 0:50:08.737683 seconds


### Load test set

In [7]:
start = datetime.now()
print(f'Started at {start}')

X_test = pd.read_parquet('test.parquet')
y_test = pd.read_parquet('../data/raw/test_labels.parquet').values.flatten()

print('Trained and saved new model.')
print(f'Finished at {datetime.now()}, took {datetime.now() - start} seconds')

Started at 2021-01-02 11:50:23.386788
Trained and saved new model.
Finished at 2021-01-02 11:50:25.281776, took 0:00:01.894999 seconds


### Make prediction

In [8]:
predicted_labels = predict_sherlock(X_test, nn_id=nn_model_id)

In [9]:
print(f'prediction count {len(predicted_labels)}, type = {type(predicted_labels)}')

size=len(y_test)

# Should be fully deterministic too.
f1_score(y_test[:size], predicted_labels[:size], average="weighted")

prediction count 137353, type = <class 'numpy.ndarray'>


0.8941992181577104

In [10]:
# Best results
# 0.8946297512938394

# LAST
# 0.8941992181577104
# 0.8661276466591463
# 0.8765360861865846
# 0.8777825108666634
# 0.8709458033528152

## Review errors

In [11]:
from collections import Counter

size = len(y_test)
mismatches = list()

for idx, k1 in enumerate(y_test[:size]):
    k2 = predicted_labels[idx]

    if k1 != k2:
        mismatches.append(k1)
#        if k1 in ('brand'):
#        print(f'[{idx}] expected "{k1}" but predicted "{k2}"')
        
f1 = f1_score(y_test[:size], predicted_labels[:size], average="weighted")
print(f'Total mismatches: {len(mismatches)} (F1 score: {f1})')

data = Counter(mismatches)
data.most_common()   # Returns all unique items and their counts

Total mismatches: 14497 (F1 score: 0.8941992181577104)


[('name', 788),
 ('rank', 729),
 ('position', 548),
 ('location', 518),
 ('region', 484),
 ('description', 464),
 ('artist', 419),
 ('team', 406),
 ('area', 371),
 ('notes', 357),
 ('category', 337),
 ('type', 333),
 ('album', 314),
 ('product', 310),
 ('company', 293),
 ('gender', 291),
 ('ranking', 287),
 ('city', 283),
 ('code', 275),
 ('team Name', 275),
 ('day', 270),
 ('class', 237),
 ('owner', 202),
 ('person', 201),
 ('country', 200),
 ('status', 198),
 ('year', 188),
 ('credit', 181),
 ('manufacturer', 180),
 ('brand', 180),
 ('result', 178),
 ('sex', 171),
 ('service', 171),
 ('order', 166),
 ('weight', 166),
 ('duration', 162),
 ('address', 162),
 ('plays', 161),
 ('sales', 157),
 ('format', 148),
 ('component', 147),
 ('age', 143),
 ('range', 142),
 ('origin', 139),
 ('club', 133),
 ('state', 124),
 ('county', 118),
 ('nationality', 108),
 ('publisher', 106),
 ('director', 105),
 ('capacity', 104),
 ('symbol', 102),
 ('command', 91),
 ('affiliation', 90),
 ('family', 84),
 

In [12]:
test_samples = pd.read_parquet('../data/raw/test_values.parquet')

In [13]:
from ast import literal_eval

idx = 541
original = test_samples.iloc[idx]
converted = original.apply(literal_eval).to_list()

print(f'Predicted "{predicted_labels[idx]}", actual label "{y_test[idx]}". Actual values:\n{converted}')

Predicted "age", actual label "position". Actual values:
[[2, 4]]
