# Practical box size estimation from RFID interrogation traces using transfer learning 

Instrucciones:

- Realizar por cada dataset un entrenamiento probando cual es la precisión alcanzada

In [18]:
# Bibliotecas necesarias
import numpy as np
import random
import os

# Configuramos matplotlib
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

%load_ext tensorboard

# Semilla fija para reproducibilidad
np.random.seed(1)

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [5]:
import pandas
import pickle

def change_distro(X,t,samples):  
    newX = np.zeros(shape=[samples.sum(),X.shape[1]])
    newt = np.zeros(shape=[samples.sum(),])
    numsamples = 0
    for i in range(t.max()+1):
        aux = X[t==i]
        newX[numsamples:numsamples+samples[i]] = aux[:samples[i]]
        newt[numsamples:numsamples+samples[i]] = i
        numsamples += samples[i]
    
    newt = newt.astype(int)
    return newX,newt

def readdataset(id, maxsize=0, debug=False):
    picklefile = f'{id}/data.pickle'

    try:
        with open(picklefile, 'rb') as f:
            X, t = pickle.load(f)
            print(f'Datos ya disponibles de {picklefile}')
    
    except:
        index = pandas.read_csv(f'{id}/index.{id}.txt')
        index["numframes"] = index.framelst-index.frameini+1
        maxframes = index.numframes.max()
        frames = pandas.read_csv(f'{id}/frames.{id}.txt')
    
        if maxsize>0:
            SAMPLES = maxsize
        else:
            SAMPLES = len(index)
            
        if debug:
            print(f'Samples: {SAMPLES}')
            
        X = np.zeros(shape=[SAMPLES, 23]) 
        t = np.zeros(shape=[SAMPLES, ]) 
    
        for i in range(SAMPLES):
            if maxsize>0:
                batch = random.randint(0,SAMPLES-1)
            else:
                batch = i
            tagsread = index.iloc[batch].tagsread
            readtime = index.iloc[batch].readtime
            numframes = index.iloc[batch].numframes
            framebase = int(index.iloc[batch].frameini) 
            aux = frames.loc[framebase:framebase+numframes-1].values
            X[i,0] = tagsread
            X[i,1] = readtime
            X[i,2] = numframes
            X[i,3:8] = aux.mean(axis=0)
            X[i,8:13] = aux.std(axis=0)
            X[i,13:18] = aux.min(axis=0)
            X[i,18:23] = aux.max(axis=0)
            t[i] = index.iloc[batch].boxoriginal
            #X[batch,3:3+numframes*5] = frames.loc[framebase:framebase+numframes-1].values.ravel()
            if (debug) and ((i+1)%5000)==0:
                print(f'i: {i+1}, batch: {batch}, frames: {numframes}, framebase: {framebase}, t: {t[i]}, aux: {aux}')
        
            t = t.astype(int)
   
        with open(picklefile, 'wb') as f:
            pickle.dump([X, t], f)
    
        if debug: 
            frames.head(-1)
            
    return X,t

In [26]:
import keras
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import tensorflow as tf
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.callbacks import TensorBoard

def evalmodel(X,t,id,debug=False):  
    if debug:
        modelname = f'{id}/model.h5'
    X_train, X_test, t_train, t_test =  train_test_split(X, t, test_size=0.8)
    
    standard_transformer = Pipeline(steps=[('standard', StandardScaler())])
    minmax_transformer = Pipeline(steps=[('minmax', MinMaxScaler())])
    preprocessor = ColumnTransformer(
        remainder='passthrough', #passthough features not listed
        transformers=[
            ('std', standard_transformer , []),#slice(0,X.shape[1])),
            ('mm', minmax_transformer , slice(0,X.shape[1]))
        ])
    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)
    model = keras.models.Sequential([
        keras.layers.Dense(100, activation="relu", input_shape=X_train.shape[1:]),
        keras.layers.Dense(50, activation="tanh"),
        keras.layers.Dense(int(t.max())+1, activation="sigmoid")
    ])
    
    try: 
        model = keras.models.load_model(modelname) # cargar modelo
    except:
        early_stopping_cb = EarlyStopping(patience=20, restore_best_weights=True)
        checkpoint_cb = ModelCheckpoint(modelname, save_best_only=True)
        tensorboard_cb = TensorBoard(f'{id}/log', histogram_freq=1)
        model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
        model.fit(X_train, t_train, epochs=50000
                  , validation_data=(X_test, t_test), batch_size=32, callbacks=[early_stopping_cb, checkpoint_cb, tensorboard_cb], verbose=0)
        keras.models.save_model(modelname)
    
    precision = model.evaluate(X_test,t_test)[1]
    y_test = model.predict_classes(X_test)
    conf = confusion_matrix(list(map(int,t_test)), y_test)
    
    return precision, conf

In [None]:
%tensorboard --logdir 'IMP/log'
X, t = readdataset('IMP')
precision, conf = evalmodel(X,t,'IMP', debug=True)
print(f'ac: {precision}, cm: {conf}')

Launching TensorBoard...

In [17]:
SAMPLES_BATCH = 200
NUM_BATCHES = 100

XX = np.zeros(shape=[SAMPLES_BATCH, 23]) 
tt = np.zeros(shape=[SAMPLES_BATCH, ]) 
print(f'XX.shape: {XX.shape}')

mejora = 0.0
mejora_independiente = 0.0

for batch in range(NUM_BATCHES):
    # batch = random.randint(0,100) 
    for i in range(SAMPLES_BATCH):
        tagsread = index_ajuste.iloc[batch+i].tagsread
        readtime = index_ajuste.iloc[batch+i].readtime
        numframes = index_ajuste.iloc[batch+i].numframes
        framebase = int(index_ajuste.iloc[batch+i].frameini) 
        ha = index_ajuste.iloc[batch+i].ha
        hb = index_ajuste.iloc[batch+i].hb
        d = index_ajuste.iloc[batch+i].d
        aux = frames_ajuste.loc[framebase:framebase+numframes-1].values
        XX[i,0] = tagsread
        XX[i,1] = readtime
        XX[i,2] = numframes
        XX[i,3:8] = aux.mean(axis=0)
        XX[i,8:13] = aux.std(axis=0)
        XX[i,13:18] = aux.min(axis=0)
        XX[i,18:23] = aux.max(axis=0)
        tt[i] = index_ajuste.iloc[batch+i].boxoriginal
    #    if (i+1)%50==0:
    #        print(f'i: {i+1}, batch: {batch}, frames: {numframes}, framebase: {framebase}, tt: {tt[i]}, aux: {aux}, ha: {ha}, hb: {hb}, d: {d}')

    # Vemos resultado con la red anterior sin ajustar. Debe ser malo por no tener ningun dato de esta red
    XX_transform = preprocessor.transform(XX)
    precision_modelo_original = model.evaluate(XX_transform,tt)[1]
    print(f'precision_modelo_original: {precision_modelo_original*100:.02f}%')

    # Probamos usando solo dataset de ajuste. Separamos el conjunto de ajuste en train (100) y test (100)
    XX_train, XX_test, tt_train, tt_test =  train_test_split(XX, tt, test_size=0.5)
    preprocessor_ajuste = ColumnTransformer(
            remainder='passthrough', #passthough features not listed
            transformers=[
                ('std', standard_transformer , []),#slice(0,X.shape[1])),
                ('mm', minmax_transformer , slice(0,X.shape[1]))
            ])
    XX_train = preprocessor_ajuste.fit_transform(XX_train)
    XX_test = preprocessor_ajuste.transform(XX_test)
    model_ajuste = keras.models.Sequential([
        keras.layers.Dense(100, activation="relu", input_shape=X_train.shape[1:]),
        keras.layers.Dense(50, activation="tanh"),
        keras.layers.Dense(int(t.max())+1, activation="sigmoid")
    ])
    model_ajuste.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    model_ajuste.fit(XX_train, tt_train, epochs=50000,  validation_data=(XX_test, tt_test),  batch_size=32, callbacks=[early_stopping_cb], verbose=0)
    precision_modelo_independiente = model_ajuste.evaluate(XX_test,tt_test)[1]
    print(f'precision_modelo_independiente: {precision_modelo_independiente*100:.02f}%')

    # Ajuste mejorado del modelo original con transfer learning
    XX_train, XX_test, tt_train, tt_test =  train_test_split(preprocessor.transform(XX), tt, test_size=0.5)
    model_copy= keras.models.clone_model(model)
    model_copy.build((None, XX.shape[1])) # replace 10 with number of variables in input layer
    model_copy.set_weights(model.get_weights())
    model_copy.layers[0].trainable = False
    model_copy.layers[1].trainable = False
    model_copy.layers[2].trainable = False
    #for l in model_copy.layers:
    #    print(l.name, l.trainable)
    model_copy.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=["accuracy"])

    model_copy.fit(XX_train, tt_train, epochs=5000,  validation_data=(XX_test, tt_test),  batch_size=32, callbacks=[early_stopping_cb], verbose=0)
    precision_modelo_copia = model_copy.evaluate(XX_test,tt_test)[1]
    print(f'precision_modelo_copia: {precision_modelo_copia*100:.02f}%')

    mejora += (precision_modelo_copia - precision_modelo_original)/NUM_BATCHES
    mejora_independiente += (precision_modelo_copia - precision_modelo_independiente)/NUM_BATCHES
    
    
print(f'Mejora: {mejora*100:.02f}%')
print(f'Mejora independiente: {mejora_independiente*100:.02f}%')

XX.shape: (200, 23)
precision_modelo_original: 24.50%
precision_modelo_independiente: 25.00%
precision_modelo_copia: 33.00%
precision_modelo_original: 24.50%
precision_modelo_independiente: 22.00%
precision_modelo_copia: 23.00%
precision_modelo_original: 24.50%
precision_modelo_independiente: 21.00%
precision_modelo_copia: 25.00%
precision_modelo_original: 24.50%
precision_modelo_independiente: 24.00%
precision_modelo_copia: 33.00%
precision_modelo_original: 24.00%
precision_modelo_independiente: 19.00%
precision_modelo_copia: 29.00%
precision_modelo_original: 24.00%
precision_modelo_independiente: 36.00%
precision_modelo_copia: 27.00%
precision_modelo_original: 23.50%
precision_modelo_independiente: 19.00%
precision_modelo_copia: 34.00%
precision_modelo_original: 23.50%
precision_modelo_independiente: 26.00%
precision_modelo_copia: 22.00%
precision_modelo_original: 23.50%
precision_modelo_independiente: 23.00%
precision_modelo_copia: 27.00%
precision_modelo_original: 23.50%
precision_



precision_modelo_copia: 31.00%
precision_modelo_original: 23.50%
precision_modelo_independiente: 18.00%
precision_modelo_copia: 23.00%
precision_modelo_original: 23.50%
precision_modelo_independiente: 26.00%
precision_modelo_copia: 29.00%
precision_modelo_original: 24.00%
precision_modelo_independiente: 24.00%
precision_modelo_copia: 25.00%
precision_modelo_original: 24.00%
precision_modelo_independiente: 26.00%
precision_modelo_copia: 34.00%
precision_modelo_original: 23.50%
precision_modelo_independiente: 28.00%
precision_modelo_copia: 26.00%
precision_modelo_original: 23.50%
precision_modelo_independiente: 25.00%
precision_modelo_copia: 29.00%
precision_modelo_original: 24.00%
precision_modelo_independiente: 21.00%
precision_modelo_copia: 25.00%
precision_modelo_original: 24.50%
precision_modelo_independiente: 19.00%
precision_modelo_copia: 32.00%
precision_modelo_original: 24.50%
precision_modelo_independiente: 26.00%
precision_modelo_copia: 26.00%
precision_modelo_original: 25.00%

precision_modelo_copia: 30.00%
precision_modelo_original: 28.00%
precision_modelo_independiente: 20.00%
precision_modelo_copia: 30.00%
precision_modelo_original: 27.50%
precision_modelo_independiente: 27.00%
precision_modelo_copia: 27.00%
precision_modelo_original: 27.50%
precision_modelo_independiente: 19.00%
precision_modelo_copia: 24.00%
precision_modelo_original: 27.50%
precision_modelo_independiente: 33.00%
precision_modelo_copia: 33.00%
precision_modelo_original: 27.50%
precision_modelo_independiente: 24.00%
precision_modelo_copia: 26.00%
precision_modelo_original: 27.50%
precision_modelo_independiente: 26.00%
precision_modelo_copia: 29.00%
precision_modelo_original: 27.00%
precision_modelo_independiente: 24.00%
precision_modelo_copia: 32.00%
precision_modelo_original: 27.00%
precision_modelo_independiente: 29.00%
precision_modelo_copia: 31.00%
precision_modelo_original: 26.50%
precision_modelo_independiente: 24.00%
precision_modelo_copia: 32.00%
precision_modelo_original: 27.00%

In [18]:
for i in range(t.max()):
    print(f'Tags en cajas tipo {i}: {index[index.boxoriginal==i].shape[0]/index.shape[0]*100:.2f}')
    print(f'Cajas tipo {i}: {X[t==i].shape[0]/X.shape[0]*100:.2f}')

Tags en cajas tipo 0: 6.66
Cajas tipo 0: 9.09
Tags en cajas tipo 1: 10.02
Cajas tipo 1: 9.09
Tags en cajas tipo 2: 9.99
Cajas tipo 2: 9.09
Tags en cajas tipo 3: 10.00
Cajas tipo 3: 9.09
Tags en cajas tipo 4: 9.99
Cajas tipo 4: 9.09
Tags en cajas tipo 5: 6.66
Cajas tipo 5: 9.09
Tags en cajas tipo 6: 6.66
Cajas tipo 6: 9.09
Tags en cajas tipo 7: 9.98
Cajas tipo 7: 9.09
Tags en cajas tipo 8: 10.01
Cajas tipo 8: 9.09
Tags en cajas tipo 9: 10.01
Cajas tipo 9: 9.09


In [149]:
for i in range(t.max()):
    print(f'Tags en cajas tipo {i}: {index[index.boxoriginal==i].tagsread[:100]}')

Tags en cajas tipo 0: 5       1
31      1
33      1
51      1
76      1
       ..
1355    1
1360    1
1362    1
1363    1
1381    1
Name: tagsread, Length: 100, dtype: int64
Tags en cajas tipo 1: 17     1
30     1
35     1
36     1
59     1
      ..
957    1
969    1
971    1
974    1
990    1
Name: tagsread, Length: 100, dtype: int64
Tags en cajas tipo 2: 9       4
22      4
48      4
49      4
52      4
       ..
1029    4
1043    4
1058    4
1099    4
1103    4
Name: tagsread, Length: 100, dtype: int64
Tags en cajas tipo 3: 15     3
19     3
23     3
26     3
27     3
      ..
921    3
923    3
930    3
963    3
965    3
Name: tagsread, Length: 100, dtype: int64
Tags en cajas tipo 4: 4      3
7      3
21     3
32     3
39     3
      ..
948    3
949    3
955    3
970    3
985    3
Name: tagsread, Length: 100, dtype: int64
Tags en cajas tipo 5: 50      4
71      4
73      4
80      4
103     4
       ..
1392    4
1407    4
1411    4
1414    4
1419    4
Name: tagsread, Length: 100, dt

In [150]:
index[index.boxoriginal==i].head(50)

Unnamed: 0,frameini,framelst,boxnum,boxoriginal,L,W,H,read,tags,tagsread,readtime,despX,despY,theta,vhr,ha,hb,d,numframes
1,2,4,26,9,0.4,0.585,0.6,1,15,15,0.03983,0.040972,0.160102,21.612419,2,1.55,0.55,3.3,3
3,7,9,27,9,0.6,0.585,0.4,1,15,15,0.02296,0.134029,-0.121634,-30.319941,1,1.55,0.55,3.3,3
6,12,13,9,9,0.6,0.4,0.585,1,15,15,0.04277,0.054632,-0.190784,-28.103867,0,1.55,0.55,3.3,2
8,15,16,26,9,0.4,0.585,0.6,1,15,15,0.04277,0.006993,0.043028,36.409745,2,1.55,0.55,3.3,2
11,27,28,9,9,0.6,0.4,0.585,1,15,15,0.04068,-0.164511,-0.032152,-16.813992,2,1.55,0.55,3.3,2
25,49,51,27,9,0.6,0.585,0.4,1,15,15,0.04429,0.059907,-0.114639,12.099951,2,1.55,0.55,3.3,3
29,55,57,9,9,0.6,0.4,0.585,1,15,15,0.04666,0.147422,0.001961,-25.025301,0,1.55,0.55,3.3,3
43,76,79,27,9,0.6,0.585,0.4,1,15,15,0.04999,0.126045,0.138963,-35.906644,2,1.55,0.55,3.3,4
47,84,85,9,9,0.6,0.4,0.585,1,15,15,0.04277,-0.115452,-0.135786,-24.005346,0,1.55,0.55,3.3,2
66,111,121,26,9,0.4,0.585,0.6,0,15,14,0.07052,0.117095,-0.008755,41.212407,2,1.55,0.55,3.3,11
