# Object classification

In [1]:
from sklearn.linear_model import SGDClassifier 
from sklearn.model_selection import train_test_split
import numpy as np
import os
import ast
from glob import glob
import random
import traceback
from tabulate import tabulate
import pickle

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

## Parameters

In [2]:
new_data=True
load_old_params=False
save_params=True

## Utils functions

In [3]:
def translate(name):
    translate_dict={"apple":"mela",
                    "ball":"palla",
                    "bell pepper":"peperone",
                    "binder":"raccoglitore",
                    "bowl":"ciotola",
                    "calculator":"calcolatrice",
                    "camera":"fotocamera",
                    "cell phone":"telefono",
                    "cereal box":"scatola",
                    "coffee mug":"tazza",
                    "comb":"spazzola",
                    "dry battery":"batteria",
                    "flashlight":"torcia",
                    "food box":"scatola",
                    "food can":"lattina",
                    "food cup":"barattolo",
                    "food jar":"barattolo",
                    "garlic":"aglio",
                    "lemon":"limone",
                    "lime":"lime",
                    "onion":"cipolla",
                    "orange":"arancia",
                    "peach":"pesca",
                    "pear":"pera",
                    "potato":"patata",
                    "tomato":"pomodoro",
                    "soda can":"lattina",
                    "marker":"pennarello",
                    "plate":"piatto",
                    "notebook":"quaderno",
                    "keyboard":"tastiera",
                    "glue stick":"colla",
                    "sponge":"spugna",
                    "toothpaste":"dentifricio",
                    "toothbrush":"spazzolino"
                    }
    try:
        return translate_dict[name]
    except:
        return name

def normalize_color(color):
    color_normalized=[]
    for i,f in enumerate(color):
        if i%3==0:
            color_normalized.append(f/256)
        else:
            color_normalized.append((f+128)/256)
    return color_normalized


def sorted_dict(dictionary):
    return {k: v for k, v in sorted(dictionary.items(), key=lambda item: item[1], reverse=True)}            


## Data

In [4]:
obj_dir = "/content/drive/My Drive/Tesi/Code/Object_classification"
obj_dir = "/Users/marco/Google Drive/Tesi/Code/Object_classification"
data_dir = obj_dir+"/Data"
model_filename = obj_dir+"/model.pkl"
exclusion_list=["binder","camera","cell_phone"]
test_folder=["apple_3",
             "ball_6",
             "bell_pepper_1",
             "bowl_3",
             "cereal_box_1",
             "coffe_mug_5",
             "comb_5",
             "dry_battery_5",
             "flashlight_4",
             "food_box_6",
             "food_can_2",
             "food_jar_1",
             "garlic_1",
             "glue_stick_3",
             "keyboard_2",
             "lemon_1",
             "lime_1",
             "marker_6",
             "notebook_2",
             "onion_1",
             "orange_3",
             "pear_4",
             "plate_5",
             "potato_5",
             "soda_can_2",
             "sponge_8",
             "tomato_1",
             "toothbrush_2",
             "toothpaste_2"
             ]
if new_data:
    color_train=[]
    shape_train=[]
    texture_train=[]
    color_test=[]
    shape_test=[]
    texture_test=[]
    y_train=[]
    y_test=[]
    file_list=glob(data_dir+'/**', recursive=True)
    number_of_files=len(file_list)
    for j,filename in enumerate(file_list):
        if os.path.isfile(filename) and filename.endswith(".txt"):
            print("{:.2f}%".format(j*100/number_of_files))
            name=" ".join(filename.split("_")[:-3]).rsplit("/", 1)[1]
            if name in exclusion_list:
                continue
            name=translate(name)
            folder=filename.split("/")[-2]
            with open(filename, "r") as f:
                features=[]
                try:
                    lines=f.readlines()
                    for line in lines:
                        features.append(ast.literal_eval(line))
                    if len(features)==3:        
                        color,shape,texture=features
                        color=normalize_color(color)
                        if folder in test_folder:
                            color_test.append(color)
                            shape_test.append(shape)
                            texture_test.append(texture)
                            y_test.append(name)
                        else:
                            color_train.append(color)
                            shape_train.append(shape)
                            texture_train.append(texture)    
                            y_train.append(name)
                except:
                    print("Error in {}".format(filename))
                    print(lines)
                    continue    
    y_train=np.array(y_train)
    y_test=np.array(y_test)
    color_train=np.array(color_train)
    shape_train=np.array(shape_train)
    texture_train=np.array(texture_train)
    color_test=np.array(color_test)
    shape_test=np.array(shape_test)
    texture_test=np.array(texture_test)
else:
    X_train=np.load(obj_dir+"/input_train.npy")
    X_test=np.load(obj_dir+"/input_test.npy")
    color_train=np.load(obj_dir+"/color_train.npy")
    shape_train=np.load(obj_dir+"/shape_train.npy")
    texture_train=np.load(obj_dir+"/texture_train.npy")
    color_test=np.load(obj_dir+"/color_test.npy")
    shape_test=np.load(obj_dir+"/shape_test.npy")
    texture_test=np.load(obj_dir+"/texture_test.npy")
    y_train=np.load(obj_dir+"/output_train.npy") 
    y_test=np.load(obj_dir+"/output_test.npy")   

0.03%
0.04%
0.05%
0.06%
0.06%
0.07%
0.08%
0.09%
0.10%
0.10%
0.11%
0.12%
0.13%
0.13%
0.14%
0.15%
0.16%
0.17%
0.17%
0.18%
0.19%
0.20%
0.21%
0.21%
0.22%
0.23%
0.24%
0.25%
0.25%
0.26%
0.27%
0.28%
0.29%
0.29%
0.30%
0.31%
0.32%
0.33%
0.34%
0.35%
0.36%
0.36%
0.37%
0.38%
0.39%
0.40%
0.40%
0.41%
0.42%
0.43%
0.44%
0.44%
0.45%
0.46%
0.47%
0.48%
0.48%
0.49%
0.50%
0.51%
0.51%
0.52%
0.53%
0.54%
0.55%
0.55%
0.56%
0.57%
0.58%
0.59%
0.59%
0.60%
0.61%
0.62%
0.63%
0.63%
0.64%
0.65%
0.66%
0.67%
0.67%
0.68%
0.69%
0.70%
0.71%
0.71%
0.72%
0.73%
0.74%
0.74%
0.75%
0.76%
0.77%
0.78%
0.78%
0.79%
0.80%
0.81%
0.82%
0.82%
0.83%
0.84%
0.85%
0.86%
0.86%
0.87%
0.88%
0.89%
0.90%
0.90%
0.91%
0.92%
0.93%
0.93%
0.94%
0.95%
0.96%
0.97%
0.97%
0.98%
0.99%
1.00%
1.01%
1.01%
1.02%
1.03%
1.04%
1.05%
1.05%
1.06%
1.07%
1.08%
1.09%
1.09%
1.10%
1.11%
1.12%
1.12%
1.13%
1.14%
1.15%
1.16%
1.16%
1.17%
1.18%
1.19%
1.20%
1.20%
1.21%
1.22%
1.23%
1.24%
1.24%
1.25%
1.26%
1.27%
1.28%
1.28%
1.29%
1.30%
1.31%
1.32%
1.32%
1.33%
1.34%
1.35%
1.35

In [5]:
if new_data:
    X_train=np.array([np.kron(np.kron(c,s),t) for c,s,t in zip(color_train,shape_train,texture_train)])
    X_test=np.array([np.kron(np.kron(c,s),t) for c,s,t in zip(color_test,shape_test,texture_test)]) 

## Save input data

In [6]:
if new_data and save_params:
    np.save(obj_dir+"/input_train.npy",X_train)
    np.save(obj_dir+"/input_test.npy",X_test)
    np.save(obj_dir+"/color_train.npy",color_train)
    np.save(obj_dir+"/shape_train.npy",shape_train)
    np.save(obj_dir+"/texture_train.npy",texture_train)
    np.save(obj_dir+"/color_test.npy",color_test)
    np.save(obj_dir+"/shape_test.npy",shape_test)
    np.save(obj_dir+"/texture_test.npy",texture_test)
    np.save(obj_dir+"/output_test.npy",y_test)
    np.save(obj_dir+"/output_train.npy",y_train)

## Split dataset

In [7]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## Classifier fitting

In [21]:
if load_old_params:
    with open(model_filename, 'rb') as file:
        clf = pickle.load(file)
else:
    clf = make_pipeline(StandardScaler(), SGDClassifier()) 
    clf.fit(X_train,y_train)

## Saving parameters

In [22]:
if save_params:
    with open(model_filename, 'wb') as file:
        pickle.dump(clf, file)

## Score

In [23]:
'''
# sicuro - insicuro - indeciso_tra_2 - non_ho_capito
#sicuro
if primo>0 and primo-secondo>10
elif primo>0
elif primo-secondo<10 and secondo-terzo>10
else 

def classify_prediction(prediction,tollerance=30):
    sure=[]
    unsure=[]
    dubious=[]
    cannot_answer=[]
    for pred in prediction:
        o,p=pred
        values=list(p.values())
        keys=list(p.keys())
        # sure
        if values[0]>0 and values[1]<-tollerance/3 and values[0]-values[1]>tollerance: 
            sure.append(pred)
        # unsure        
        elif values[0]>-tollerance/10 and (values[0]-values[1]>tollerance/2 or values[1]<0):
            unsure.append(pred)
        # dubious    
        elif (values[0]>-tollerance/5 and values[0]-values[1]<tollerance/5) or (values[0]>0 and values[1]>0 and values[2]<0):
            dubious.append(pred)
        
        # cannot_answer
        else:
            cannot_answer.append(pred)
    return {"sure":sure, "unsure":unsure, "dubious":dubious, "cannot_answer":cannot_answer} 


'''
def classify_prediction(prediction,tollerance=300):
    sure=[]
    unsure=[]
    dubious=[]
    cannot_answer=[]
    for pred in prediction:
        o,p=pred
        values=list(p.values())
        keys=list(p.keys())
        # sure
        if values[0]>0 and values[1]<-tollerance/3 and values[0]-values[1]>tollerance: 
            sure.append(pred)
        # unsure        
        elif values[0]>-tollerance and values[0]-values[1]>tollerance/2:
            unsure.append(pred)
        # dubious    
        elif (values[0]>-tollerance and values[0]-values[1]<tollerance/2) :
            dubious.append(pred)
        # cannot_answer
        else:
            cannot_answer.append(pred)
    return {"sure":sure, "unsure":unsure, "dubious":dubious, "cannot_answer":cannot_answer}               

def calculate_accuracy(category,prediction):
    counter=0
    if category=="dubious":
        for o,p in pred:
            if o in list(p.keys())[0:2]:
                counter+=1
    elif category=="cannot_answer":
        for o,p in pred:
            if o not in list(p.keys())[0:2]:
                counter+=1
    else:
        for o,p in pred:
            if o == list(p.keys())[0]:
                counter+=1                       
    return counter/len(pred)            



In [24]:
label_prob=clf.decision_function(X_test)
pred=[[y_test[j],sorted_dict({clf.classes_[i]:v for i,v in enumerate(row)})] for j,row in enumerate(label_prob)]
#selected=[[o,list(p.values())[0]-list(p.values())[1],", ".join([str(a)+":"+str(round(b,2)) for a,b in list(p.items())[:5]])] for o,p in pred if o in list(p.keys())[2:] ]
pred_classified=classify_prediction(pred)

for l,pred in pred_classified.items():
    print(l.upper())
    print(20*"-")
    selected=[]
    for o,p in pred:
        if l=="dubious" and o not in list(p.keys())[0:2]:
            selected.append([o,list(p.values())[0]-p[o],", ".join([str(a)+":"+str(round(b,2)) for a,b in list(p.items())[:5]])])
        elif l=="cannot_answer" and o in list(p.keys())[0:2]:
            selected.append([o,list(p.values())[0]-p[o],", ".join([str(a)+":"+str(round(b,2)) for a,b in list(p.items())[:5]])])

        elif l=="unsure" and o == list(p.keys())[1]:
            selected.append([o,list(p.values())[0]-p[o],", ".join([str(a)+":"+str(round(b,2)) for a,b in list(p.items())[:5]])])

        elif (l=="sure") and o != list(p.keys())[0]:
            selected.append([o,list(p.values())[0]-p[o],", ".join([str(a)+":"+str(round(b,2)) for a,b in list(p.items())[:5]])])
    print(tabulate(selected, headers=['Original', "Difference",'Predicted']))
    dif=[i[1] for i in selected]
    #print(sum(dif)/len(dif))
    print("Total: {}/{}".format(len(selected),len(X_test)))
    print("Percentage: {:.2f}".format(len(pred)/len(X_test)))
    accuracy=calculate_accuracy(l,pred)
    print("Accuracy: {:.2f}".format(accuracy))      

SURE
--------------------
Original      Difference  Predicted
----------  ------------  -------------------------------------------------------------------------------------
spugna           909.166  cipolla:85.21, palla:-471.22, tazza:-509.7, mela:-774.57, spugna:-823.96
barattolo       1291.68   palla:1087.73, barattolo:-203.95, cipolla:-238.49, tazza:-240.57, telefono:-616.61
ciotola          549.457  piatto:62.65, ciotola:-486.8, cipolla:-674.17, palla:-866.6, spugna:-958.07
ciotola          581.384  piatto:110.24, ciotola:-471.15, cipolla:-506.97, palla:-549.11, spugna:-824.79
quaderno        2100.27   piatto:1048.61, tazza:-340.3, palla:-609.22, mela:-782.71, calcolatrice:-861.51
quaderno        1192.75   piatto:315.96, tazza:-131.04, scatola:-333.21, mela:-771.94, quaderno:-876.79
quaderno        1839.26   piatto:732.13, tazza:-149.81, scatola:-255.81, mela:-643.62, ciotola:-712.65
quaderno        1439.51   piatto:390.67, scatola:-138.19, tazza:-182.52, mela:-719.37, quaderno:-1

## Test

In [25]:
clf.score(X_test,y_test)

0.8346560846560847