# Object classification

In [None]:
from sklearn.linear_model import SGDClassifier 
from sklearn.model_selection import train_test_split
import numpy as np
import os
import ast
from glob import glob
import random
import traceback
from tabulate import tabulate
import pickle

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from matplotlib import pyplot as plt

## Parameters

In [None]:
# When True the feature text files are parsed again; otherwise the cached
# .npy arrays are loaded.
new_data=True
# Only read in the classifier cell as `load_old_params and False`, so it
# currently has no effect there.
load_old_params=True
# When True the fitted model is pickled and, together with new_data, the
# arrays are saved as .npy files.
save_params=False
# When True samples are labelled by their folder name and later mapped through
# dictionary[folder][1]; otherwise by the translated object name.
selected_space=True

In [None]:
# Colab only: mount Google Drive at /content/drive, the prefix used by obj_dir.
from google.colab import drive
drive.mount('/content/drive')

## Utility functions

In [None]:
# English object name -> Italian label. Built once instead of on every call.
_TRANSLATIONS = {
    "apple": "mela",
    "ball": "palla",
    "bell pepper": "peperone",
    "binder": "raccoglitore",
    "bowl": "ciotola",
    "calculator": "calcolatrice",
    "camera": "fotocamera",
    "cell phone": "telefono",
    "cereal box": "scatola",
    "coffee mug": "tazza",
    "comb": "spazzola",
    "dry battery": "batteria",
    "flashlight": "torcia",
    "food box": "scatola",
    "food can": "lattina",
    "food cup": "barattolo",
    "food jar": "barattolo",
    "garlic": "aglio",
    "lemon": "limone",
    "lime": "lime",
    "onion": "cipolla",
    "orange": "arancia",
    "peach": "pesca",
    "pear": "pera",
    "potato": "patata",
    "tomato": "pomodoro",
    "soda can": "lattina",
    "marker": "pennarello",
    "plate": "piatto",
    "notebook": "quaderno",
    "keyboard": "tastiera",
    "glue stick": "colla",
    "sponge": "spugna",
    "toothpaste": "dentifricio",
    "toothbrush": "spazzolino",
}


def translate(name):
    """Return the Italian label for an English object name.

    Args:
        name: English object name, e.g. ``"bell pepper"``.

    Returns:
        The translated label, or ``name`` itself when it has no entry in the
        table (or cannot be used as a dictionary key).
    """
    try:
        return _TRANSLATIONS[name]
    except (KeyError, TypeError):
        # KeyError: unknown name. TypeError: unhashable name. Both fall back
        # to the input; anything else (e.g. KeyboardInterrupt) propagates.
        return name

def normalize_color(color, normalize=False):
    """Optionally rescale a flat sequence of colour components.

    The original implementation returned ``color`` unconditionally and left
    the rescaling code unreachable. That default is kept; the rescaling is now
    reachable through ``normalize=True``.

    Args:
        color: Flat sequence of numbers, read in groups of three.
            NOTE(review): presumably (L, a, b) triplets - confirm against the
            feature extractor.
        normalize: When False (default) ``color`` is returned untouched.

    Returns:
        ``color`` itself when ``normalize`` is False. Otherwise a new list in
        which every component at an index divisible by 3 is divided by 256 and
        every other component is shifted by 128 and then divided by 256.
    """
    if not normalize:
        return color
    return [
        value / 256 if index % 3 == 0 else (value + 128) / 256
        for index, value in enumerate(color)
    ]


def sort_and_cut_dict(dictionary,limit=3):
    """Keep the `limit` largest-valued entries and rescale them to sum to 1.

    The result lists its keys from the largest value to the smallest; entries
    with equal values keep their original relative order.
    """
    def by_value(entry):
        return entry[1]

    ranked = list(dictionary.items())
    ranked.sort(key=by_value, reverse=True)
    top_entries = ranked[:limit]
    total = sum(value for _, value in top_entries)
    rescaled = {}
    for key, value in top_entries:
        rescaled[key] = value / total
    return rescaled


## Data

In [None]:
obj_dir = "/content/drive/My Drive/Tesi/Code/Object_classification"
#obj_dir = "/Users/marco/Google Drive/Tesi/Code/Object_classification"
data_dir = obj_dir+"/Data"
model_filename = obj_dir+"/model.pkl"
# Object names (as parsed from the file name) that are skipped entirely.
exclusion_list=["binder","camera","cell phone","dry battery"]
# Instance folders held out for testing; every other folder goes to training.
# NOTE(review): "coffe_mug_5" may be a typo for "coffee_mug_5" - verify against
# the folder names on disk. A name that matches no folder silently leaves that
# instance in the training split.
test_folder=["apple_3",
             "bell_pepper_1",
             "bowl_3",
             "cereal_box_1",
             "coffe_mug_5",
             "comb_5",
             "flashlight_4",
             "food_box_6",
             "food_can_2",
             "garlic_1",
             "glue_stick_3",
             "keyboard_2",
             "lemon_1",
             "lime_1",
             "onion_1",
             "orange_1",
             "pear_4",
             "plate_5",
             "potato_5",
             "soda_can_2",
             "sponge_8",
             "tomato_1",
             "toothbrush_2"
             ]
if new_data:
    # Re-read every feature file under data_dir and split by instance folder.
    color_train=[]
    shape_train=[]
    texture_train=[]
    color_test=[]
    shape_test=[]
    texture_test=[]
    y_train=[]
    y_test=[]
    file_list=glob(data_dir+'/**', recursive=True)
    number_of_files=len(file_list)
    # Load the folder -> label mapping once and close the file before the long
    # scan. NOTE: pickle.load can execute arbitrary code; only use a trusted file.
    with open(obj_dir+"/dictionary.pickle","rb") as dictionary_file:
        dictionary=pickle.load(dictionary_file)
    for j,filename in enumerate(file_list):
        if not (os.path.isfile(filename) and filename.endswith(".txt")):
            continue
        print("{:.2f}%".format(j*100/number_of_files))
        # Drop the last three "_"-separated fields of the path, join the rest
        # with spaces and keep what follows the final "/": the object name.
        name=" ".join(filename.split("_")[:-3]).rsplit("/", 1)[1]
        if name in exclusion_list:
            continue
        name=translate(name)
        # The parent directory name identifies the object instance.
        folder=filename.split("/")[-2]
        if folder not in dictionary:
            continue
        with open(filename, "r") as feature_file:
            try:
                # One Python literal per line.
                features=[ast.literal_eval(line) for line in feature_file]
            except Exception:
                # Best effort: report the unreadable file and move on.
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                print("Error in {}".format(filename))
                continue
        # Only files with exactly three lines (color, shape, texture) are used.
        if len(features)!=3:
            continue
        color,shape,texture=features
        color=normalize_color(color)
        # selected_space labels by instance folder, otherwise by translated name.
        label=folder if selected_space else name
        if folder in test_folder:
            color_test.append(color)
            shape_test.append(shape)
            texture_test.append(texture)
            y_test.append(label)
        else:
            color_train.append(color)
            shape_train.append(shape)
            texture_train.append(texture)
            y_train.append(label)
    y_train=np.array(y_train)
    y_test=np.array(y_test)
    # One flat vector per sample: color, shape and texture concatenated.
    X_train=np.array([np.concatenate((c, s, t), axis=None) for c,s,t in zip(color_train,shape_train,texture_train)])
    X_test=np.array([np.concatenate((c, s, t), axis=None) for c,s,t in zip(color_test,shape_test,texture_test)])

    color_train=np.array(color_train)
    shape_train=np.array(shape_train)
    texture_train=np.array(texture_train)
    color_test=np.array(color_test)
    shape_test=np.array(shape_test)
    texture_test=np.array(texture_test)
    # NOTE(review): the concatenated matrices built above are replaced here by
    # the color features alone, so the model only sees color. Remove these two
    # lines to train on all three feature groups.
    X_train=color_train
    X_test=color_test
else:
    # Reuse the arrays written by a previous run.
    X_train=np.load(obj_dir+"/input_train.npy")
    X_test=np.load(obj_dir+"/input_test.npy")
    color_train=np.load(obj_dir+"/color_train.npy")
    shape_train=np.load(obj_dir+"/shape_train.npy")
    texture_train=np.load(obj_dir+"/texture_train.npy")
    color_test=np.load(obj_dir+"/color_test.npy")
    shape_test=np.load(obj_dir+"/shape_test.npy")
    texture_test=np.load(obj_dir+"/texture_test.npy")
    y_train=np.load(obj_dir+"/output_train.npy")
    y_test=np.load(obj_dir+"/output_test.npy")

0.07%
0.07%
0.08%
0.09%
0.10%
0.10%
0.11%
0.12%
0.13%
0.13%
0.14%
0.15%
0.16%
0.16%
0.17%
0.18%
0.19%
0.19%
0.20%
0.21%
0.22%
0.22%
0.23%
0.24%
0.25%
0.25%
0.26%
0.27%
0.27%
0.28%
0.29%
0.30%
0.30%
0.31%
0.32%
0.33%
0.33%
0.34%
0.35%
0.36%
0.36%
0.37%
0.38%
0.39%
0.39%
0.40%
0.41%
0.42%
0.42%
0.43%
0.44%
0.45%
0.45%
0.46%
0.47%
0.48%
0.48%
0.49%
0.51%
0.51%
0.52%
0.53%
0.53%
0.54%
0.55%
0.56%
0.56%
0.57%
0.58%
0.59%
0.59%
0.60%
0.61%
0.62%
0.62%
0.63%
0.64%
0.65%
0.65%
0.66%
0.67%
0.68%
0.68%
0.69%
0.70%
0.71%
0.71%
0.72%
0.73%
0.74%
0.74%
0.75%
0.76%
0.77%
0.77%
0.78%
0.79%
0.79%
0.80%
0.81%
0.82%
0.82%
0.83%
0.84%
0.85%
0.85%
0.86%
0.87%
0.88%
0.88%
0.89%
0.90%
0.91%
0.91%
0.92%
0.94%
0.94%
0.95%
0.96%
0.97%
0.97%
0.98%
0.99%
1.00%
1.00%
1.01%
1.02%
1.03%
1.03%
1.04%
1.05%
1.05%
1.06%
1.07%
1.08%
1.08%
1.09%
1.10%
1.11%
1.11%
1.12%
1.13%
1.14%
1.14%
1.15%
1.16%
1.17%
1.17%
1.18%
1.19%
1.20%
1.20%
1.21%
1.22%
1.23%
1.23%
1.24%
1.25%
1.26%
1.26%
1.27%
1.28%
1.29%
1.29%
1.30%
1.31%
1.31

## Save input data

In [None]:
# Replace each folder-level label with the second element of its entry in
# `dictionary`.
# This only applies to freshly parsed data. With new_data=False, `dictionary`
# is never defined and the labels come from output_*.npy, which this notebook
# writes after this mapping when run top to bottom; mapping them again would
# fail.
if selected_space and new_data:
    y_train=np.array([dictionary[folder][1] for folder in y_train])
    y_test=np.array([dictionary[folder][1] for folder in y_test])

In [None]:
# Cache the freshly built arrays so a later run can skip the parsing step.
if new_data and save_params:
    arrays_to_save=(
        ("/input_train.npy",X_train),
        ("/input_test.npy",X_test),
        ("/color_train.npy",color_train),
        ("/shape_train.npy",shape_train),
        ("/texture_train.npy",texture_train),
        ("/color_test.npy",color_test),
        ("/shape_test.npy",shape_test),
        ("/texture_test.npy",texture_test),
        ("/output_test.npy",y_test),
        ("/output_train.npy",y_train),
    )
    # Written in the order listed above.
    for suffix,array in arrays_to_save:
        np.save(obj_dir+suffix,array)

## Classifier fitting

In [None]:
# `load_old_params and False` is always False, so the pickled model is never
# loaded and a new classifier is trained on every run.
# NOTE(review): presumably a temporary override - remove `and False` to honor
# load_old_params again.
if load_old_params and False:
    with open(model_filename, 'rb') as file:
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        clf = pickle.load(file)
else:
    # Random forest with 30 trees; n_jobs=-1 asks for all available cores.
    clf = RandomForestClassifier(n_jobs=-1, n_estimators=30)
    clf.fit(X_train,y_train)
    # Print the score on the held-out test split.
    print(clf.score(X_test,y_test))

## Saving parameters

In [None]:
# Pickle the current classifier to model_filename when saving is enabled.
if save_params:
    with open(model_filename, 'wb') as model_file:
        pickle.dump(clf, model_file)

## Score

In [None]:
def classify_prediction(prediction, thresholds=(0.8, 0.6, 0.4)):
    """Group predictions into confidence buckets.

    Args:
        prediction: Iterable of ``[true_label, probabilities]`` pairs, where
            ``probabilities`` is a dict. Only its first value (in iteration
            order) is inspected, so the dict is expected to list the most
            likely class first.
        thresholds: ``(sure, unsure, dubious)`` lower bounds, compared with a
            strict ``>``. Defaults to the original 0.8 / 0.6 / 0.4.

    Returns:
        A dict with the keys ``"sure"``, ``"unsure"``, ``"dubious"`` and
        ``"cannot_answer"``, each holding the pairs that fell in that bucket
        in their original order. A pair whose probability dict is empty is
        placed in ``"cannot_answer"``.
    """
    sure_min, unsure_min, dubious_min = thresholds
    buckets = {"sure": [], "unsure": [], "dubious": [], "cannot_answer": []}
    for entry in prediction:
        _, probabilities = entry
        # An empty dict counts as zero confidence instead of raising.
        top_probability = next(iter(probabilities.values()), 0.0)
        if top_probability > sure_min:
            buckets["sure"].append(entry)
        elif top_probability > unsure_min:
            buckets["unsure"].append(entry)
        elif top_probability > dubious_min:
            buckets["dubious"].append(entry)
        else:
            buckets["cannot_answer"].append(entry)
    return buckets

def calculate_accuracy(category,prediction):
    """Return the fraction of pairs in ``prediction`` that count as correct.

    What "correct" means depends on ``category``:

    * ``"dubious"``: the true label is one of the first two predicted keys.
    * ``"cannot_answer"``: the true label is NOT among the first two keys.
    * anything else: the part of the true label before the first ``"-"`` is
      contained in the first predicted key (for string keys ``in`` is a
      substring test).

    Args:
        category: Name of the confidence bucket.
        prediction: List of ``[true_label, probabilities]`` pairs. The
            original body ignored this argument and read the global ``pred``.

    Returns:
        A float in ``[0, 1]``; ``0.0`` when ``prediction`` is empty.
    """
    total=len(prediction)
    if total==0:
        # Avoid dividing by zero for an empty bucket.
        return 0.0
    counter=0
    for o,p in prediction:
        keys=list(p.keys())
        if category=="dubious":
            correct=o in keys[0:2]
        elif category=="cannot_answer":
            correct=o not in keys[0:2]
        else:
            correct=o.split("-")[0] in keys[0]
        if correct:
            counter+=1
    return counter/total



In [None]:
def _format_probabilities(p):
    """Render a probability dict as "label:prob, label:prob, ..."."""
    return ", ".join([str(a)+":"+str(round(b,2)) for a,b in list(p.items())])


def _is_listed_as_wrong(category,o,p):
    """Tell whether the pair (o, p) is listed as not correct for its bucket."""
    keys=list(p.keys())
    if category=="dubious":
        return o not in keys[0:2]
    if category=="cannot_answer":
        return o in keys[0:2]
    if category=="unsure":
        return o.split("-")[0] not in keys[0]
    if category=="sure":
        return o != keys[0]
    return False


# For every test sample keep the true label and the renormalized top classes.
label_prob=clf.predict_proba(X_test)
pred=[[y_test[j],sort_and_cut_dict({clf.classes_[i]:v for i,v in enumerate(row)})] for j,row in enumerate(label_prob)]
pred_classified=classify_prediction(pred)
print("TOTAL TEST: {}".format(len(pred)))
# NOTE: the loop rebinds the module-level name `pred` to each bucket's list.
# The name is kept because other code in this notebook may read the global.
for category,pred in pred_classified.items():
    print(category.upper())
    print(40*"-")
    if not pred:
        # An empty bucket would otherwise divide by zero below.
        print("No predictions in this category")
        continue
    selected=[[o,_format_probabilities(p)] for o,p in pred if _is_listed_as_wrong(category,o,p)]
    print(tabulate(selected, headers=['Original','Predicted']))
    print("Not correct: {}/{} - {:.2f}%".format(len(selected),len(pred),len(selected)*100/len(pred)))
    accuracy=calculate_accuracy(category,pred)
    print("Accuracy: {:.2f}".format(accuracy))

## Test

In [None]:
# Score of the classifier on the test split, shown as the cell output.
clf.score(X_test,y_test)

In [None]:
# Plot the forest's importance value for each feature, by feature index.
plt.plot(clf.feature_importances_)

In [None]:
# Display the raw importance values as the cell output.
clf.feature_importances_

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pandas as pd
def classification_report(y_true, y_pred):
    """Print overall and per-class metrics and return the per-class values.

    The overall accuracy, weighted precision, recall and F1 are printed first,
    followed by a per-class table.

    Args:
        y_true: True labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        A dict mapping each label to
        ``[accuracy, precision, recall, f1, false_positives, false_negatives]``.
        Every label that appears in ``y_true`` or ``y_pred`` gets an entry.
        A ratio whose denominator is zero (e.g. precision of a class that is
        never predicted) is NaN.
    """
    print(f"Accuracy: {accuracy_score(y_true, y_pred)}.")
    print(f"Precision: {precision_score(y_true, y_pred, average='weighted', zero_division=True)}.")
    print(f"Recall: {recall_score(y_true, y_pred, average='weighted')}.")
    print(f"F1-Score: {f1_score(y_true, y_pred, average='weighted')}.")

    print("\nSuddivisione per Classe")

    # Use one explicit label list for both the matrix and the row names.
    # Taking the names from y_true alone misaligns them with the matrix rows
    # as soon as y_pred contains a label that never occurs in y_true.
    class_names = np.unique(np.concatenate((np.asarray(y_true).ravel(), np.asarray(y_pred).ravel())))
    matrix = confusion_matrix(y_true, y_pred, labels=class_names)

    TP = np.diag(matrix)
    # False positives: column sums without the diagonal (the true positives).
    FP = matrix.sum(axis=0) - TP
    # False negatives: row sums without the diagonal.
    FN = matrix.sum(axis=1) - TP
    TN = matrix.sum() - (FP + FN + TP)

    # 0/0 yields NaN for classes with no predictions or no samples; silence
    # the NumPy warning but keep the NaN so the gap stays visible.
    with np.errstate(divide="ignore", invalid="ignore"):
        class_accuracies = (TP+TN)/(TP+TN+FP+FN)
        class_precisions = TP/(TP+FP)
        class_recalls = TP/(TP+FN)
        class_f1_scores = (2 * class_precisions * class_recalls) / (class_precisions + class_recalls)

    # Convert each array to Python scalars once, then regroup per class.
    columns = zip(
        class_accuracies.tolist(),
        class_precisions.tolist(),
        class_recalls.tolist(),
        class_f1_scores.tolist(),
        FP.tolist(),
        FN.tolist(),
    )
    metrics_per_class = {name: list(values) for name, values in zip(class_names, columns)}

    result = pd.DataFrame(metrics_per_class, index=["Accuracy", "Precision", "Recall", "F1 Score", "FP", "FN"]).transpose()

    print(result, end="\n\n")
    return metrics_per_class

In [None]:
#from sklearn.metrics import classification_report
# Per-class metrics of the classifier on the test split.
y_true=y_test
y_pred=clf.predict(X_test)
d=classification_report(y_true, y_pred)
# Labels removed from the report before plotting.
# NOTE: this rebinds `exclusion_list`, which the data-loading cell also
# assigns (there with English object names).
exclusion_list=["batteria","ciotola","piatto","cipolla","pomodoro"]
for k in exclusion_list:
    # pop with a default: a label missing from the report is skipped instead
    # of raising KeyError.
    d.pop(k, None)

In [None]:
# Horizontal bar chart of the four per-class scores (FP/FN counts are left out).
metric_names=["Accuracy", "Precision", "Recall", "F1 Score"]
# One row per label: [label, accuracy, precision, recall, f1].
data=[[k]+v[:4] for k,v in d.items()]
labels=[row[0] for row in data]
# NOTE(review): `colors` is not passed to the plot below - confirm whether it
# was meant to be.
colors = ['red','yellow','blue','green']
# Build the frame directly from the rows so the scores stay numeric, instead
# of passing them through a string-typed NumPy array and parsing them back.
df = pd.DataFrame(data, columns=["Label"]+metric_names)
#df=df.set_index("Label")
ax = df.plot(x="Label", y=metric_names, kind="barh",figsize=(15,15))


plt.show()
