# For Image Retrieval

In [1]:
### For DataFrame
import pandas as pd
import numpy as np

##### For Query Processing 
import nltk
from nltk.corpus import stopwords
import re
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.stem import PorterStemmer

### For plotting images
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from os import listdir
from PIL import Image as PImage 
import os 
%matplotlib inline

pd.set_option('display.max_rows', None)  # or 1000

In [2]:
# Load the per-image tag table that every query below is answered from.
# NOTE(review): read_pickle can execute code embedded in the file - only load pickles from a trusted source.
tagged_data_df = pd.read_pickle("tagged_data/tag_data.pkl")
# NOTE(review): categorical_image_df is not referenced by any cell visible in this notebook.
categorical_image_df = pd.read_csv("tagged_data/category_wise_image.csv")

In [3]:
tagged_data_df

Unnamed: 0,image_name,objects,category,repeated_objects,repeated_objects_coordinates,model_size
0,000000000139.jpg,"[person, vase, chair, tvmonitor]","[person, gadget, furniture, household]","[person, person, person, chair, chair, tvmonit...",[{0: [[283.42548 164.44724 303.92645 ...,"(416, 416)"
1,000000000285.jpg,[bear],[animal],[bear],"[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6:...",
2,000000000632.jpg,"[book, bottle, chair, bed, pottedplant, tvmoni...","[gadget, furniture, other, household]","[bottle, chair, pottedplant, pottedplant, bed,...","[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6:...",
3,000000000724.jpg,[stop sign],[transport],"[stop sign, stop sign]","[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6:...",
4,000000000776.jpg,[teddy bear],[other],"[teddy bear, teddy bear]","[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6:...",
5,000000000785.jpg,"[person, skis]","[person, sports]","[person, skis]",[{0: [[184.56575 45.949722 318.38983 ...,
6,000000000802.jpg,"[oven, refrigerator]",[household],"[oven, refrigerator]","[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6:...",
7,000000000872.jpg,"[person, baseball glove]","[person, sports]","[person, person, baseball glove]",[{0: [[104.84656 74.73595 252.87292 ...,
8,000000000885.jpg,"[person, tennis racket]","[person, sports]","[person, person, person, person, person, perso...",[{0: [[179.9573 174.67824 276.92285 ...,
9,000000001000.jpg,"[person, handbag, tennis racket]","[person, sports, wearable]","[person, person, person, person, person, perso...",[{0: [[172.15987 92.2438 230.15309 342...,


In [4]:
# Class/category lookup table. VQA reads it as a module-level global and
# class_id_to_detect uses its row index as the class id for a given class_name.
category_df = pd.read_csv("tagged_data/category.csv")

In [5]:
# NOTE(review): this global is never read in the visible cells - VQA assigns its own local of the same name.
number_of_objects = 0

In [6]:
# NOTE(review): this global is never read in the visible cells - VQA assigns its own local of the same name.
objects_to_detect = []

### Query Processing 

In [7]:
def query_processing(sentence):
    """Turn a free-text query into a list of content-bearing tokens.

    The sentence is lower-cased and tokenised with NLTK, every token is
    lemmatised as a verb, English stop words are dropped, and any remaining
    non-word character is treated as a separator.

    Args:
        sentence: The raw query string.

    Returns:
        list[str]: The surviving tokens, in their original order.
    """
    tokens = nltk.word_tokenize(sentence.lower())

    verb_lemmatizer = WordNetLemmatizer()
    lemmas = [verb_lemmatizer.lemmatize(token, wordnet.VERB) for token in tokens]

    # Stop-word removal happens on the lemmas, not on the raw tokens.
    english_stop_words = set(stopwords.words("english"))
    kept_text = ' '.join(lemma for lemma in lemmas if lemma not in english_stop_words)

    # Punctuation (any non-word character) becomes a space, then we re-split.
    return re.sub(r"[\W]", ' ', kept_text).split()

In [8]:
def stemmer(final_words_lemmatizer):
    """Reduce plural-looking tokens to their Porter stem.

    Only tokens ending in 's' are stemmed; 'bus' and 'aeroplane' are always
    passed through untouched, as is every token that does not end in 's'.

    Args:
        final_words_lemmatizer: Iterable of tokens to process.

    Returns:
        list[str]: One output token per input token, in the same order.
    """
    porter = PorterStemmer()
    keep_as_is = ('bus', 'aeroplane')

    return [
        porter.stem(token) if token not in keep_as_is and token.endswith('s') else token
        for token in final_words_lemmatizer
    ]

In [9]:
def custom_words_removal(object_list):
    """Drop query boilerplate words so that only object names remain.

    Args:
        object_list: Tokens produced by query processing.

    Returns:
        list: The tokens that are not in the boilerplate vocabulary, in order.
    """
    boilerplate = ('image', 'picture', ' many', 'fetch', 'get', 'locations',
                   'location', 'show', 'present', 'many', 'database')

    return list(filter(lambda token: token not in boilerplate, object_list))

In [10]:
def generate_counter(df, objects_to_detect, if_repeated_objects=False):
    """Count, per row, how many times the requested objects occur.

    Args:
        df: Frame with an ``objects`` column and a ``repeated_objects`` column,
            each cell supporting ``.count(name)``.
        objects_to_detect: Object names to look for.
        if_repeated_objects: When truthy, count in ``repeated_objects``;
            otherwise count in ``objects``.

    Returns:
        list[int]: One total per row of ``df``, in row order.
    """
    tag_column = df.repeated_objects if if_repeated_objects else df.objects

    return [
        sum(tags.count(target) for target in objects_to_detect)
        for tags in tag_column.values
    ]

In [11]:
def generate_relevance_score(df, number_of_objects, not_type=False, how_many=False):
    """Convert the per-row ``counter`` column into relevance scores.

    Args:
        df: Frame with a numeric ``counter`` column.
        number_of_objects: Number of objects named in the query; the
            denominator for the default and ``not_type`` modes.
        not_type: When truthy (and ``how_many`` is falsy), score is
            ``1 - counter / number_of_objects``.
        how_many: When truthy, score is ``counter / max(counter)`` and takes
            precedence over ``not_type``.

    Returns:
        list: One score per row, in row order. A zero denominator contributes
        a ratio of 0.0 instead of nan/inf, so an empty or all-zero frame
        produces well-defined scores.
    """
    counts = df.counter.values

    def ratio(count, denominator):
        # Guard the division: a zero denominator means nothing can match.
        return count / denominator if denominator else 0.0

    if how_many:
        # The maximum is only needed in this mode; default=0 keeps an empty
        # frame from raising.
        max_score = max(counts, default=0)
        return [ratio(count, max_score) for count in counts]

    if not_type:
        return [1 - ratio(count, number_of_objects) for count in counts]

    return [ratio(count, number_of_objects) for count in counts]

In [12]:
def relevance_score_sorting(df, number_of_objects, sentence, how_many= False):
    """Score every row of ``df`` and return the rows ranked best-first.

    The scoring mode is chosen from the query: ``how_many`` takes precedence,
    then a query containing the word 'not', otherwise the default mode.

    Side effect: a ``relevance_score`` column is written into ``df`` itself.

    Args:
        df: Frame with a ``counter`` column.
        number_of_objects: Number of objects named in the query.
        sentence: The query text; only the presence of the word 'not' is used.
        how_many: When truthy, use the how-many scoring mode.

    Returns:
        The frame sorted by ``relevance_score`` descending, or ``False`` when
        the query contains 'not' but no row has a zero counter.
    """
    # The original also evaluated max(tagged_data_df.counter) here: an unused
    # value read from a global instead of `df`. It has been dropped.
    if how_many:
        scores = generate_relevance_score(df, number_of_objects, False, True)

    elif 'not' in sentence.split():
        # A negated query needs at least one row that lacks the objects.
        if not (df.counter == 0).any():
            return False
        scores = generate_relevance_score(df, number_of_objects, True)

    else:
        scores = generate_relevance_score(df, number_of_objects)

    df['relevance_score'] = scores
    return df.sort_values('relevance_score', ascending=False)

In [13]:
def object_in_db(df):
    """Report whether any row of ``df`` has a truthy ``counter`` value.

    Args:
        df: Frame with a ``counter`` column.

    Returns:
        bool: True if at least one counter is non-zero, otherwise False.
    """
    return bool(df['counter'].any())

In [14]:
def show_images(PATH, img_names=None):
    """Display images from the ``val2017`` folder that sits beside ``PATH``.

    Args:
        PATH: A path whose parent directory contains the ``val2017`` folder.
        img_names: File names to show. The original body read an undefined
            ``img_names`` variable; it is now an explicit, optional argument.
            When omitted, every file in the folder is shown in sorted order.

    Returns:
        None. Each image path is printed and the figure is displayed.
    """
    # os.path.join replaces the hard-coded "\\val2017\\" suffix, which only
    # produced a valid path on Windows.
    image_dir = os.path.join(os.path.dirname(os.path.realpath(PATH)), "val2017")

    if img_names is None:
        img_names = sorted(os.listdir(image_dir))

    for img in img_names:
        image_path = os.path.join(image_dir, img)
        print(image_path)
        image = mpimg.imread(image_path)  # images are color images
        plt.gca().clear()
        plt.imshow(image)
        # `display` is imported as the function itself, so the original
        # `display.display(...)` raised AttributeError.
        display(plt.gcf())

In [15]:
def load_class_names(file_name):
    """Read `file_name` and return its lines as a list of class names."""
    with open(file_name, 'r') as names_file:
        contents = names_file.read()

    # splitlines() drops the line terminators and any trailing empty line.
    return contents.splitlines()

In [16]:
def class_id_to_detect(img_names, object_list, ranked_df, category_df):
    """Collect the box dictionaries for the images and the ids of the classes to draw.

    Reads the module-level ``class_names`` list to decide which requested
    objects are known classes.

    Args:
        img_names: Image file names to look up in ``ranked_df``.
        object_list: Object names taken from the query.
        ranked_df: Frame with ``image_name`` and
            ``repeated_objects_coordinates`` columns.
        category_df: Frame with a ``class_name`` column; the index of the
            matching row is used as the class id.

    Returns:
        list: ``[class_ids, boxes_dicts]`` where ``boxes_dicts`` holds, per
        image, the first element of that image's coordinates cell.
    """
    boxes_dicts = [
        ranked_df[ranked_df.image_name == name].repeated_objects_coordinates.values[0][0]
        for name in img_names
    ]

    class_ids = [
        category_df[category_df.class_name == label].index.values[0]
        for label in object_list
        if label in class_names
    ]

    return [class_ids, boxes_dicts]

In [17]:
def objects_frequency(obj, ranked_df):
    """List the images that contain any requested object, with their detection counts.

    Columns are addressed by name. The original used positional lookups
    (``row[0]`` / ``row[1]``) on a label-indexed row, which pandas has
    deprecated, and re-filtered the whole frame for every matching row.

    Args:
        obj: Object names to look for.
        ranked_df: Frame with ``image_name``, ``objects`` and
            ``repeated_objects`` columns.

    Returns:
        list: ``[img_names, repeated_object_len]``. ``img_names`` holds each
        matching image once, in row order; ``repeated_object_len`` holds the
        length of ``repeated_objects`` for the first row carrying that name.
    """
    # Length of repeated_objects for the first row of each image name,
    # mirroring the original `frame[frame.image_name == name]...values[0]`.
    first_len = {}
    for name, repeated in zip(ranked_df['image_name'], ranked_df['repeated_objects']):
        first_len.setdefault(name, len(repeated))

    img_names = []
    repeated_object_len = []
    seen = set()

    for name, tags in zip(ranked_df['image_name'], ranked_df['objects']):
        if name in seen:
            continue
        if any(target in tags for target in obj):
            seen.add(name)
            img_names.append(name)
            repeated_object_len.append(first_len[name])

    return [img_names, repeated_object_len]


In [18]:
def _text_size(draw, text, font):
    """Return (width, height) of `text`, working on both old and new Pillow."""
    if hasattr(draw, 'textbbox'):
        # ImageDraw.textsize was removed in Pillow 10; textbbox replaces it.
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        return right - left, bottom - top
    return draw.textsize(text, font=font)


def bounding_box(ranked_df, img_names, boxes_dicts, repeated_object_len, class_id_to_detect, class_names, model_size,
                 image_dir='val2017', font_path='files/futur.ttf'):
    """Draw labelled boxes for the selected classes on each image and display it.

    Args:
        ranked_df: Not read by this function; kept for call compatibility.
        img_names: Image file names, resolved inside ``image_dir``.
        boxes_dicts: Per image, a mapping from class id to a sequence of
            boxes; the first four values of a box are used as x0, y0, x1, y1.
        repeated_object_len: Only its length matters - it is zipped with the
            two sequences above, so the shortest of the three bounds the loop.
        class_id_to_detect: Class ids whose boxes should be drawn.
        class_names: Sequence indexed by class id, giving the label text.
        model_size: (width, height) the box coordinates are expressed in;
            boxes are rescaled from this size to the real image size.
        image_dir: Folder containing the images (defaults to the previously
            hard-coded 'val2017').
        font_path: TrueType font used for labels (defaults to the previously
            hard-coded 'files/futur.ttf').

    Returns:
        None. Every processed image is shown with ``display``.
    """
    for img_name, boxes_dict, _ in zip(img_names, boxes_dicts, repeated_object_len):
        img = Image.open(os.path.join(image_dir, img_name))
        draw = ImageDraw.Draw(img)

        extent = img.size[0] + img.size[1]
        # max(1, ...) keeps tiny images from asking for a zero-sized font.
        font = ImageFont.truetype(font=font_path, size=max(1, extent // 150))
        resize_factor = (img.size[0] / model_size[0], img.size[1] / model_size[1])

        # Number of nested outlines. The original used (extent // 200) // 4
        # directly, which is 0 for small images, so no outline was drawn.
        outline_passes = max(1, (extent // 200) // 4)

        for cls in class_id_to_detect:
            boxes = boxes_dict[cls]
            if np.size(boxes) == 0:
                continue

            # One random colour per class per image, as plain ints for Pillow.
            color = tuple(int(channel) for channel in
                          np.random.permutation([np.random.randint(256), 255, 0]))

            for box in boxes:
                xy = [box[i] * resize_factor[i % 2] for i in range(4)]
                x0, y0 = xy[0], xy[1]

                # Each pass shrinks the rectangle a little further inwards.
                for t in np.linspace(0, 1, outline_passes):
                    xy[0], xy[1] = xy[0] + t, xy[1] + t
                    xy[2], xy[3] = xy[2] - t, xy[3] - t
                    draw.rectangle(xy, outline=color)

                text = '{}'.format(class_names[cls])
                text_width, text_height = _text_size(draw, text, font)

                # Filled label sits directly above the box's top-left corner.
                draw.rectangle([x0, y0 - text_height, x0 + text_width, y0], fill=color)
                draw.text((x0, y0 - text_height), text, fill='black', font=font)

        display(img)

In [19]:
def check_query_type(sentence):
    """Classify a query sentence into one of the supported query types.

    The sentence is matched as given (callers lower-case it first).

    Args:
        sentence: The query text.

    Returns:
        int or bool:
            1 - first word starts with 'is' or 'if' (yes/no question),
            2 - starts with 'how many' and also mentions 'where'/'location',
            3 - starts with 'how many',
            4 - contains the word 'where' or the text 'location',
            5 - mentions 'image' or 'picture' (singular or plural),
            False - empty input or none of the above.
    """
    words = sentence.split()
    if not words:
        # Empty or whitespace-only input used to raise IndexError.
        return False

    # Prefix match on the first word is kept from the original, so "isn't ..."
    # still counts as a yes/no question.
    if words[0].startswith(('is', 'if')):
        return 1

    # Substring tests replace `find(...) > 0`, which silently missed a
    # keyword sitting at the very start of the sentence.
    mentions_location = 'location' in sentence

    # Slice comparison is safe for one-word input (no IndexError).
    if words[:2] == ['how', 'many']:
        if 'where' in sentence or mentions_location:
            return 2
        return 3

    if 'where' in words or mentions_location:
        return 4

    # 'images' / 'pictures' are covered by the singular substrings.
    if 'image' in sentence or 'picture' in sentence:
        return 5

    return False



In [20]:
def yes_no_type(df,number_of_objects, sentence):
    """Rank `df` for a yes/no query by delegating to relevance_score_sorting."""
    return relevance_score_sorting(df, number_of_objects, sentence)

In [21]:
_PARTIAL_MATCH_MESSAGE = 'Sorry, there is no image with all the objects present in it. Only images with either of the objects present in it:'


def _print_image_names(frame):
    """Print the image_name of every row in `frame`, one per line."""
    for img_name in frame['image_name'].values:
        print(img_name)


def _print_count_table(frame):
    """Print an 'Image Name / Total count' table from image_name and counter."""
    print("%s\t\t\t%s" %("Image Name", "Total count"))
    for img, count in zip(frame['image_name'].values, frame.counter.values):
        print("%s\t\t\t%d" %(img, count))


def _draw_boxes(frame, objects_to_detect, class_names, model_size):
    """Draw bounding boxes for the requested objects on the images in `frame`."""
    image_names, repeated_object_len = objects_frequency(objects_to_detect, frame)
    # category_df is the module-level lookup table, as in the original body.
    class_ids, boxes_dict = class_id_to_detect(image_names, objects_to_detect, frame, category_df)
    bounding_box(frame, image_names, boxes_dict, repeated_object_len, class_ids, class_names, model_size)


def VQA(sentence, tagged_data_df, class_names, PATH):
    """Answer a natural-language query about the tagged images.

    The query is classified by check_query_type, the object names are
    extracted from it, every image is scored against them, and the answer is
    printed (and, for location queries, drawn as bounding boxes).

    Side effect: ``counter`` and ``relevance_score`` columns are written into
    ``tagged_data_df``.

    Args:
        sentence: The user's query.
        tagged_data_df: Frame with ``image_name``, ``objects``,
            ``repeated_objects``, ``repeated_objects_coordinates`` and
            ``model_size`` columns.
        class_names: Sequence of class labels indexed by class id.
        PATH: Not read by this function; kept for call compatibility.

    Returns:
        None.
    """
    print()

    model_size = tagged_data_df.iloc[0].model_size

    sentence = sentence.lower()
    splitted_sentence = sentence.split()

    query_result = check_query_type(sentence)
    if not query_result:
        print('Sorry!! The query could not be processed. RETRY!!!!')
        return

    objects_to_detect = stemmer(custom_words_removal(query_processing(sentence)))
    if not objects_to_detect:
        # Nothing but boilerplate words: the original crashed with IndexError
        # when it later tried to report objects_to_detect[0].
        print('Sorry!! The query could not be processed. RETRY!!!!')
        return
    number_of_objects = len(objects_to_detect)

    tagged_data_df['counter'] = generate_counter(tagged_data_df, objects_to_detect)
    if not object_in_db(tagged_data_df):
        print("'",objects_to_detect[0],"'","can't be found anywhere in the database.")
        return

    if query_result == 1:
        ranked_df = yes_no_type(tagged_data_df,number_of_objects, sentence)
    elif query_result in (2, 3):
        # "How many" queries count every detection, not just distinct objects.
        tagged_data_df['counter'] = generate_counter(tagged_data_df, objects_to_detect, True)
        ranked_df = relevance_score_sorting(tagged_data_df, number_of_objects, sentence, True)
    else:
        ranked_df = relevance_score_sorting(tagged_data_df, number_of_objects, sentence)

    # relevance_score_sorting returns False for a 'not' query when every image
    # contains the objects; the original then failed on `False.iloc`.
    if ranked_df is False:
        print("No image could be found.")
        return

    top_score = ranked_df.iloc[0].relevance_score
    if not top_score:
        print("No image could be found.")
        return

    has_and = 'and' in splitted_sentence
    has_not = 'not' in splitted_sentence
    or_before_and = (
        has_and
        and 'or' in splitted_sentence
        and splitted_sentence.index('or') < splitted_sentence.index('and')
    )
    any_match = ranked_df[ranked_df.relevance_score != 0]
    full_match = ranked_df[ranked_df.relevance_score == 1]

    if query_result == 1:
        if top_score == 1:
            print("Yes in the following image:")
            if has_and:
                # Top-ranked image; the original sliced the name itself with
                # [:1] and printed only its first character.
                print(ranked_df.iloc[0]['image_name'])
            elif has_not:
                _print_image_names(any_match)
            else:
                print()
                _print_image_names(any_match)
        else:
            print(_PARTIAL_MATCH_MESSAGE)
            print()
            _print_image_names(any_match)

    elif query_result == 2:
        # The original ended the "or ... and" branch with quit(), which shuts
        # the kernel down; the elif chain now ends that branch instead.
        if or_before_and:
            _print_count_table(any_match)
            _draw_boxes(full_match, objects_to_detect, class_names, model_size)
        elif has_and and top_score == 1 and len(objects_to_detect) > 1:
            _print_count_table(full_match)
            _draw_boxes(full_match, objects_to_detect, class_names, model_size)
        elif has_and and top_score != 1:
            print(_PARTIAL_MATCH_MESSAGE)
            print()
            _print_image_names(any_match)
            _draw_boxes(full_match, objects_to_detect, class_names, model_size)
        else:
            _print_count_table(any_match)
            _draw_boxes(any_match, objects_to_detect, class_names, model_size)

    elif query_result == 3:
        if has_and and top_score == 1:
            _print_count_table(full_match)
        elif has_and and top_score != 1:
            print(_PARTIAL_MATCH_MESSAGE)
            print()
            _print_image_names(any_match)
        else:
            fetched_df = any_match.rename(columns={"counter": "Total count"})
            # Blank index labels keep the row numbers out of the rendered table.
            fetched_df.index = [''] * len(fetched_df)
            display(fetched_df[["image_name", "Total count"]])

    elif query_result == 4:
        if or_before_and:
            print("Yes in the following image:")
            _print_image_names(any_match)
            _draw_boxes(full_match, objects_to_detect, class_names, model_size)
        elif has_and and top_score != 1:
            print(_PARTIAL_MATCH_MESSAGE)
            _draw_boxes(ranked_df[ranked_df.relevance_score > 0], objects_to_detect, class_names, model_size)
        else:
            print("Yes in the following image:")
            _draw_boxes(full_match, objects_to_detect, class_names, model_size)

    elif query_result == 5:
        if has_not and top_score == 1:
            print("Following are the relevant images:")
            _print_image_names(any_match)
        elif has_and and top_score == 1:
            print("Following are the relevant images:")
            _print_image_names(full_match)
        elif has_and and top_score != 1:
            print(_PARTIAL_MATCH_MESSAGE)
            print()
            _print_image_names(any_match)
        else:
            print("Following are the relevant images:")
            _print_image_names(any_match)

In [22]:
# Image folder name; passed to VQA, which does not read it.
PATH = 'val2017'

# Class labels, one per line of the file. class_id_to_detect also reads this
# name as a module-level global, so it must be defined before VQA is called.
class_names = load_class_names('files/coco.names')

# NOTE(review): inside a notebook kernel __name__ is "__main__", so this guard
# always passes, and input() blocks "Run All" until a query is typed.
if __name__ == "__main__":
    sentence = input("Enter your query: ")
    VQA(sentence, tagged_data_df, class_names, PATH)

Enter your query: fetch images having persons.

Following are the relevant images:
000000000139.jpg
000000006894.jpg
000000002261.jpg
bike.jpg
bike2.jpg
bike3.jpg
city.jpg
mobile.jpg
000000007108.jpg
000000002006.jpg
000000008844.jpg
000000012576.jpg
000000014473.jpg
000000017207.jpg
000000021903.jpg
000000027696.jpg
000000002153.jpg
handbag.jpg
000000000785.jpg
000000001584.jpg
000000000872.jpg
000000000885.jpg
000000001000.jpg
000000001490.jpg
000000001353.jpg
000000001296.jpg
000000001268.jpg


In [None]:
len(tagged_data_df.index)

In [33]:
tagged_data_df

Unnamed: 0,image_name,objects,category,repeated_objects,repeated_objects_coordinates,model_size
0,000000000139.jpg,"[person, vase, chair, tvmonitor]","[person, gadget, furniture, household]","[person, person, person, chair, chair, tvmonitor, tvmonitor, vase]","[{0: [[283.42548 164.44724 303.92645 293.98764 0.77991056], [268.69693 159.8794 301.80533 298.88007 0.70930517], [246.89917 168.93462 262.7517 209.75203 0.6187554]], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [], 28: [], 29: [], 30: [], 31: [], 32: [], 33: [], 34: [], 35: [], 36: [], 37: [], 38: [], 39: [], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [[189.27026 211.48619 234.3088 312.7315 0.9971607], [239.48274 214.35828 284.16626 309.26532 0.9101394]], 57: [], 58: [], 59: [], 60: [], 61: [], 62: [[ 4.2363167 165.20781 98.98071 257.58936 0.9987388], [362.8981 202.95123 418.17136 287.88525 0.9276264]], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [], 70: [], 71: [], 72: [], 73: [], 74: [], 75: [[359.3077 295.21106 380.45486 391.32947 0.8258967]], 76: [], 77: [], 78: [], 79: []}]","(416, 416)"
1,000000000285.jpg,[bear],[animal],[bear],"[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [[ 14.954834 36.614822 392.6891 400.388 0.99640614]], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [], 28: [], 29: [], 30: [], 31: [], 32: [], 33: [], 34: [], 35: [], 36: [], 37: [], 38: [], 39: [], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [], 57: [], 58: [], 59: [], 60: [], 61: [], 62: [], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [], 70: [], 71: [], 72: [], 73: [], 74: [], 75: [], 76: [], 77: [], 78: [], 79: []}]",
2,000000000632.jpg,"[book, bottle, chair, bed, pottedplant, tvmonitor]","[gadget, furniture, other, household]","[bottle, chair, pottedplant, pottedplant, bed, tvmonitor, book, book, book, book, book, book, book, book, book, book]","[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [], 28: [], 29: [], 30: [], 31: [], 32: [], 33: [], 34: [], 35: [], 36: [], 37: [], 38: [], 39: [[ 62.316753 163.09392 73.76775 201.23988 0.732369]], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [[158.6972 199.92618 220.9643 272.7572 0.9949126]], 57: [], 58: [[220.10919 190.19208 281.6218 301.8253 0.9953306], [122.006454 113.038956 153.65004 196.39742 0.9343172]], 59: [[ 1.4355316 211.44173 261.17017 407.02325 0.997743 ]], 60: [], 61: [], 62: [[371.38184 60.51973 416.34973 406.3758 0.6004921]], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [], 70: [], 71: [], 72: [], 73: [[337.94662 213.51086 356.88425 248.45029 0.70582956], [338.45328 165.89165 341.87906 198.62886 0.7055709], [323.48822 213.3573 340.5058 248.2008 0.6656642], [319.47894 145.41354 344.5699 150.3226 0.653985], [330.4149 166.60457 334.16885 198.51012 0.64365196], [346.19125 166.38786 349.97787 198.21019 0.6231219], [319.92548 136.99168 343.04724 142.77797 0.6215922], [340.03088 125.557396 357.93323 155.41446 0.5994389], [296.08408 167.67415 300.5576 195.8076 0.5613837], [274.5827 7.296982 375.93274 345.04614 0.5526804]], 74: [], 75: [], 76: [], 77: [], 78: [], 79: []}]",
3,000000000724.jpg,[stop sign],[transport],"[stop sign, stop sign]","[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [[127.15971 58.575768 287.99518 184.2685 0.99986196], [226.23186 214.2239 245.70232 239.14682 0.5798921]], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [], 28: [], 29: [], 30: [], 31: [], 32: [], 33: [], 34: [], 35: [], 36: [], 37: [], 38: [], 39: [], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [], 57: [], 58: [], 59: [], 60: [], 61: [], 62: [], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [], 70: [], 71: [], 72: [], 73: [], 74: [], 75: [], 76: [], 77: [], 78: [], 79: []}]",
4,000000000776.jpg,[teddy bear],[other],"[teddy bear, teddy bear]","[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [], 28: [], 29: [], 30: [], 31: [], 32: [], 33: [], 34: [], 35: [], 36: [], 37: [], 38: [], 39: [], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [], 57: [], 58: [], 59: [], 60: [], 61: [], 62: [], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [], 70: [], 71: [], 72: [], 73: [], 74: [], 75: [], 76: [], 77: [[ 1.2941293e+02 -7.1884155e-02 4.1105045e+02 3.6061121e+02  9.9150205e-01], [ 7.977051 166.7887 329.5465 419.7221 0.9623926]], 78: [], 79: []}]",
5,000000000785.jpg,"[person, skis]","[person, sports]","[person, skis]","[{0: [[184.56575 45.949722 318.38983 377.66992 0.99969804]], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [], 28: [], 29: [], 30: [[135.12183 352.4773 396.51483 389.3908 0.7325192]], 31: [], 32: [], 33: [], 34: [], 35: [], 36: [], 37: [], 38: [], 39: [], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [], 57: [], 58: [], 59: [], 60: [], 61: [], 62: [], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [], 70: [], 71: [], 72: [], 73: [], 74: [], 75: [], 76: [], 77: [], 78: [], 79: []}]",
6,000000000802.jpg,"[oven, refrigerator]",[household],"[oven, refrigerator]","[{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [], 28: [], 29: [], 30: [], 31: [], 32: [], 33: [], 34: [], 35: [], 36: [], 37: [], 38: [], 39: [], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [], 57: [], 58: [], 59: [], 60: [], 61: [], 62: [], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [[ 50.81048 183.92485 156.73456 348.83807 0.95270526]], 70: [], 71: [], 72: [[236.0094 116.90501 395.48932 345.4525 0.9999409]], 73: [], 74: [], 75: [], 76: [], 77: [], 78: [], 79: []}]",
7,000000000872.jpg,"[person, baseball glove]","[person, sports]","[person, person, baseball glove]","[{0: [[104.84656 74.73595 252.87292 392.6239 0.99981934], [195.4986 59.896286 293.43665 363.98535 0.97136533]], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [], 28: [], 29: [], 30: [], 31: [], 32: [], 33: [], 34: [], 35: [[250.57239 103.441 289.06982 130.12045 0.6104481]], 36: [], 37: [], 38: [], 39: [], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [], 57: [], 58: [], 59: [], 60: [], 61: [], 62: [], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [], 70: [], 71: [], 72: [], 73: [], 74: [], 75: [], 76: [], 77: [], 78: [], 79: []}]",
8,000000000885.jpg,"[person, tennis racket]","[person, sports]","[person, person, person, person, person, person, person, tennis racket]","[{0: [[179.9573 174.67824 276.92285 388.22888 0.99774146], [386.54468 35.901657 416.3073 239.03941 0.98131794], [195.19559 89.97572 259.428 243.9764 0.9076416], [266.95047 -0.8052521 314.7139 12.3524885 0.89858854], [319.8828 -1.2922797 362.0998 13.544613 0.81905323], [161.77557 -0.74150705 217.9159 11.735094 0.6193796 ], [3.6699127e+02 1.0777426e-01 4.0984790e+02 1.3760944e+01 5.2259487e-01]], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [], 28: [], 29: [], 30: [], 31: [], 32: [], 33: [], 34: [], 35: [], 36: [], 37: [], 38: [[258.42816 264.03296 308.38025 299.81445 0.99808466]], 39: [], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [], 57: [], 58: [], 59: [], 60: [], 61: [], 62: [], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [], 70: [], 71: [], 72: [], 73: [], 74: [], 75: [], 76: [], 77: [], 78: [], 79: []}]",
9,000000001000.jpg,"[person, handbag, tennis racket]","[person, sports, wearable]","[person, person, person, person, person, person, person, person, person, person, handbag, tennis racket, tennis racket]","[{0: [[172.15987 92.2438 230.15309 342.6855 0.9993506], [217.2927 124.392975 268.12698 410.66348 0.99712807], [ 75.71554 137.19456 129.54822 323.56128 0.9969489], [ 30.588953 161.15405 71.42515 340.0547 0.984059], [324.4959 174.94733 415.58966 390.04706 0.98234576], [254.02638 162.49951 302.51923 399.95764 0.98234314], [275.08215 96.36527 322.28247 198.44336 0.98008347], [129.10283 173.21667 182.23662 323.02927 0.97583497], [265.42633 175.15028 333.78754 414.83325 0.9725716], [229.69603 105.81365 263.42804 167.19296 0.74328774]], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: [], 17: [], 18: [], 19: [], 20: [], 21: [], 22: [], 23: [], 24: [], 25: [], 26: [[ 12.64414 190.62912 46.780113 308.70712 0.6431357]], 27: [], 28: [], 29: [], 30: [], 31: [], 32: [], 33: [], 34: [], 35: [], 36: [], 37: [], 38: [[ 31.825918 256.46484 58.96405 342.34888 0.84125453], [129.94185 176.85774 176.12206 295.19006 0.83941793]], 39: [], 40: [], 41: [], 42: [], 43: [], 44: [], 45: [], 46: [], 47: [], 48: [], 49: [], 50: [], 51: [], 52: [], 53: [], 54: [], 55: [], 56: [], 57: [], 58: [], 59: [], 60: [], 61: [], 62: [], 63: [], 64: [], 65: [], 66: [], 67: [], 68: [], 69: [], 70: [], 71: [], 72: [], 73: [], 74: [], 75: [], 76: [], 77: [], 78: [], 79: []}]",


In [None]:
# Scratch experiment: integer-encode class and category names for a classifier.
# NOTE(review): category_df2 is not defined in any cell visible here; on a
# fresh kernel this cell raises NameError unless it is created elsewhere.
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
import numpy as np

labelencoder_class_name = LabelEncoder()
labelencoder_category_name = LabelEncoder()

# Both columns of category_df2 are overwritten in place with their integer codes.
category_df2.class_name = labelencoder_class_name.fit_transform(category_df2.class_name)
category_df2.category_name = labelencoder_category_name.fit_transform(category_df2.category_name)
# Single-feature matrix: one encoded class name per row.
X = np.array(category_df2.class_name).reshape(-1, 1)

# Side-by-side view of original names (category_df) and their codes (category_df2).
# NOTE(review): `test` is built but not displayed or used in the visible cells.
test = pd.DataFrame({'class_name': category_df.class_name, 'class_name_vector': category_df2.class_name, 'category_name': category_df.category_name, 'category_name_vector': category_df2.category_name})
category_df2

In [None]:
# Fit a decision tree that maps the encoded class name (X) to the encoded category name.
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X, category_df2.category_name)

In [None]:
# First ten encoded class names, reshaped to one feature per row.
# NOTE(review): these rows are a slice of the data clf was fitted on, not a held-out set.
X_test = np.array(category_df2.class_name[:10]).reshape(-1, 1)
X_test

In [None]:
# Predicted (encoded) category for each row of X_test.
pred = clf.predict(X_test)

In [None]:
pred

In [None]:

# Decode the predicted integer codes back to category names.
print(labelencoder_category_name.inverse_transform(pred))

In [None]:
# Print each prediction next to its expected value.
# NOTE(review): y_test is not defined in any cell visible here; this loop
# raises NameError on a fresh kernel unless it is created elsewhere.
for i in range(len(pred)):
    print(pred[i], y_test[i])

In [None]:
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display