In [1]:
import pandas as pd
import glob
import os
import sqlite3
import warnings
warnings.filterwarnings('ignore')
from spellchecker import SpellChecker
import matplotlib.pyplot as plt

In [2]:
# Location of the SQLite file holding the annotation tables, and the
# connection every later query in this notebook runs against.
annotation_path = './db.sqlite3'
conn = sqlite3.connect(annotation_path)

# One shared spell checker instance, reused for every annotation string.
spell = SpellChecker()

In [3]:
# Model names; each one is matched against the `modelname` column when the
# annotations are queried.
imagenet_models = ['inception_imageneta_bias', 'squeezenet_imageneta_bias']

# Disabled entries, kept for reference:
#   'inception_utensil_shape_bias', 'squeezenet_utensil_shape_bias'
utensil_models = [
    'inception_utensil_color_bias', 'inception_utensil_both_bias',
    'squeezenet_utensil_both_bias', 'squeezenet_utensil_color_bias',
]

# Class labels, grouped to mirror the two model lists above.
imagenet_classes = ['ants', 'mantis', 'bees']
utensil_classes = ['TEA_SPOON', 'FISH_SLICE', 'DINNER_CUTLERY']

In [4]:
def preprocess(x):
    """Lower-case one annotation value and spell-correct it.

    Parameters
    ----------
    x : str or None
        Raw annotation text. ``None`` and float NaN (how missing values
        typically appear in a pandas object column) are treated as empty.

    Returns
    -------
    str
        ``''`` for missing or empty input; otherwise the spell checker's
        correction of the lower-cased text, or the lower-cased text itself
        when the spell checker has no suggestion.
    """
    # Missing values would otherwise crash on `.lower()`. NaN is detected by
    # the fact that it is the only float that is not equal to itself.
    if x is None or (isinstance(x, float) and x != x):
        return ''
    x = str(x).lower()
    if x == '':
        return x
    corrected = spell.correction(x)
    # `correction` is documented as Optional[str]; fall back to the input so
    # downstream string operations never see None.
    return x if corrected is None else corrected

In [5]:
def utensil_preprocess(df):
    """Normalise utensil annotations and emit one row per colour.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the string columns ``primary_concept``, ``part``,
        ``shapes`` and ``colors``. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame in which
        * the four text columns are lower-cased and spell-corrected,
        * spaces in ``primary_concept`` / ``part`` become underscores,
        * ``colors`` is split on commas into one row per colour (the
          ``colors`` column comes first and the index repeats the original
          row label), and every colour containing a known base colour name
          is collapsed to that base name,
        * a ``shapes`` value of exactly ``'background'`` becomes ``''``.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    for cols in ['primary_concept', 'part', 'shapes', 'colors']:
        df[cols] = df[cols].apply(preprocess)

    df['primary_concept'] = df['primary_concept'].replace(' ', '_', regex=True)
    # Exact-match fix only: a cell that is exactly 'son' becomes 'spoon'.
    df['primary_concept'] = df['primary_concept'].replace('son', 'spoon')
    df['part'] = df['part'].replace(' ', '_', regex=True)

    df = df.replace('yellowgreen', 'yellow')
    df['colors'] = df['colors'].replace('silver and brown', 'silver,brown')
    df['colors'] = df['colors'].replace('black and gold', 'black,gold')
    df['colors'] = df['colors'].replace(' ', '', regex=True)

    # Split the comma-separated colours into one row per colour, then join
    # the remaining columns back on the original row label.
    df = (
        df['colors'].str.split(',', expand=True)
        .stack()
        .reset_index(level=0)
        .set_index('level_0')
        .rename(columns={0: 'colors'})
        .join(df.drop('colors', axis=1))
    )

    # Column-level replacement; the original assigned a whole DataFrame to
    # the single `colors` column here.
    df['colors'] = df['colors'].replace('yellowgreen', 'yellow')

    # Collapse any colour containing a base colour name to that name.
    # Order matters when a value contains several names. `regex=True` is
    # required: pandas >= 2.0 treats the pattern as a literal by default.
    for colour in ('yellow', 'silver', 'green', 'grey', 'white',
                   'brown', 'red', 'blue', 'aquamarine'):
        df['colors'] = df['colors'].str.replace(
            r'^.*' + colour + r'.*$', colour, regex=True)

    df['shapes'] = df['shapes'].replace('background', '')

    return df

In [6]:
def inception_imagenet_preprocess(df):
    """Normalise annotations for the inception model and emit one row per colour.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the string columns ``primary_concept``, ``part``,
        ``shapes`` and ``colors``. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame in which
        * the four text columns are lower-cased and spell-corrected,
        * a handful of known free-text colour answers are rewritten,
        * ``colors`` is split on commas into one row per colour (the
          ``colors`` column comes first and the index repeats the original
          row label), and every colour containing a known base colour name
          is collapsed to that base name,
        * a ``shapes`` value of exactly ``'background'`` becomes ``''`` and
          hyphens in ``shapes`` become underscores,
        * any remaining space in any string cell becomes an underscore.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    for cols in ['primary_concept', 'part', 'shapes', 'colors']:
        df[cols] = df[cols].apply(preprocess)

    # Exact-match rewrites of specific free-text answers.
    df['colors'] = df['colors'].replace(',transparent, but having a reflection', 'white')
    df['colors'] = df['colors'].replace('green og grey', 'green,grey')
    df['colors'] = df['colors'].replace('yellowblackgold', 'yellow,black,gold')
    df['colors'] = df['colors'].replace('multicoloured', 'white')
    df['colors'] = df['colors'].replace('unknow', 'white')

    # Turn ' and ' and spaces into separators, then collapse separator runs.
    # `,+` also handles runs longer than two (e.g. from "red , blue"), which
    # the original single ',,' replacement left as an empty colour token.
    df['colors'] = df['colors'].replace(' and ', ',', regex=True)
    df['colors'] = df['colors'].replace(' ', ',', regex=True)
    df['colors'] = df['colors'].replace(',+', ',', regex=True)

    # Split the comma-separated colours into one row per colour, then join
    # the remaining columns back on the original row label.
    df = (
        df['colors'].str.split(',', expand=True)
        .stack()
        .reset_index(level=0)
        .set_index('level_0')
        .rename(columns={0: 'colors'})
        .join(df.drop('colors', axis=1))
    )

    df['colors'] = df['colors'].replace('yellowgreen', 'yellow')

    # Collapse any colour containing a base colour name to that name.
    # Order matters when a value contains several names. `regex=True` is
    # required: pandas >= 2.0 treats the pattern as a literal by default.
    for colour in ('yellow', 'silver', 'green', 'grey', 'white',
                   'brown', 'red', 'blue'):
        df['colors'] = df['colors'].str.replace(
            r'^.*' + colour + r'.*$', colour, regex=True)

    df['shapes'] = df['shapes'].replace('background', '')
    df['shapes'] = df['shapes'].replace('-', '_', regex=True)
    df = df.replace(' ', '_', regex=True)

    return df

In [7]:
def squeezenet_imagenet_preprocess(df):
    """Normalise annotations for the squeezenet model and emit one row per colour.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the string columns ``primary_concept``, ``part``,
        ``shapes`` and ``colors``. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame in which
        * the four text columns are lower-cased and spell-corrected,
        * ``primary_concept`` values ``'leave'`` / ``'leaves'`` become
          ``'leaf'``,
        * spaces and hyphens in ``part`` / ``shapes`` become underscores and
          the digit ``5`` is stripped from ``part``,
        * ``colors`` is split on commas into one row per colour (the
          ``colors`` column comes first and the index repeats the original
          row label), and every colour containing a known base colour name
          is collapsed to that base name,
        * a ``shapes`` value of exactly ``'background'`` becomes ``''``,
        * any remaining space in any string cell becomes an underscore.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    for cols in ['primary_concept', 'part', 'shapes', 'colors']:
        df[cols] = df[cols].apply(preprocess)

    df['primary_concept'] = df['primary_concept'].replace(' ', '_', regex=True)
    # Same result as the original two-step 'leave' -> 'leaves' -> 'leaf'.
    df['primary_concept'] = df['primary_concept'].replace(
        {'leave': 'leaf', 'leaves': 'leaf'})

    # The original ran this replacement twice; once is sufficient.
    df['shapes'] = df['shapes'].replace(' ', '_', regex=True)

    df['part'] = df['part'].replace('-', '_', regex=True)
    df['part'] = df['part'].replace(' ', '_', regex=True)
    df['part'] = df['part'].replace('5', '', regex=True)

    df['colors'] = df['colors'].replace('ghostwrite', 'ghostwhite')
    df['colors'] = df['colors'].replace(' and ', ',', regex=True)
    df['colors'] = df['colors'].replace('-', ',', regex=True)
    df['colors'] = df['colors'].replace(' ', ',', regex=True)

    # Split the comma-separated colours into one row per colour, then join
    # the remaining columns back on the original row label.
    df = (
        df['colors'].str.split(',', expand=True)
        .stack()
        .reset_index(level=0)
        .set_index('level_0')
        .rename(columns={0: 'colors'})
        .join(df.drop('colors', axis=1))
    )

    # Column-level replacement; the original assigned a whole DataFrame to
    # the single `colors` column here.
    df['colors'] = df['colors'].replace('yellowgreen', 'yellow')

    # Collapse any colour containing a base colour name to that name.
    # Order matters when a value contains several names. `regex=True` is
    # required: pandas >= 2.0 treats the pattern as a literal by default.
    for colour in ('white', 'yellow', 'silver', 'green', 'grey',
                   'brown', 'red', 'blue'):
        df['colors'] = df['colors'].str.replace(
            r'^.*' + colour + r'.*$', colour, regex=True)

    df['shapes'] = df['shapes'].replace('background', '')
    df['shapes'] = df['shapes'].replace('-', '_', regex=True)
    df = df.replace(' ', '_', regex=True)

    return df

In [11]:
# Export one CSV per (model, class) pair: every annotated image of the model,
# with `y` set to the score of whichever of the three stored predictions
# carries the class label (NULL when none of them does).
#
# The query text is loop-invariant and uses named placeholders, so values are
# bound by the driver rather than spliced into the SQL. The original wrapped
# them in double quotes, which SQLite resolves as identifiers before falling
# back to string literals.
sql = '''select
            case
                when pred1label == :classname then pred1score
                when pred2label == :classname then pred2score
                when pred3label == :classname then pred3score
            end as y,
            a.id as id,
            objectname as primary_concept, partobject as part, color as colors, shape as shapes
         from apiservice_images a inner join apiservice_Annotations b on a.id=b.imageid
         where modelname = :modelname'''

# `to_csv` does not create missing directories.
os.makedirs('class_concepts_images', exist_ok=True)

for modelname in imagenet_models:
    for classname in imagenet_classes:
        df_name = 'class_concepts_images/' + modelname + '_' + classname + '.csv'
        df = pd.read_sql(sql, conn,
                         params={'classname': classname, 'modelname': modelname})
        # No spell-check pass here: both preprocess functions already begin
        # with the same pass over the same four columns.
        if 'inception' in modelname:
            df = inception_imagenet_preprocess(df)
        else:
            df = squeezenet_imagenet_preprocess(df)
        df.to_csv(df_name, index=False)