In [18]:
import numpy as np
import PIL
import pandas as pd
import os
import copy
import scipy.io

things_stim_path = '/user_data/mmhender/things/'
things_images_root = os.path.join(things_stim_path, 'Images')


In [71]:
import sys
sys.path.append('/user_data/mmhender/texture_synthesis/code/')
import things_utils

In [85]:
# things_utils.process_concepts()
things_utils.subsample_concepts()

saving to /user_data/mmhender/things/concepts_use.npy


In [74]:
fn = '/user_data/mmhender/things/concepts_removeoverlap.npy'
concept_info = np.load(fn, allow_pickle=True).item()

In [84]:
neach = [len(cc) for cc in concept_info['concept_names']]
np.min(neach)

12

In [86]:
fn = '/user_data/mmhender/things/concepts_use.npy'
concept_info = np.load(fn, allow_pickle=True).item()

In [88]:
neach = [len(cc) for cc in concept_info['concept_names_subsample']]
np.min(neach)

12

In [91]:
concept_info['concept_names_subsample']

[array(['toucan', 'ostrich', 'chick', 'pigeon', 'turkey', 'duck', 'eagle',
        'parrot', 'vulture', 'seagull', 'pelican', 'peacock'], dtype='<U18'),
 array(['thumb', 'leg', 'skin', 'arm', 'wrist', 'elbow', 'ear', 'tongue',
        'shoulder', 'hair', 'eye', 'mouth'], dtype='<U18'),
 array(['jumpsuit', 'blazer', 'shorts', 'pantyhose', 'jersey',
        'fishnet_stockings', 'underwear', 'cape', 'turtleneck', 'pantsuit',
        'sweatsuit', 'cardigan'], dtype='<U18'),
 array(['souffle', 'popsicle', 'birthday_cake', 'donut', 'tiramisu',
        'baklava', 'brownie', 'whipped_cream', 'creme_brulee', 'chocolate',
        'mousse', 'pudding'], dtype='<U18'),
 array(['espresso', 'tea', 'milk', 'eggnog', 'hot_chocolate', 'smoothie',
        'lemonade', 'juice', 'latte', 'wine', 'soda', 'champagne'],
       dtype='<U18'),
 array(['stereo', 'television', 'tablet', 'computer_screen', 'radio',
        'remote_control', 'modem', 'cellphone', 'tape_recorder',
        'projector', 'cd_player', 'm

In [19]:
filename = os.path.join(things_stim_path,'things_concepts.tsv')

df = pd.read_csv(filename, sep='\t')
concept_list = np.array(df['Word'])
ids_list = np.array(df['uniqueID'])
concept_list = [c.replace(' ', '_') for c in concept_list]
n_concepts = len(concept_list)
# concepts are the fine-grained/basic level names

info_folder = os.path.join(things_stim_path,'27 higher-level categories')
categ_names = scipy.io.loadmat(os.path.join(info_folder, 'categories.mat'))['categories'][0]
categ_names = [categ_names[ii][0] for ii in range(len(categ_names))]
categ_names = [categ.replace(' ', '_') for categ in categ_names]
n_categ = len(categ_names)
# categories are the high-level/superordinate names

# load the "bottom-up" (human-generated) groupings
dat = scipy.io.loadmat(os.path.join(info_folder, 'category_mat_bottom_up.mat'))
cmat = dat['category_mat_bottom_up']

# there is a swap in this labeling betweeen "hot-air balloon" and "hot chocolate"
# (maybe a typo?)
# i am manually switching them here
cmat_fixed = copy.deepcopy(cmat)
tmp = copy.deepcopy(cmat[801,:])
cmat_fixed[801,:] = cmat[803,:]
cmat_fixed[803,:] = tmp

# now going to fix these a bit to get rid of anything ambiguous
cmat_adjusted = copy.deepcopy(cmat_fixed).astype(bool) 

# removing any duplicate concept names here (these are ambiguous meaning words like bat)
un, counts = np.unique(concept_list, return_counts=True)
duplicate_conc = un[counts>1]
duplicate_conc_inds = np.where([conc in duplicate_conc for conc in concept_list])
cmat_adjusted[duplicate_conc_inds,:] = False

# remove any concepts that have the same name as one of the categories (for example "fruit")
duplicate_inds = np.where([conc in categ_names for conc in concept_list])[0]
cmat_adjusted[duplicate_inds,:] = False

# deciding how to resolve overlap between categories. 
# always prioritizing the "lower-level" category here
# for example use "bird" label over "animal"
categories_prioritize = ['bird','insect','dessert','drink','fruit','vegetable']

for cc1 in range(n_categ):

    for cc2 in np.arange(cc1+1, n_categ):

        overlap = cmat_adjusted[:,cc1] & cmat_adjusted[:,cc2]

        cat1 = categ_names[cc1]
        cat2 = categ_names[cc2]

        if np.sum(overlap)>0:

            if (cat1 in categories_prioritize) and (cat2 not in categories_prioritize):
                # remove concept from the not-prioritized category
                cmat_adjusted[overlap,cc2] = False
            elif (cat2 in categories_prioritize) and (cat1 not in categories_prioritize):
                cmat_adjusted[overlap,cc1] = False        
            else:
                # if neither is prioritized, don't use the concept at all
                cmat_adjusted[overlap,cc1] = False
                cmat_adjusted[overlap,cc2] = False


# now decide which concepts and categories to exclude from this remaining set.
# exclusion criteria:
# exclude any categories that are supersets of other categories
# (for example exclude food, keep dessert)
# exclude concepts that are supersets of other concepts
# (for example exclude berry, keep strawberry).
# also excluding some concepts that are not very well-known 
# (for example spark plug).

categ_exclude = list(pd.read_csv(os.path.join(things_stim_path, 'categ_exclude.csv'))['categ_exclude'])
concepts_exclude = list(pd.read_csv(os.path.join(things_stim_path, 'conc_exclude.csv'))['concepts_exclude'])

# remove these from the concept matrix

categ_names_new = [cc for cc in categ_names if cc not in categ_exclude]
categ_inds_keep = [cc for cc in range(n_categ) if categ_names[cc] not in categ_exclude]

n_categ = len(categ_inds_keep)
categ_names = categ_names_new

cmat_adjusted = cmat_adjusted[:,categ_inds_keep]

conc_inds_overlapping = np.sum(cmat_adjusted, axis=1)>1
conc_inds_notcategorized = np.sum(cmat_adjusted, axis=1)==0
conc_inds_exclude = np.array([conc in concepts_exclude for conc in concept_list])

conc_inds_keep = ~conc_inds_overlapping & ~conc_inds_notcategorized & ~conc_inds_exclude

cmat_adjusted = cmat_adjusted[conc_inds_keep,:]

cmat_adjusted.shape

(491, 21)

In [51]:
filename = os.path.join(things_stim_path,'things_concepts.tsv')

df = pd.read_csv(filename, sep='\t')
concept_list = np.array(df['Word'])
ids_list = np.array(df['uniqueID'])
concept_list = [c.replace(' ', '_') for c in concept_list]
n_concepts = len(concept_list)
# concepts are the fine-grained/basic level names

info_folder = os.path.join(things_stim_path,'27 higher-level categories')
categ_names = scipy.io.loadmat(os.path.join(info_folder, 'categories.mat'))['categories'][0]
categ_names = [categ_names[ii][0] for ii in range(len(categ_names))]
categ_names = [categ.replace(' ', '_') for categ in categ_names]
n_categ = len(categ_names)
# categories are the high-level/superordinate names

# load the "bottom-up" (human-generated) groupings
dat = scipy.io.loadmat(os.path.join(info_folder, 'category_mat_bottom_up.mat'))
cmat = dat['category_mat_bottom_up']

# there is a swap in this labeling betweeen "hot-air balloon" and "hot chocolate"
# (maybe a typo?)
# i am manually switching them here
cmat_fixed = copy.deepcopy(cmat)
tmp = copy.deepcopy(cmat[801,:])
cmat_fixed[801,:] = cmat[803,:]
cmat_fixed[803,:] = tmp

# now going to fix these a bit to get rid of anything ambiguous
cmat_adjusted = copy.deepcopy(cmat_fixed).astype(bool) 

# removing any duplicate concept names here (these are ambiguous meaning words like bat)
un, counts = np.unique(concept_list, return_counts=True)
duplicate_conc = un[counts>1]
duplicate_conc_inds = np.where([conc in duplicate_conc for conc in concept_list])
cmat_adjusted[duplicate_conc_inds,:] = False

# remove any concepts that have the same name as one of the categories (for example "fruit")
duplicate_inds = np.where([conc in categ_names for conc in concept_list])[0]
cmat_adjusted[duplicate_inds,:] = False

# remove first the "food" and "animal" categories, because these share a lot of members with 
# other categories like "dessert", "bird"
categ_exclude_first = ['food','animal']
categ_names_new = [cc for cc in categ_names if cc not in categ_exclude_first]
categ_inds_keep = [cc for cc in range(n_categ) if categ_names[cc] not in categ_exclude_first]
n_categ = len(categ_inds_keep)
categ_names = categ_names_new
cmat_adjusted = cmat_adjusted[:,categ_inds_keep]

# now decide which concepts and categories to exclude from this remaining set.
# exclusion criteria:
# exclude any categories that are supersets of other categories
# (for example exclude food, keep dessert)
# exclude concepts that are supersets of other concepts
# (for example exclude berry, keep strawberry).
# also excluding some concepts that are not very well-known 
# (for example spark plug).

categ_exclude = list(pd.read_csv(os.path.join(things_stim_path, 'categ_exclude.csv'))['categ_exclude'])
concepts_exclude = list(pd.read_csv(os.path.join(things_stim_path, 'conc_exclude.csv'))['concepts_exclude'])

# from set of all categories, find the ones that are overlapping for any categories
conc_inds_overlapping = np.sum(cmat_adjusted, axis=1)>1
conc_inds_exclude = np.array([conc in concepts_exclude for conc in concept_list])

categ_names_new = [cc for cc in categ_names if cc not in categ_exclude]
categ_inds_keep = [cc for cc in range(n_categ) if categ_names[cc] not in categ_exclude]
n_categ = len(categ_inds_keep)
categ_names = categ_names_new
cmat_adjusted = cmat_adjusted[:,categ_inds_keep]

conc_inds_notcategorized = np.sum(cmat_adjusted, axis=1)==0
conc_inds_keep = ~conc_inds_overlapping & ~conc_inds_notcategorized & ~conc_inds_exclude
cmat_adjusted = cmat_adjusted[conc_inds_keep,:]

cmat_adjusted.shape

(491, 21)

In [56]:
conc_use = np.array(concept_list)[conc_inds_keep]
ids_use = np.array(ids_list)[conc_inds_keep]


In [57]:
ids_use.shape

(491,)

In [61]:
image_names = dict()

for categ_ind in range(n_categ):
    ids = ids_use[cmat_adjusted[:,categ_ind]]
    for conc in ids:
        files = os.listdir(os.path.join(things_images_root, conc))
        files.sort()
        image_names[conc] = files

In [63]:
concepts_each_categ = [conc_use[cmat_adjusted[:,ca]] for ca in range(n_categ)]
ids_each_categ = [ids_use[cmat_adjusted[:,ca]] for ca in range(n_categ)]
    

In [67]:
ids_each_categ[0]==concepts_each_categ[0]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True])

In [52]:
conc_use2 = np.array(concept_list)[conc_inds_keep]

In [53]:
[conc for conc in conc_use2 if conc not in conc_use]

[]

In [54]:
[conc for conc in conc_use if conc not in conc_use2]

[]

In [47]:
conc_use = ['cardinal',
 'chick',
 'cockatoo',
 'crow',
 'duck',
 'duckling',
 'eagle',
 'flamingo',
 'goose',
 'hawk',
 'hummingbird',
 'ostrich',
 'owl',
 'parrot',
 'peacock',
 'pelican',
 'penguin',
 'pheasant',
 'pigeon',
 'puffin',
 'rooster',
 'seagull',
 'swan',
 'toucan',
 'turkey',
 'vulture',
 'ankle',
 'arm',
 'chin',
 'ear',
 'elbow',
 'eye',
 'finger',
 'hair',
 'hip',
 'knee',
 'leg',
 'mouth',
 'navel',
 'neck',
 'nose',
 'shoulder',
 'skin',
 'thumb',
 'toe',
 'tongue',
 'tooth',
 'torso',
 'wrist',
 'bathrobe',
 'blazer',
 'blouse',
 'bra',
 'cape',
 'cardigan',
 'chaps',
 'corset',
 'costume',
 'dress',
 'fishnet_stockings',
 'fur_coat',
 'glove',
 'hood',
 'hoodie',
 'jacket',
 'jeans',
 'jersey',
 'jumpsuit',
 'kilt',
 'kimono',
 'lab_coat',
 'leggings',
 'leotard',
 'loincloth',
 'nightshirt',
 'overalls',
 'pajamas',
 'pantsuit',
 'pantyhose',
 'polo_shirt',
 'shawl',
 'shorts',
 'skirt',
 'snowsuit',
 'sock',
 'sweatsuit',
 'swimsuit',
 't-shirt',
 'toga',
 'tube_top',
 'turtleneck',
 'tuxedo',
 'undershirt',
 'underwear',
 'uniform',
 'vest',
 'baklava',
 'banana_split',
 'birthday_cake',
 'brownie',
 'cheesecake',
 'chocolate',
 'creme_brulee',
 'cupcake',
 'donut',
 'flan',
 'fruitcake',
 'fudge',
 'mousse',
 'parfait',
 'pastry',
 'pie',
 'popsicle',
 'pudding',
 'shortbread',
 'sorbet',
 'souffle',
 'tiramisu',
 'wedding_cake',
 'whipped_cream',
 'champagne',
 'eggnog',
 'espresso',
 'hot_chocolate',
 'juice',
 'latte',
 'lemonade',
 'milk',
 'smoothie',
 'soda',
 'tea',
 'wine',
 'cd_player',
 'cellphone',
 'computer_screen',
 'laptop',
 'metal_detector',
 'modem',
 'projector',
 'radio',
 'remote_control',
 'stereo',
 'tablet',
 'tape_recorder',
 'television',
 'apple',
 'banana',
 'blackberry',
 'blueberry',
 'cantaloupe',
 'cherry',
 'coconut',
 'cranberry',
 'fig',
 'grape',
 'grapefruit',
 'kiwi',
 'lemon',
 'lime',
 'mango',
 'mulberry',
 'orange',
 'papaya',
 'peach',
 'pear',
 'pineapple',
 'plum',
 'pomegranate',
 'prune',
 'raspberry',
 'star_fruit',
 'strawberry',
 'watermelon',
 'beanbag',
 'bench',
 'bookshelf',
 'bunkbed',
 'coffee_table',
 'desk',
 'dresser',
 'footrest',
 'lectern',
 'loveseat',
 'nightstand',
 'ottoman',
 'recliner',
 'rocking_chair',
 'sofa_bed',
 'step_stool',
 'workbench',
 'candelabra',
 'centerpiece',
 'coaster',
 'curtain',
 'doily',
 'fireplace',
 'frame',
 'lampshade',
 'mirror',
 'pillow',
 'potpourri',
 'tapestry',
 'terrarium',
 'vase',
 'wallpaper',
 'wreath',
 'ant',
 'bee',
 'beetle',
 'butterfly',
 'caterpillar',
 'cockroach',
 'dragonfly',
 'earwig',
 'fly',
 'grasshopper',
 'ladybug',
 'lightning_bug',
 'mosquito',
 'moth',
 'praying_mantis',
 'wasp',
 'bottle_opener',
 'can_opener',
 'cleaver',
 'corkscrew',
 'cutting_board',
 'eggbeater',
 'grater',
 'grinder',
 'ladle',
 'measuring_cup',
 'nutcracker',
 'peeler',
 'rolling_pin',
 'scoop',
 'sifter',
 'skewer',
 'spatula',
 'strainer',
 'tongs',
 'whisk',
 'bandage',
 'bedpan',
 'brace',
 'cane',
 'crutch',
 'first-aid_kit',
 'gauze',
 'gurney',
 'inhaler',
 'plaster_cast',
 'sling',
 'stethoscope',
 'stretcher',
 'syringe',
 'wheelchair',
 'accordion',
 'bagpipe',
 'banjo',
 'bassoon',
 'bell',
 'bongo',
 'cello',
 'chime',
 'clarinet',
 'cymbal',
 'flute',
 'french_horn',
 'gong',
 'guitar',
 'harmonica',
 'harp',
 'kazoo',
 'mandolin',
 'organ',
 'piano',
 'recorder',
 'saxophone',
 'tambourine',
 'triangle',
 'trombone',
 'trumpet',
 'tuba',
 'ukulele',
 'violin',
 'xylophone',
 'binder',
 'clipboard',
 'corkboard',
 'envelope',
 'eraser',
 'folder',
 'highlighter',
 'ink',
 'notebook',
 'paper',
 'paperclip',
 'paperweight',
 'pencil_sharpener',
 'penholder',
 'rubber_band',
 'scissors',
 'staple',
 'stapler',
 'tape',
 'thumbtack',
 'whiteboard',
 'airbag',
 'bumper',
 'car_door',
 'car_seat',
 'dashboard',
 'engine',
 'exhaust_pipe',
 'filter',
 'gearshift',
 'grille',
 'handbrake',
 'headlight',
 'headrest',
 'hubcap',
 'license_plate',
 'rearview_mirror',
 'steering_wheel',
 'sunroof',
 'taillight',
 'windshield_wiper',
 'aloe',
 'apple_tree',
 'bamboo',
 'bonsai',
 'bush',
 'cactus',
 'clover',
 'fern',
 'grapevine',
 'grass',
 'hedge',
 'ivy',
 'marijuana',
 'moss',
 'poinsettia',
 'seaweed',
 'tumbleweed',
 'weed',
 'baseball',
 'baseball_bat',
 'baseball_glove',
 'basketball',
 'basketball_hoop',
 'bobsled',
 'bowling_ball',
 'boxing_gloves',
 'cleat',
 'dartboard',
 'football',
 'football_helmet',
 'goalpost',
 'golf_club',
 'hockey_stick',
 'iceskate',
 'javelin',
 'ping-pong_table',
 'pool_table',
 'puck',
 'punching_bag',
 'racket',
 'rollerblade',
 'rollerskate',
 'saddle',
 'scoreboard',
 'skateboard',
 'ski',
 'ski_pole',
 'snowboard',
 'soccer_ball',
 'surfboard',
 'target',
 'tennis_ball',
 'trampoline',
 'volleyball',
 'beachball',
 'doll',
 'dollhouse',
 'dreidel',
 'gyroscope',
 'hobbyhorse',
 'hula_hoop',
 'kaleidoscope',
 'kite',
 'lego',
 'marble',
 'pinwheel',
 'pogo_stick',
 'puppet',
 'rattle',
 'rocking_horse',
 'slime',
 'squirt_gun',
 'stilt',
 'teddy_bear',
 'train_set',
 'whoopee_cushion',
 'yo-yo',
 'artichoke',
 'arugula',
 'asparagus',
 'beet',
 'bell_pepper',
 'bok_choy',
 'broccoli',
 'brussels_sprouts',
 'cabbage',
 'carrot',
 'cauliflower',
 'celery',
 'chive',
 'corn',
 'cucumber',
 'eggplant',
 'garlic',
 'green_beans',
 'jalapeno',
 'kale',
 'leek',
 'lettuce',
 'okra',
 'onion',
 'pea',
 'potato',
 'pumpkin',
 'radish',
 'rhubarb',
 'scallion',
 'spinach',
 'sprouts',
 'squash',
 'sweet_potato',
 'zucchini',
 'airplane',
 'buggy',
 'camper',
 'carriage',
 'dirt_bike',
 'garbage_truck',
 'golf_cart',
 'hearse',
 'hot-air_balloon',
 'humvee',
 'jeep',
 'limousine',
 'minivan',
 'motorcycle',
 'police_car',
 'quad',
 'rickshaw',
 'roadsweeper',
 'school_bus',
 'snowmobile',
 'snowplow',
 'station_wagon',
 'subway',
 'taxi',
 'train_car',
 'trolley',
 'unicycle',
 'van',
 'wagon',
 'arrow',
 'bazooka',
 'blowgun',
 'brass_knuckles',
 'cannon',
 'cannonball',
 'catapult',
 'crossbow',
 'dagger',
 'flamethrower',
 'grenade',
 'landmine',
 'machete',
 'machine_gun',
 'missile',
 'revolver',
 'rifle',
 'slingshot',
 'spear',
 'sword',
 'torpedo',
 'whip']

In [24]:
for cc1 in range(n_categ):
    for cc2 in np.arange(cc1+1, n_categ):
        overlap = cmat_adjusted[:,cc1] & cmat_adjusted[:,cc2]
        if np.sum(overlap)>0:
            cmat_adjusted[overlap,cc1] = False
            cmat_adjusted[overlap,cc2] = False

concepts_each_categ_adj = [np.array(concept_list)[cmat_adjusted[:,ii]==1] for ii in range(n_categ)]

In [27]:
# remove any concepts that we previously specified to exclude
concepts_each_categ_adj = [[cc for cc in conc if cc not in concepts_exclude] for conc in concepts_each_categ_adj]

In [18]:
ids_each_categ = [np.array(ids_list)[cmat_adjusted[:,ii]==1] for ii in range(n_categ)]

image_names = dict()

for categ_ind in range(n_categ):
    for conc in ids_each_categ[categ_ind]:
        files = os.listdir(os.path.join(things_images_root, conc))
        files.sort()
        image_names[conc] = files

remove from dessert and drink:
['milkshake']
remove from fruit and vegetable:
['tomato']
remove from furniture and home_decor:
['coat_rack']
remove from sports_equipment and toy:
['frisbee']
remove from toy and vehicle:
['scooter']
remove from toy and weapon:
['boomerang']


In [19]:
concepts_all = np.concatenate(concepts_each_categ_adj)
categ_all = np.repeat(categ_names, [len(cc) for cc in concepts_each_categ_adj])

In [6]:
categ_names_new = [cc for cc in categ_names if cc not in categ_exclude]
categ_inds_keep = [cc for cc in range(n_categ) if categ_names[cc] not in categ_exclude]
categ_names_new

['bird',
 'body_part',
 'clothing',
 'dessert',
 'drink',
 'electronic_device',
 'fruit',
 'furniture',
 'home_decor',
 'insect',
 'kitchen_tool',
 'medical_equipment',
 'musical_instrument',
 'office_supply',
 'part_of_car',
 'plant',
 'sports_equipment',
 'toy',
 'vegetable',
 'vehicle',
 'weapon']

In [6]:
categ_names_new3 = [cc for cc in categ_names if cc not in categ_exclude]
categ_inds_keep3 = [cc for cc in range(n_categ) if categ_names[cc] not in categ_exclude]
categ_names_new3

['bird',
 'body_part',
 'clothing',
 'dessert',
 'drink',
 'electronic_device',
 'fruit',
 'furniture',
 'home_decor',
 'insect',
 'kitchen_tool',
 'medical_equipment',
 'musical_instrument',
 'office_supply',
 'part_of_car',
 'plant',
 'sports_equipment',
 'toy',
 'vegetable',
 'vehicle',
 'weapon']

In [57]:
[categ_names_new[ii]==categ_names_new3[ii] for ii in range(len(categ_names_new))]

[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True]

In [7]:
concept_names_new3 = [[conc for conc in concepts_each_categ_adj[cc] if conc not in concepts_exclude] \
                         for cc in categ_inds_keep3]

In [10]:
concepts_each_categ_adj[1]

array(['cardinal', 'cockatoo', 'crow', 'duckling', 'eagle', 'flamingo',
       'goose', 'hawk', 'hummingbird', 'owl', 'parrot', 'peacock',
       'pelican', 'pheasant', 'pigeon', 'puffin', 'rooster', 'seagull',
       'swan', 'toucan', 'turkey', 'vulture'], dtype='<U18')

In [8]:
concept_names_new3[0]

['cardinal',
 'cockatoo',
 'crow',
 'duckling',
 'eagle',
 'flamingo',
 'goose',
 'hawk',
 'hummingbird',
 'owl',
 'parrot',
 'peacock',
 'pelican',
 'pheasant',
 'pigeon',
 'puffin',
 'rooster',
 'seagull',
 'swan',
 'toucan',
 'turkey',
 'vulture']

In [16]:
concept_names_new3[0]

['cardinal',
 'cockatoo',
 'crow',
 'duckling',
 'eagle',
 'flamingo',
 'goose',
 'hawk',
 'hummingbird',
 'owl',
 'parrot',
 'peacock',
 'pelican',
 'pheasant',
 'pigeon',
 'puffin',
 'rooster',
 'seagull',
 'swan',
 'toucan',
 'turkey',
 'vulture']

In [70]:
[len(conc) for conc in concept_names_new3], [len(conc) for conc in concept_names_new]

([22,
  23,
  47,
  18,
  12,
  13,
  28,
  17,
  16,
  16,
  20,
  15,
  30,
  21,
  20,
  18,
  36,
  23,
  31,
  29,
  22],
 [26,
  23,
  47,
  24,
  12,
  13,
  28,
  17,
  16,
  16,
  20,
  15,
  30,
  21,
  20,
  18,
  36,
  23,
  35,
  29,
  22])

In [59]:
concept_names_new4 = []

for ca, categ in enumerate(categ_names):
   
    if categ not in categ_exclude:

        conc_inds_plot = np.where(categ_all==categ)[0]
        
        conc_inds_plot = [ci for ci in conc_inds_plot if concepts_all[ci] not in concepts_exclude]
        
        concept_names_new4 += [[conc for conc in concepts_all[conc_inds_plot] if conc not in concepts_exclude]]

In [25]:
categ_inds_keep

[1, 2, 3, 6, 7, 8, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26]

In [8]:
len(concept_names_new[0])

26

In [43]:
concept_names_new2 = []

for ca, categ in enumerate(categ_names):
   
    if categ not in categ_exclude:

        conc_inds_plot = np.where(categ_all==categ)[0]
        
        conc_inds_plot = [ci for ci in conc_inds_plot if concepts_all[ci] not in concepts_exclude]
        
        concept_names_new2 += [[conc for conc in concepts_all[conc_inds_plot] if conc not in concepts_exclude]]

In [49]:
np.all(np.all([[concept_names_new[ii][jj]==concept_names_new2[ii][jj] for jj in range(len(concept_names_new[ii]))] \
                for ii in range(len(concept_names_new))]))

True

In [None]:
np.all(np.all([[concept_names_new[ii][jj]==concept_names_new3[ii][jj] for jj in range(len(concept_names_new[ii]))] \
                for ii in range(len(concept_names_new))]))

IndexError: list index out of range