In [None]:
import pandas as pd
import numpy as np
import copy

In [None]:
# Update these variables to work with your setup.
dataFolder = 'data2016'
# All per-image feature CSVs sit in this sub-folder of the data folder.
imageFeaturesFolder = dataFolder + "/image_features"

# One CSV per feature family.
BASIC_FEATURES = imageFeaturesFolder + "/features_basic.csv"
COLOR_FEATURES = imageFeaturesFolder + "/features_color.csv"
TEXTURE_FEATURES = imageFeaturesFolder + "/features_texture.csv"
OBJECT_FEATURES = imageFeaturesFolder + "/features_objects.csv"
SCENE_FEATURES = imageFeaturesFolder + "/features_scene.csv"
FACE_FEATURES = imageFeaturesFolder + "/features_faces.csv"
WORDNET_DOMAINS_FEATURES = imageFeaturesFolder + "/features_objects_wordnet_domains.csv"

# Files written by the final two cells of this notebook.
ALL_FEATURES = imageFeaturesFolder + "/features_image_all.csv"
AVERAGE_FEATURES = imageFeaturesFolder + "/features_image_all_averages.csv"

In [None]:
#BASIC - ONLY RUN ONCE
# Colour-percentage columns to discard from the basic feature table.
# The '.1' suffix is what pandas appends to a repeated header name when it
# reads a CSV -- presumably the raw file has two 'red_%', 'green_%' and
# 'blue_%' columns; confirm against the raw file.
dropped_color_columns = [
    'black_%', 'gray_%', 'silver_%', 'white_%',
    'maroon_%', 'red_%.1', 'olive_%', 'yellow_%',
    'green_%.1', 'lime_%', 'teal_%', 'aqua_%',
    'navy_%', 'blue_%.1', 'purple_%', 'fuchsia_%',
]
features_basic = pd.read_csv(BASIC_FEATURES).drop(dropped_color_columns, axis=1)
# The input file is overwritten, which is why this cell must only run once:
# on a second run the columns listed above no longer exist.
features_basic.to_csv(BASIC_FEATURES)

In [None]:
#COLOR - ONLY RUN ONCE
# The eleven colour columns that follow the two key columns.
colors = ['black','blue','brown','grey','green','orange','pink','purple','red','white','yellow']

# The raw file is read without a header row, so the columns are named here.
features_color = pd.read_csv(COLOR_FEATURES, header=None)
features_color.columns = ['id', 'imageNum'] + colors

# Keep a copy of the raw values before they are normalised.
features_color.to_csv(dataFolder + "/image_features/features_color_unnormalized.csv")

# Divide every colour value by its row total (rows whose total is 0 are not
# special-cased), then overwrite the input file with the normalised table.
color_values = features_color[colors]
row_totals = color_values.sum(axis=1)
features_color[colors] = color_values.div(row_totals, axis=0)
features_color.to_csv(COLOR_FEATURES)

In [None]:
#TEXTURE - ONLY RUN ONCE
# The raw file is read without a header row; these names are attached to its
# columns before the file is overwritten with the labelled table.
texture_columns = ['id', 'imageNum', 'contrast', 'correlation', 'energy', 'homogeneity']
features_texture = pd.read_csv(TEXTURE_FEATURES, header=None)
features_texture.columns = texture_columns
features_texture.to_csv(TEXTURE_FEATURES)

In [None]:
#FACES - ONLY RUN ONCE
# The raw file is read without a header row; its two columns are named here.
features_faceDetection = pd.read_csv(FACE_FEATURES, header=None)
features_faceDetection.columns = ['fileName', 'num_faces']

file_names = features_faceDetection['fileName']
# imageNum is the single character six positions from the end of the path.
# NOTE(review): the OBJECTS cell reads position -5 instead -- confirm that the
# two sets of file names really differ in extension length.
imageNums = [name[-6] for name in file_names]
# id is the name of the directory that directly contains the file.
ids = [name.split('/')[-2] for name in file_names]

# Replace the path column with the two derived key columns and overwrite the file.
features_faceDetection['id'] = ids
features_faceDetection['imageNum'] = imageNums
features_faceDetection = features_faceDetection.drop('fileName', axis=1)
features_faceDetection.to_csv(FACE_FEATURES)

In [None]:
#SCENES - ONLY RUN ONCE
# The scene feature file is read without a header row; column names are
# attached further down in this cell.
sceneFeatures = pd.read_csv(SCENE_FEATURES,header=None)

# Names for the scene-score columns, in file column order (they follow the two
# key columns). Presumably these are the categories of the scene classifier
# that produced the file -- confirm against that model's label list.
# The strings become CSV column names, so their exact spelling (including
# "baseball_field", "kindergarden" and "swimmming") must not be changed
# without updating every consumer of the file.
labels = ["abbey","airport terminal","alley","amphitheater","amusement park",
         "aquarium","aqueduct","arch","art gallery","art studio","assembly line",
         "attic","auditorium","apartment building outdoor","badlands","ballroom",
         "bamboo forest","banquet hall","bar","baseball_field","basement","basilica",
         "bayou","beauty salon","bedroom","boardwalk","boat deck","bookstore",
         "botanical garden","bowling alley","boxing ring","bridge","building facade",
         "bus interior","butchers shop","butte","bakery shop","cafeteria","campsite",
         "candy store","canyon","castle","cemetery","chalet","classroom","closet",
         "clothing store","coast","cockpit","coffee shop","conference center",
         "conference room","construction site","corn field","corridor","cottage garden",
         "courthouse","courtyard","creek","crevasse","crosswalk","cathedral outdoor",
         "church outdoor","dam","dining room","dock","dorm room","driveway","desert sand",
         "desert vegetation","dinette home","doorway outdoor","engine room","excavation",
         "fairway","fire escape","fire station","food court","forest path","forest road",
         "formal garden","fountain","field cultivated","field wild","galley","game room",
         "garbage dump","gas station","gift shop","golf course","harbor","herb garden",
         "highway","home office","hospital","hospital room","hot spring","hotel room",
         "hotel outdoor", "ice cream parlor","iceberg","igloo","islet",
         "ice skating rink outdoor","inn outdoor","jail cell","kasbah",
         "kindergarden classroom","kitchen","kitchenette","laundromat","lighthouse",
         "living room","lobby","locker room","mansion","marsh","martial arts gym","mausoleum",
         "medina","motel","mountain","mountain snowy","music studio","market outdoor",
         "monastery outdoor","museum indoor","nursery","ocean","office","office building",
         "orchard","pagoda","palace","pantry","parking lot","parlor","pasture","patio",
         "pavilion","phone booth","picnic area","playground","plaza","pond","pulpit",
         "racecourse","raft","railroad track","rainforest","reception",
         "residential neighborhood","restaurant","restaurant kitchen","restaurant patio",
         "rice paddy","river","rock arch","rope bridge","ruin","runway","sandbar",
         "schoolhouse","sea cliff","shed","shoe shop","shopfront","shower","ski resort",
         "ski slope","sky","skyscraper","slum","snowfield","staircase","supermarket","swamp",
         "stadium baseball","stadium football","stage indoor","subway station platform",
         "swimmming pool outdoor","television studio","topiary garden","tower","train railway",
         "tree farm","trench","temple east asia","temple south asia","track outdoor",
         "train station platform","underwater coral reef","valley","vegetable garden",
         "veranda","viaduct","volcano","waiting room","water tower","watering hole",
         "wheat field","wind farm","windmill","yard"]

# Prefix every scene label with "PLACES " so the column names are marked as
# scene features.
labels = ["PLACES " + sceneName for sceneName in labels]

# Full header: the two key columns first, then one column per scene label.
labelsPlus = ['id', 'imageNum'] + labels
sceneFeatures.columns = labelsPlus

# Overwrite the input file with the labelled table.
sceneFeatures.to_csv(SCENE_FEATURES)

In [None]:
#OBJECTS - ONLY RUN ONCE
features_objects2 = pd.read_csv(OBJECT_FEATURES)

# The unnamed first column holds the path of each image file.
# `.values` is used because `DataFrame.as_matrix()` was removed in pandas 1.0.
fullFileNames = features_objects2['Unnamed: 0'].values

# imageNum is the single character five positions from the end of the path;
# id is the name of the directory that directly contains the file.
imageNums = [fileName[-5] for fileName in fullFileNames]
ids = [fileName.split('/')[-2] for fileName in fullFileNames]

features_objects2['id'] = ids
features_objects2['imageNum'] = imageNums

features_objects2 = features_objects2.drop('Unnamed: 0',axis=1)

# One column label per line of the synset file; `with` closes the file handle.
# NOTE(review): every label keeps the whole line, including its trailing
# newline, so the column names end in '\n'. Stripping it would rename the
# columns of the written CSV -- confirm with downstream consumers first.
with open('imagenet_synset_words.txt', 'r') as f:
    labels = ["IMAGENET " + line for line in f]
labels.append('id')
labels.append('imageNum')
features_objects2.columns = labels

# Collapse to one row per (id, imageNum) by summing -- presumably the input
# has one row per bounding box; confirm against the feature extractor.
features_objects2 = features_objects2.groupby(['id','imageNum']).sum().reset_index()

#Some of the images don't have any bounding boxes generated for them
#Add all 0s for these images
# NOTE(review): the two lists below have different lengths, and only the
# first len(ids) pairs are used -- these look like placeholders; fill in the
# real ids before running.
ids = ['aaa']
imageNums = ['5','5','3','3']

# Width of the zero padding: every column except the two keys (1000 when the
# full label file is used), derived so it cannot drift from the frame.
numObjectColumns = len(features_objects2.columns) - 2
allArs = [[ids[i], imageNums[i]] + [0]*numObjectColumns for i in range(len(ids))]

noBoxes = pd.DataFrame(data=allArs,columns=features_objects2.columns)
# `pd.concat` is used because `DataFrame.append` was removed in pandas 2.0.
features_objects2 = pd.concat([features_objects2, noBoxes])

features_objects2.to_csv(OBJECT_FEATURES)

In [None]:
#ONLY RUN ONCE
#Generate all features spreadsheet
# Outer-merge the feature tables built by the cells above, joining on
# whatever column names each pair of tables has in common.
# The list uses the names those cells actually define (`features_objects2`,
# `sceneFeatures`); `features_objects` and `features_scene` are not defined
# anywhere in this notebook and raise NameError on a fresh kernel.
# The face-detection table is not part of the merge.
# NOTE(review): the OBJECTS cell derives id/imageNum as strings from file
# paths, while the other tables get them from CSV parsing -- confirm the key
# dtypes agree, otherwise the outer merge will not line the rows up.
features = [features_basic,features_color,features_texture,features_objects2,sceneFeatures]
allFeatures = features[0]
for featureTable in features[1:]:
    allFeatures = allFeatures.merge(featureTable,how='outer')
allFeatures.to_csv(ALL_FEATURES)

In [None]:
#ONLY RUN ONCE
#Generate average features spreadsheet
# Average every feature over the images that share an id: drop the per-image
# key, take the mean within each id, then turn id back into a regular column.
# No per-id image count is added to the result.
averageFeatures = (
    allFeatures
    .drop('imageNum', axis=1)
    .groupby('id')
    .mean()
    .reset_index()
)
averageFeatures.to_csv(AVERAGE_FEATURES)