**The entire process was run in batches because feature extraction was too time-consuming.**

In [1]:
from sklearn.decomposition import PCA
import pandas as pd
from sklearn.pipeline import Pipeline
import numpy as np
import pickle

**Reading the dataframe**

In [2]:
# Load the first batch of extracted features into a DataFrame.
features_csv_path = './data/extracted_features_sub_dataset_1.csv'
df = pd.read_csv(features_csv_path)

**Extracting the desired features from the dataframe**

In [3]:
# Names of the feature columns: "f 1" through "f 4096".
feature_columns = ["f {}".format(idx) for idx in range(1, 4097)]
# Select those columns and take the underlying array as the PCA input.
x = df.loc[:, feature_columns].values

**Fitting PCA and applying it to the data**

In [None]:
# Reduce the feature matrix `x` to 512 principal components.
# random_state is pinned so that re-running this cell reproduces the same
# model: with the default svd_solver='auto', scikit-learn may select a
# randomized SVD solver for large inputs, which is otherwise non-deterministic.
pca = PCA(n_components=512, random_state=42)
# Fit on `x` and project it in one step; the result has one row per row of
# `x` and one column per component.
principalComponents = pca.fit_transform(x)

**Storing the PCA model for later use in the search**

In [None]:
# Serialize the fitted PCA model so the same projection can be reloaded later.
filename = './data/pca_model.pkl'
# The context manager guarantees the file is flushed and closed even if
# pickling raises; a bare open() would leave the handle to the garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(pca, model_file)

**Storing the final dataframe to be indexed in ElasticSearch**

In [None]:
# Wrap the projected components in a DataFrame with columns "f 1", "f 2", ...
# The column count is read from the array itself so it cannot drift out of
# sync with the number of components the PCA was configured with.
component_columns = ["f {}".format(i + 1) for i in range(principalComponents.shape[1])]
# Reuse df's index so the rows line up with their metadata when the two
# frames are later joined on the index.
principalDf = pd.DataFrame(data=principalComponents, columns=component_columns, index=df.index)

In [None]:
# Non-feature columns carried over from df into the final frame.
important_columns = [
    'ImageID',
    'OriginalURL',
    'AuthorID',
    'Title',
    'tags',
]

In [None]:
# Place the selected columns of df side by side with the reduced features,
# keeping only index labels present in both frames.
metadata_part = df.loc[:, important_columns]
frames_to_join = [metadata_part, principalDf]
result = pd.concat(frames_to_join, axis=1, join='inner')

In [None]:
# Write the reduced dataset to disk. to_csv is called with its defaults, so
# the row index is written alongside the data columns.
reduced_csv_path = './data/extracted_features_sub_dataset_1_reducted.csv'
result.to_csv(reduced_csv_path)

In [None]:
# Preview the first five rows of the final frame.
result.head(n=5)

Unnamed: 0,ImageID,OriginalURL,AuthorID,Title,tags,f 1,f 2,f 3,f 4,f 5,...,f 503,f 504,f 505,f 506,f 507,f 508,f 509,f 510,f 511,f 512
0,4fa8054781a4c382,https://farm3.staticflickr.com/5310/5898076654...,michael-beat,...die FNF-Kerze,"Person, Clock, Clothing, Flower, Human face, H...",0.010552,-0.003925,-0.022008,0.008379,-0.003412,...,-0.001277,0.000387,-0.001471,0.000747,-0.001833,0.000651,0.00073,-0.000271,0.001058,-0.00097
1,b37f763ae67d0888,https://c1.staticflickr.com/1/67/197493648_628...,drstarbuck,Three boys on a hill,"Person, Man, Plant, Clothing",0.002671,-0.012279,0.010701,-0.025629,0.000239,...,0.000276,-0.000151,-0.001541,-0.001535,-0.001718,0.002071,0.001282,0.00033,-0.001213,0.000608
2,7e8584b0f487cb9e,https://c7.staticflickr.com/8/7056/7143870979_...,circasassy,A Christmas carol and The cricket on the heart...,"Person, Human body, Woman, Human hair, Human h...",-0.002613,0.011978,0.012581,0.000491,-0.006902,...,-0.000687,0.002708,-0.000391,-0.001446,-0.000363,-0.001231,0.002271,-0.00052,0.002895,0.002471
3,249086e72671397d,https://c6.staticflickr.com/4/3930/15342460029...,codnewsroom,Suburban Law Enforcement Academy 20th Annivers...,"Person, Building, Window",0.003937,-0.012419,-0.019988,0.010696,-0.003101,...,-0.001327,0.000981,-0.000959,0.000232,-0.000457,-0.00116,-0.000236,0.00117,0.00055,0.001443
4,3c2fad96113ca97e,https://c7.staticflickr.com/1/668/21529344631_...,96228372@N06,Morning Light,"Person, Woman, Girl, Clothing",0.007681,0.010849,-0.007089,-0.00235,-0.003655,...,-0.000341,-0.000643,-6.2e-05,0.000335,5.3e-05,0.000805,-0.001937,-0.001491,-0.001454,0.002359
