This notebook extracts features from restaurant photos using a pretrained VGG-16 network.
Paper: Very Deep Convolutional Networks for Large-Scale Image Recognition K. Simonyan, A. Zisserman
arXiv:1409.1556

#VGG16 model - ILSVRC - 2014 competition
#Mean = [103.909, 116.779, 123.68]
#BGR format

For every image, I extract hidden-layer activations from several layers of the network and then average the activations over all images that belong to one restaurant. The averaged activation values are used as the features for that restaurant.

I used Theano and Keras to extract the layer-level weights and activations for the images.

In [2]:
#Yelp
import numpy as np
import pandas as pd
import os
#import h5py
#import cv2
from scipy.misc import imread, imresize, imshow
import pickle
from matplotlib import pyplot as plt
from keras.utils.generic_utils import Progbar
import time
import random

In [3]:
#Deep learning packages
import theano
from keras.models import Sequential
from keras.optimizers import SGD
from keras.activations import relu
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers.core import Dense, Flatten, Dropout

DEBUG: nvcc STDOUT mod.cu
   Creating library C:/Users/Sharath/AppData/Local/Theano/compiledir_Windows-7-6.1.7601-SP1-Intel64_Family_6_Model_63_Stepping_2_GenuineIntel-2.7.11-64/tmpizvobt/265abc51f7c376c224983485238ff1a5.lib and object C:/Users/Sharath/AppData/Local/Theano/compiledir_Windows-7-6.1.7601-SP1-Intel64_Family_6_Model_63_Stepping_2_GenuineIntel-2.7.11-64/tmpizvobt/265abc51f7c376c224983485238ff1a5.exp

Using gpu device 0: Tesla K20c (CNMeM is disabled, CuDNN 3007)
Using Theano backend.
  "downsample module has been moved to the theano.tensor.signal.pool module.")


In [4]:
# Load the label table, the sample submission and the two photo-to-business
# mappings from the current working directory (read in the same order as before).
train, sub, train_biz, test_biz = [
    pd.read_csv(csv_name)
    for csv_name in (
        "train.csv",
        "sample_submission.csv",
        "train_photo_to_biz_ids.csv",
        "test_photo_to_biz.csv",
    )
]

In [None]:
#Check the data
# Only the last expression of a notebook cell is rendered, so this cell shows
# train_biz.dtypes; the two head() calls are evaluated and their results dropped.
train.head()
train_biz.head()
train_biz.dtypes

In [5]:
# train.isnull().sum() reports 4 rows with a missing "labels" value; drop them.
train = train.dropna()
# Convert every space-separated label string (e.g. "1 2 5") into a list of ints.
# NOTE: the cell is not idempotent - running it twice fails because the column
# then already holds lists, which have no .split().
labels = [[int(tok) for tok in label_str.split()] for label_str in train["labels"]]
train["labels"] = labels

In [6]:
#resize, reshape as per input layer of VGG - [None,3,224,224]
# As per VGG paper - input is mean subtracted 224X224 RGB image
def input_adjust(path):
    """Read an image file and return it as a mean-subtracted (3, 224, 224) float32 array.

    The picture is read with ``imread``, resized to 224x224 with ``imresize``,
    the per-channel means 123.68 / 116.779 / 103.909 are subtracted from
    channels 0 / 1 / 2, and the channel axis is moved to the front so the
    result matches the ``input_shape=(3, 224, 224)`` declared in ``VGG_16``.

    Parameters
    ----------
    path : str
        Location of the image file.

    Returns
    -------
    numpy.ndarray
        Contiguous float32 array of shape (3, 224, 224), channels first.
    """
    # assumes imread returns a height x width x 3 RGB array - TODO confirm for
    # greyscale / RGBA files, which would break the channel indexing below
    pic = imread(path)
    pic = imresize(pic, (224,224,3)) #As required for input layer
    pic = pic.astype(np.float32)

    # Channel order is kept as read (RGB), so the means are applied in RGB order.
    # NOTE(review): the notebook header describes the VGG16 means as BGR
    # ([103.909, 116.779, 123.68]); confirm which channel order the weight
    # file expects before relying on these features.
    pic[:,:,0] -= 123.68
    pic[:,:,1] -= 116.779
    pic[:,:,2] -= 103.909

    # Move the channel axis to the front: (224, 224, 3) -> (3, 224, 224).
    # This must be a transpose; reshape(3, 224, 224) would only reinterpret the
    # same memory and interleave pixels of different channels.
    pic = np.ascontiguousarray(pic.transpose((2, 0, 1)), dtype=np.float32)
    return pic

In [7]:
def get_biz_feature(features):
    """Return the column-wise mean of ``features`` (one row per picture) as a list."""
    return list(features.mean(axis = 0))

In [19]:
def get_max_feature(features):
    """Return the column-wise maximum of ``features`` (one row per picture) as a list."""
    return list(features.max(axis = 0))

In [8]:
'''Very Deep Convolutional Networks for Large-Scale Image Recognition
K. Simonyan, A. Zisserman
arXiv:1409.1556'''
#VGG16 model - ILSVRC - 2014 competition
#Mean = [103.909, 116.779, 123.68]
#BGR format
#

def VGG_16(weights_path = None):
    """Build the VGG-16 network as a Keras ``Sequential`` model.

    The model has five convolutional blocks (13 zero-padded 3x3 convolutions in
    total, each block closed by 2x2 max pooling) followed by Flatten, two
    Dense(4096)+Dropout(0.5) pairs and a Dense(1000) softmax layer.

    Layer positions in ``model.layers`` that other cells rely on:
    0-30 convolutional blocks, 31 Flatten, 32 Dense(4096), 33 Dropout,
    34 Dense(4096), 35 Dropout, 36 Dense(1000).

    Parameters
    ----------
    weights_path : str or None
        If truthy, weights are loaded from this file with ``load_weights``.

    Returns
    -------
    The (uncompiled) Sequential model.
    """
    # (number of filters, number of conv layers) for each convolutional block
    conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = Sequential()
    is_first_layer = True
    for n_filters, n_convs in conv_blocks:
        for _ in range(n_convs):
            if is_first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1,1), input_shape = (3,224,224)))
                is_first_layer = False
            else:
                model.add(ZeroPadding2D((1,1)))
            model.add(Convolution2D(n_filters, 3, 3, activation='relu'))
        model.add(MaxPooling2D((2,2), strides=(2,2)))

    # Classifier head
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(4096, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Dense(1000, activation='softmax'))

    if weights_path:
        model.load_weights(weights_path)

    return model

In [9]:
def get_network(weights_path='vgg16_weights.h5', layer_index=32):
    """Compile a Theano function mapping an image batch to one layer's output.

    Parameters
    ----------
    weights_path : str
        Weight file handed to ``VGG_16``. Defaults to the file the notebook
        has always used.
    layer_index : int
        Position in ``model.layers`` whose output is returned. With ``VGG_16``
        as defined in this notebook, the default 32 is the first Dense(4096)
        layer, i.e. the layer right after Flatten (index 31) - not a layer
        "before the dense layers".

    Returns
    -------
    A ``theano.function`` taking the model input and returning the chosen
    layer's output.
    """
    model = VGG_16(weights_path)
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy')

    # Symbolic output of the layer of interest and symbolic model input.
    # train=False presumably selects the inference-time graph - TODO confirm
    # against the installed Keras version.
    my_out = model.layers[layer_index].get_output(train = False)
    input_layer = model.get_input(train = False)

    my_net = theano.function([input_layer], my_out)

    return my_net

In [13]:
def extended_network(weights_path='vgg16_weights.h5', in_layer=33, out_layer=34):
    """Compile a Theano function that continues the forward pass between two layers.

    Parameters
    ----------
    weights_path : str
        Weight file handed to ``VGG_16``. Defaults to the file the notebook
        has always used.
    in_layer : int
        Layer whose *input* becomes the function's argument. With ``VGG_16``
        as defined in this notebook, 33 is the Dropout after the first
        Dense(4096), so the argument is the output of layer 32.
    out_layer : int
        Layer whose *output* is returned. The default 34 is the second
        Dense(4096) layer.

    Returns
    -------
    A ``theano.function`` mapping layer-``in_layer`` input to
    layer-``out_layer`` output.
    """
    model = VGG_16(weights_path)
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy')

    # The two symbolic variables are requested in the original order
    # (output first, then input); the call order is deliberately kept.
    dense2 = model.layers[out_layer].get_output(train = False)
    dense1 = model.layers[in_layer].get_input(train = False)

    ext_net = theano.function([dense1], dense2)
    return ext_net

In [14]:
# Build the layer-33-input -> layer-34-output function once; get_out34 and the
# per-picture feed-forward cells further down call ext_net.
ext_net = extended_network()

In [None]:
#First get the network with the layers of interest from VGG net
# net is called by the train and test feature-extraction loops below on
# batches of preprocessed pictures.
net = get_network()

In [None]:
# Get features for train businesses
folder = "F:\\sharath\\yelpdata\\train_photos"
train_data = pd.DataFrame(columns=["BusinessId","Target","Features"])
train_features = []
status = Progbar(train.shape[0])
for idx, row in train.iterrows():
    bid = row["business_id"]
    labels = row["labels"]

    # Photo IDs that belong to this business and the matching image files
    pic_ids = train_biz.loc[(train_biz.business_id == bid),"photo_id"]
    pic_path = [os.path.join(folder,str(p)+".jpg").replace("\\","/") for p in pic_ids]

    # Preprocess every photo and stack them into one float32 batch (n,3,224,224)
    pics = np.array([input_adjust(path) for path in pic_path])
    pics = pics.reshape(len(pics),3,224,224).astype(np.float32)
    print("\n %d pics for business ID %s is ready for net" %(pics.shape[0], str(bid)))

    # One 4096-long feature vector per picture, taken from the layer that `net` exposes.
    # The batch is pushed through the network in slices of at most 30 pictures:
    # the author hit GPU memory failures with inputs larger than 30X3X224X224
    # (the same code ran fine on the CPU).
    length = int(pics.shape[0])
    features = np.zeros(shape=[length,4096])
    for start in range(0,length,30):
        end = min(start+30, length)
        features[start:end,:] = net(pics[start:end,:,:,:])

    # Keep the per-picture features ...
    train_features.append([pic_ids, features]) #pics_features[0-1996][0-1]

    # ... and their mean as the feature vector of the business
    business_feature = get_biz_feature(features)
    train_data.loc[idx] = [bid, labels, business_feature]
    status.add(1)

#Save pics wise features
pics_out = "F:\sharath\yelpdata\\train_pics.pkl_2"
with open(pics_out, "wb") as f:
    pickle.dump(train_features, f, protocol = pickle.HIGHEST_PROTOCOL)
print("features for train photos saved to %s" %(pics_out))

#Save business wise features
biz_out = "F:\sharath\yelpdata\\train_data_2.csv"
with open(biz_out, "w") as f:
    train_data.to_csv(f, index = False)
print("features for train businesses saved to %s" %(biz_out))

In [None]:
# Write the per-business training features to train_data.csv (no index column).
csv_out = "F:\sharath\yelpdata\\train_data.csv"
with open(csv_out, "w") as f:
    train_data.to_csv(f, index = False)
print("features for train businesses saved to %s" %(csv_out))

In [None]:
# Get features for test businesses
folder = "F:\\sharath\\yelpdata\\test_photos"
#test = pd.read_csv("test_photo_to_biz.csv")
test_bids = list(set(test_biz.business_id.values)) #unique business IDs as a plain list
test_data = pd.DataFrame(columns=["BusinessId","Features"])
test_features = []

# Wall-clock timer for the whole loop. It uses its own name because the batch
# loop below assigns `start`; sharing the name made the reported time wrong.
t0 = time.time()
status = Progbar(len(test_bids))
for idx, bid in enumerate(test_bids):
    #for a given business ID get the photo IDs
    pic_ids = test_biz.loc[(test_biz.business_id == bid),"photo_id"]

    #To minimize the time, consider at most 100 randomly chosen pics per restaurant.
    #NOTE: random is not seeded, so the chosen subset differs between runs.
    if len(pic_ids) > 100:
        t = random.sample(range(len(pic_ids)), 100)
        pic_ids = pic_ids.iloc[t]

    pic_path = [os.path.join(folder,str(p)+".jpg") for p in pic_ids]

    #Get pics in the format required for VGG
    pics = [input_adjust(path) for path in pic_path]
    pics = np.array(pics)
    pics = pics.reshape(len(pics),3,224,224)
    print("\n %d pics for business ID %s is ready for net" %(pics.shape[0], str(bid)))

    #Get a 4096 length feature vector for every pic in pics (that belong to a business ID),
    #taken from the layer that `net` exposes.
    #The batch is pushed through the network in slices of at most 30 pictures:
    #the author hit GPU memory failures with inputs larger than 30X3X224X224
    #(the same code ran fine on the CPU).
    length = int(pics.shape[0])
    features = np.zeros(shape=[length,4096])
    for start in range(0,length,30):
        end = min(start+30, length)
        features[start:end,:] = net(pics[start:end,:,:,:])

    #Store the features of each pic
    test_features.append([pic_ids, features])

    #Get the mean feature for all the pics that belong to a business
    business_feature = get_biz_feature(features)
    test_data.loc[idx] = [bid, business_feature]
    status.add(1)
print("Time taken for test features %d" %(time.time()-t0))

#Save pics wise features
with open("F:\sharath\yelpdata\\test_pics_2.pkl", "wb") as f:
    pickle.dump(test_features, f, protocol = pickle.HIGHEST_PROTOCOL)
print("features for test photos saved to %s" %("F:\sharath\yelpdata\\test_pics_2.pkl"))

#Save business features
with open("F:\sharath\yelpdata\\test_data_2.csv", "w") as f:
    test_data.to_csv(f, index = False)
print("features for test businesses saved to %s" %("F:\sharath\yelpdata\\test_data_2.csv"))

In [100]:
#train data 1996 biz ids
#test data 10000 biz ids
#feed through layer 33 to 34 (dense1 to dense2)
def get_out34(df):
    """Feed the stored per-business feature vectors of ``df`` through ``ext_net``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a "Features" column whose entries are strings of the form
        "[v0, v1, ...]" (a Python list written to CSV).

    Returns
    -------
    Whatever ``ext_net`` returns for the float32 matrix built from those
    vectors (one row per row of ``df``).
    """
    feature_strings = df.loc[:,"Features"]
    # Strip the surrounding brackets and parse the comma-separated numbers.
    # A list comprehension is used so the result is a real list on Python 3
    # as well (map() only returns a list on Python 2).
    parsed = [[float(f) for f in x[1:-1].split(",")] for x in feature_strings]
    input_train_32 = np.array(parsed, dtype=np.float32)

    # get output of 34 layer
    out_train_34 = ext_net(input_train_32)
    return out_train_34

In [101]:
# Push the stored per-business training features through ext_net and save them.
train_data_2 = pd.read_csv("F:\sharath\yelpdata\\train_data_2.csv")
out_train_34 = get_out34(train_data_2)

#Create a new df: same ids and targets, features replaced by the ext_net output
t = out_train_34.tolist()
train_data_3 = pd.DataFrame(columns=["BusinessId","Target","Features"])
for (idx, row), feature_row in zip(train_data_2.iterrows(), t):
    train_data_3.loc[idx] = [row["BusinessId"], row["Target"], feature_row]

#Save business features
with open("F:\sharath\yelpdata\\train_data_3.csv", "w") as f:
    train_data_3.to_csv(f, index = False)

In [102]:
#test data 10000 biz ids
#feed through layer 33 to 34 (dense1 to dense2)
# Push the stored per-business test features through ext_net and save them.
test_data_2 = pd.read_csv("F:\sharath\yelpdata\\test_data_2.csv")
out_test_34 = get_out34(test_data_2)

#Create a new df: same ids, features replaced by the ext_net output
t = out_test_34.tolist()
test_data_3 = pd.DataFrame(columns=["BusinessId","Features"])
for (idx, row), feature_row in zip(test_data_2.iterrows(), t):
    test_data_3.loc[idx] = [row["BusinessId"], feature_row]

#Save business features
with open("F:\sharath\yelpdata\\test_data_3.csv", "w") as f:
    test_data_3.to_csv(f, index = False)

In [15]:
#load 32 layer o/p of each pic and feed fwd to 34th layer
with open("F:\sharath\yelpdata\\train_pics.pkl", "rb") as f:
    train_pics = pickle.load(f)

# Business ids and targets come from train_data.csv; row idx of that file is
# paired with entry idx of train_pics.
df = pd.read_csv("F:\sharath\yelpdata\\train_data.csv")
train_data = pd.DataFrame(columns=["BusinessId","Target","Features"])
train_features = []
status = Progbar(len(train_pics))
for idx, pic_data in enumerate(train_pics):
    # pic_data[0] = photo ids, pic_data[1] = stored per-picture features
    Xout = ext_net(np.array(pic_data[1], dtype=np.float32))

    #Store the features of each pic
    train_features.append([pic_data[0], Xout])

    #Mean over all the pics that belong to a business
    train_data.loc[idx] = [df.loc[idx,"BusinessId"], df.loc[idx,"Target"], get_biz_feature(Xout)]
    status.add(1)

#Save business features - RGB - 2nd Dense layer - mean features = 1996X4096 size
with open("F:\sharath\yelpdata\\train_data_5.csv", "w") as f:
    train_data.to_csv(f, index = False)



In [16]:
#load 32 layer o/p of each pic and get feed fwd to 34th layer
with open("F:\sharath\yelpdata\\test_pics.pkl", "rb") as f:
    test_pics = pickle.load(f)
# number of test businesses stored in the pickle
print(len(test_pics))

10000


In [17]:
# Feed the stored per-picture features of every test business through ext_net.
# Business ids come from test_data.csv; row idx of that file is paired with
# entry idx of test_pics (loaded in the previous cell).
df = pd.read_csv("F:\sharath\yelpdata\\test_data.csv")
test_data = pd.DataFrame(columns=["BusinessId","Features"])
test_features = []
status = Progbar(len(test_pics))
for idx, pic_data in enumerate(test_pics):
    # pic_data[0] = photo ids, pic_data[1] = stored per-picture features
    Xout = ext_net(np.array(pic_data[1], dtype=np.float32))

    #Store the features of each pic
    test_features.append([pic_data[0], Xout])

    #Mean over all the pics that belong to a business
    test_data.loc[idx] = [df.loc[idx,"BusinessId"], get_biz_feature(Xout)]
    status.add(1)



In [18]:
#Save business features for test data - RGB - 2nd Dense layer - mean features = 10000X4096 size
test_csv_5 = "F:\sharath\yelpdata\\test_data_5.csv"
with open(test_csv_5, "w") as f:
    test_data.to_csv(f, index = False)

In [20]:
#Get max feature from pics feature = biz feature (instead of mean)
with open("F:\sharath\yelpdata\\train_pics.pkl", "rb") as f:
    train_pics = pickle.load(f)

# Business ids and targets come from train_data.csv; row idx of that file is
# paired with entry idx of train_pics.
df = pd.read_csv("F:\sharath\yelpdata\\train_data.csv")
train_data = pd.DataFrame(columns=["BusinessId","Target","Features"])
# train_features is intentionally NOT reset here: this cell never fills it, and
# a later cell pickles the per-picture list built by the feed-forward cell.
status = Progbar(len(train_pics))
for idx, pic_data in enumerate(train_pics):
    # pic_data[1] = stored per-picture features of this business
    Xin = np.array(pic_data[1], dtype=np.float32)

    #Element-wise max over all the pics that belong to a business.
    #(The cell previously called get_biz_feature, i.e. the mean, which just
    #reproduced the features already stored in train_data.csv.)
    business_feature = get_max_feature(Xin)
    train_data.loc[idx] = [df.loc[idx,"BusinessId"], df.loc[idx,"Target"], business_feature]
    status.add(1)

#Save business features - RGB - stored per-picture features - max features
with open("F:\sharath\yelpdata\\train_data_6.csv", "w") as f:
    train_data.to_csv(f, index = False)



In [28]:
# Max-pooled business features for the test set (counterpart of the train cell
# that writes train_data_6.csv). Relies on test_pics having been loaded from
# test_pics.pkl by an earlier cell.
df = pd.read_csv("F:\sharath\yelpdata\\test_data.csv")
test_data = pd.DataFrame(columns=["BusinessId","Features"])
# test_features is intentionally NOT reset here: this cell never fills it, and
# a later cell pickles the per-picture list built by the feed-forward cell.
status = Progbar(len(test_pics))
for idx, pic_data in enumerate(test_pics):
    # pic_data[1] = stored per-picture features of this business
    Xin = np.array(pic_data[1], dtype=np.float32)

    #Element-wise max over all the pics that belong to a business.
    #(The cell previously called get_biz_feature, i.e. the mean.)
    business_feature = get_max_feature(Xin)
    test_data.loc[idx] = [df.loc[idx,"BusinessId"], business_feature]
    status.add(1)
#Save business features for test data - RGB - stored per-picture features - max features
with open("F:\sharath\yelpdata\\test_data_6.csv", "w") as f:
    test_data.to_csv(f, index = False)



In [143]:
# I will save the pickle files for 34th layer pic wise features

#train #Save pics wise features
train_pkl = "F:\sharath\yelpdata\\train_pics_2.pkl"
with open(train_pkl, "wb") as f:
    pickle.dump(train_features, f, protocol = pickle.HIGHEST_PROTOCOL)
print("features for train photos saved to %s" %(train_pkl))

#test #Save pics wise features
test_pkl = "F:\sharath\yelpdata\\test_pics_2.pkl"
with open(test_pkl, "wb") as f:
    pickle.dump(test_features, f, protocol = pickle.HIGHEST_PROTOCOL)
print("features for test photos saved to %s" %(test_pkl))

features for train photos saved to F:\sharath\yelpdata\train_pics_2.pkl
features for test photos saved to F:\sharath\yelpdata\test_pics_2.pkl


In [144]:
#----------------------- END -----------------------#
# Drop the two per-picture feature lists from the kernel namespace.
# NOTE(review): the END marker suggests the cells below are scratch work - confirm.
del test_features, train_features

In [None]:
# NOTE(review): `a` is not assigned anywhere above this cell in the visible
# notebook; the only assignment is the small demo array in cell In [25] at the
# bottom, so this cell depends on out-of-order execution.
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
mlb.fit_transform(a)

In [None]:
# Snapshot every layer's weights, keyed by the layer's position in model.layers.
# NOTE(review): `model` is only created at top level by a cell further down.
weight_dic = {}
for n, layer in enumerate(model.layers):
    weight_dic[n] = layer.get_weights()


In [None]:
# Number of weight arrays held by layer 33 (a Dropout layer in VGG_16 as
# defined in this notebook).
len(model.layers[33].get_weights())

In [None]:
# Top-level copy of the body of extended_network(); unlike the function it
# leaves `model` (and the symbolic variables) in the notebook namespace, which
# the scratch cells around it use.
model = VGG_16('vgg16_weights.h5')
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')

#Now get the layers of interest from VGG net
# With VGG_16 as defined in this notebook: layer 33 is the Dropout after the
# first Dense(4096) (so its input is the layer-32 output) and layer 34 is the
# second Dense(4096).
dense2 = model.layers[34].get_output(train = False)
dense1 = model.layers[33].get_input(train = False)

# Function that maps layer-33 input to layer-34 output
ext_net = theano.function([dense1], dense2)

In [None]:
# Write `new_weights` as a float32 dataset named "dataset_1" to new_weights.hdf5.
# NOTE(review): the h5py import at the top of the notebook is commented out and
# `new_weights` is not defined in the visible cells, so this cell cannot run
# on a fresh kernel as written.
f = h5py.File("new_weights.hdf5", "w")
f.create_dataset("dataset_1", data = new_weights, dtype='float32')
f.close()

In [None]:
# Scratch: symbolic output of layer 32 (the first Dense(4096) in VGG_16 as
# defined in this notebook) of the top-level `model`.
dense = model.layers[32].get_output(train=False)

In [None]:
# Symbolic input of the whole model.
input_layer = model.get_input(train = False)

In [None]:
# Function from model input to the layer-32 output (same construction as get_network).
dense_f = theano.function([input_layer], dense)

In [None]:
# NOTE(review): the cv2 import at the top of the notebook is commented out.
im=cv2.imread("train_photos/2.jpg")

In [None]:
%matplotlib inline
plt.imshow(im)

In [None]:
model.layers[32]

In [None]:
# NOTE(review): `im` is passed as read from disk, without input_adjust and
# without a batch axis - presumably not the shape the model input expects; verify.
pred = dense_f(im)

In [None]:
im.shape

In [None]:
def to_bool(s):
    """Turn one business' labels into a 0/1 indicator Series over labels 0-8.

    Parameters
    ----------
    s : str or sequence
        Either the raw label string (labels separated by single spaces, e.g.
        "1 2 5") or an already parsed sequence of labels (e.g. [1, 2, 5]).
        Any other value (such as NaN) is converted with ``str`` and therefore
        yields all zeros unless its text happens to be a label.

    Returns
    -------
    pandas.Series
        Length-9 Series; position i is 1 if label i is present, else 0.
    """
    if isinstance(s, (list, tuple, set, frozenset, np.ndarray)):
        # Parsed labels: compare their string forms. Calling str() on the whole
        # list would produce tokens like "[1," that never match a label.
        present = set(str(v) for v in s)
    else:
        present = set(str(s).split(" "))
    series = pd.Series([1 if str(i) in present else 0 for i in range(9)])
    return series
# One row per business, one 0/1 column per label 0-8.
y = train["labels"].apply(to_bool)

In [None]:
# Per-label frequency; not rendered, because it is not the last expression of the cell.
y.mean()
#predict 5,6,8 for all
sub["labels"] = "5 6 8"
sub.to_csv("naive.csv", index=False)

In [25]:
# Tiny sanity check of the column-wise maximum, the reduction used by get_max_feature.
a=np.array(([1,2,3], [2,3,4]), dtype=np.int32)

In [27]:
a.max(axis=0)

array([2, 3, 4])