In [5]:
import numpy as np
import pandas as pd
import glob
import os
import re
import pickle
import matplotlib.pyplot as plt

import sklearn
from sklearn import cross_validation
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC, LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn import svm

import tensorflow as tf
import tensorflow.python.platform
from tensorflow.python.platform import gfile


# model_dir holds the frozen InceptionV3 graph file; images_dir holds the
# training images, matched below as <images_dir>/<sub-directory>/<file>.
model_dir = 'data/precomputed/svm/'
images_dir = 'data/train_full/'
# os.path.join avoids the doubled separator that '{}/*/*g'.format() produced
# with the trailing slash of images_dir.  sorted() pins the image order (and
# thus the row order of the extracted feature matrix), because glob returns
# matches in arbitrary order.
list_images = sorted(glob.glob(os.path.join(images_dir, '*', '*g')))

In [6]:
# Set up the TensorFlow graph from the serialized InceptionV3 model.
def create_graph():
    """Import the serialized InceptionV3 graph into the default graph.

    Reads ``inceptionv3.pb`` from ``model_dir``, parses the bytes into a
    ``GraphDef`` and imports it with an empty name prefix.  Returns nothing.
    """
    graph_path = os.path.join(model_dir, 'inceptionv3.pb')
    with gfile.FastGFile(graph_path, 'rb') as graph_file:
        serialized_graph = graph_file.read()

    inception_def = tf.GraphDef()
    inception_def.ParseFromString(serialized_graph)
    tf.import_graph_def(inception_def, name='')

In [7]:
# extract all features from pool layer of InceptionV3
def extract_trn_feat(list_images):
    """Compute InceptionV3 ``pool_3`` features and labels for training images.

    Args:
        list_images: sequence of image file paths.  The raw bytes of each
            file are fed to the graph input ``DecodeJpeg/contents:0``.  The
            label of an image is the name of the directory that directly
            contains it.

    Returns:
        Tuple ``(features, labels)``.  ``features`` is a NumPy array of shape
        ``(len(list_images), 2048)`` whose row ``i`` belongs to
        ``list_images[i]``; ``labels`` is a list of label strings in the
        same order.
    """
    nb_features = 2048
    features = np.empty((len(list_images), nb_features))
    labels = []
    # Import the network into a private graph so that repeated calls (or a
    # later call to another extractor) do not keep adding copies of it to
    # the process-wide default graph.
    with tf.Graph().as_default():
        create_graph()
        with tf.Session() as sess:
            next_to_last_tensor = sess.graph.get_tensor_by_name('pool_3:0')

            for ind, image in enumerate(list_images):
                if not gfile.Exists(image):
                    tf.logging.fatal('File does not exist %s', image)
                # Context manager closes every image file right after it is
                # read instead of leaving thousands of handles to the GC.
                with gfile.FastGFile(image, 'rb') as image_file:
                    image_data = image_file.read()
                predictions = sess.run(next_to_last_tensor,
                                       {'DecodeJpeg/contents:0': image_data})
                features[ind, :] = np.squeeze(predictions)
                # Parent directory name is the class label; unlike a fixed
                # split("/")[2] this does not depend on how deep the image
                # root is or on the path separator.
                labels.append(os.path.basename(os.path.dirname(image)))

    return features, labels

In [8]:
features, labels = extract_trn_feat(list_images)

# Persist the extracted features and labels.  The 'with' blocks make sure
# each file is flushed and closed before it is read back below.
features_path = 'data/precomputed/svm/features'
labels_path = 'data/precomputed/svm/labels'

with open(features_path, 'wb') as features_file:
    pickle.dump(features, features_file)
with open(labels_path, 'wb') as labels_file:
    pickle.dump(labels, labels_file)

# Read the cached copies back.  NOTE: pickle.load can execute arbitrary code
# embedded in the file, so only load caches produced by this notebook.
with open(features_path, 'rb') as features_file:
    features = pickle.load(features_file)
with open(labels_path, 'rb') as labels_file:
    labels = pickle.load(labels_file)

In [9]:
# Base estimator: linear-kernel SVM with C=0.1.  probability=True is what
# makes predict_proba available on the fitted SVC itself.
clf = SVC(C=0.1, kernel='linear', probability=True)

In [10]:
# Wrap the SVM in a 10-fold sigmoid calibrator and fit the wrapper itself.
# Assigning the result of clf.fit(...) to final_model would replace the
# calibrator with the bare SVC, so the calibration would never be used.
final_model = CalibratedClassifierCV(clf, cv=10, method='sigmoid')
final_model.fit(features, labels)

In [13]:
test_dir = 'data/test/test_stg1/'
# Keep only files whose name ends in .jpg/.JPG (an unanchored 'jpg|JPG'
# pattern also matches names that merely contain those letters), and sort
# because os.listdir returns entries in arbitrary order.
test_images = sorted(
    os.path.join(test_dir, f)
    for f in os.listdir(test_dir)
    if re.search(r'\.(jpg|JPG)$', f)
)

In [14]:
def extract_tst_feat(list_images):
    """Compute InceptionV3 ``pool_3`` features for unlabeled images.

    Args:
        list_images: sequence of image file paths.  The raw bytes of each
            file are fed to the graph input ``DecodeJpeg/contents:0``.

    Returns:
        NumPy array of shape ``(len(list_images), 2048)`` whose row ``i``
        belongs to ``list_images[i]``.
    """
    nb_features = 2048
    features = np.empty((len(list_images), nb_features))
    # Import the network into a private graph so that running this after
    # another extractor (or running it twice) does not add a further copy of
    # the network to the process-wide default graph.
    with tf.Graph().as_default():
        create_graph()
        with tf.Session() as sess:
            next_to_last_tensor = sess.graph.get_tensor_by_name('pool_3:0')

            for ind, image in enumerate(list_images):
                if not gfile.Exists(image):
                    tf.logging.fatal('File does not exist %s', image)
                # Context manager closes every image file right after it is
                # read instead of leaving the handles to the GC.
                with gfile.FastGFile(image, 'rb') as image_file:
                    image_data = image_file.read()
                predictions = sess.run(next_to_last_tensor,
                                       {'DecodeJpeg/contents:0': image_data})
                features[ind, :] = np.squeeze(predictions)

    return features

In [15]:
# Feature rows for the test images, in the same order as test_images.
features_test = extract_tst_feat(list_images=test_images)
# One row of class probabilities per test image.
y_pred = final_model.predict_proba(features_test)

In [17]:
# The bare file name identifies each test image in the submission; basename
# does not depend on how many directories precede it.
image_id = [os.path.basename(path) for path in test_images]

# NOTE(review): the header assumes the columns of y_pred follow exactly this
# class order -- confirm against final_model.classes_.
class_columns = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
# Fail loudly rather than write rows whose width disagrees with the header.
if y_pred.shape[1] != len(class_columns):
    raise ValueError(
        'Expected %d probability columns, got %d'
        % (len(class_columns), y_pred.shape[1]))

# 'with' flushes and closes the file even if a write raises part-way.
with open('submissions/svm/-LB_A.csv', 'w') as submit:
    submit.write('image,%s\n' % ','.join(class_columns))
    for name, row in zip(image_id, y_pred):
        probs = ['%s' % p for p in row]
        submit.write('%s,%s\n' % (name, ','.join(probs)))