In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing

import matplotlib.pyplot as plt
%matplotlib inline

from keras.models import Sequential
from keras.layers import Dense , Dropout , Lambda, Flatten, Conv2D, MaxPool2D
from keras.optimizers import SGD
from sklearn.model_selection import train_test_split

import cv2
import glob
import os
import pickle


#set global parameters
img_rows = 224
img_cols = 224
max_files = -1
read_from_cache = False


Using TensorFlow backend.


In [2]:
filelist = glob.glob('../input/train/*/*.*')
categories = np.unique([x.split('/')[3] for x in filelist])

In [3]:
categories

array(['classroompictures', 'diningpictures', 'entrancepictures',
       'exhibitionpictures', 'stairspictures'],
      dtype='<U18')

In [4]:
def read_image(path,img_rows,img_cols):
    img = cv2.imread(path)
    return cv2.resize(img, (img_cols, img_rows))

def read_train(img_rows,img_cols,max_files):
    
    # img_rows & img_cols set the size of the image in the output
    # max files is the maximal number of images to read from each category
    # use max_files=-1 to read all images within the train subfolders
    
    X_train = []
    y_train = []
    
    print('Read train images')
    for j,category in enumerate(categories):
        counter = 0
        print('Load folder {}'.format(category))
        path = os.path.join('..', 'input','train', category, '*.jpg')
        files = glob.glob(path)
        for fl in files:
            flbase = os.path.basename(fl)
            img = read_image(fl, img_rows, img_cols)
            X_train.append(np.asarray(img))
            y_train.append(j)
            counter+=1
            if (counter>=max_files)&(max_files>0):
                break
    
    return np.array(X_train), np.array(y_train)

def read_test(img_rows,img_cols):
    X_test = []
    ids = []
    print('Read test images')
    path = os.path.join('..', 'input','test', '*.jpg')
    files = glob.glob(path)
    for fl in files:
        flbase = os.path.basename(fl)
        img = read_image(fl, img_rows, img_cols)
        X_test.append(np.asarray(img))
        ids.append(fl.split('/')[-1])
    
    return np.array(ids), np.array(X_test)



In [5]:
def cache_data(data, path):
    # this is a helper function used to cache data once it was read and preprocessed
    if os.path.isdir(os.path.dirname(path)):
        file = open(path, 'wb')
        pickle.dump(data, file)
        file.close()
    else:
        print('Directory doesnt exists')

In [6]:
def restore_data(path):
    # this is a helper function used to restore cached data
    data = dict()
    if os.path.isfile(path):
        file = open(path, 'rb')
        data = pickle.load(file)
    return data

In [7]:
def save_model(model):
    # this is a helper function used to save a keras NN model architecture and weights
    json_string = model.to_json()
    if not os.path.isdir('cache'):
        os.mkdir('cache')
    open(os.path.join('cache', 'architecture.json'), 'w').write(json_string)
    model.save_weights(os.path.join('cache', 'model_weights.h5'), overwrite=True)

In [8]:
def read_model():
    # this is a helper function used to restore a keras NN model architecture and weights
    model = model_from_json(open(os.path.join('cache', 'architecture.json')).read())
    model.load_weights(os.path.join('cache', 'model_weights.h5'))
    return model

In [9]:
if not read_from_cache:
    X_train, y_train = read_train(img_rows,img_cols,max_files)
    cache_data(X_train,'../processed_input/X_train_{}X{}X3_{}_max_samples'.format(img_rows,img_cols,max_files))
    cache_data(y_train,'../processed_input/y_train_{}_max_samples'.format(max_files))
else:
    X_train = restore_data('../processed_input/X_train_{}X{}X3_{}_max_samples'.format(img_rows,img_cols,max_files))
    y_train = restore_data('../processed_input/y_train_{}_max_samples'.format(max_files))

Read train images
Load folder classroompictures
Load folder diningpictures
Load folder entrancepictures
Load folder exhibitionpictures
Load folder stairspictures


In [10]:
ids, X_test = read_test(img_rows=img_rows,img_cols=img_cols)

Read test images


In [11]:
pred = [[0.2,0.2,0.2,0.2,0.2],[0.2,0.2,0.2,0.2,0.2],[0.2,0.2,0.2,0.2,0.2],
       [0.2,0.2,0.2,0.2,0.2],[0.2,0.2,0.2,0.2,0.2]]# model predictions - put your model.predict(X_test) here
subm = pd.DataFrame(np.round(pred,decimals=5))
subm.columns = ['classroompictures', 'diningpictures', 'entrancepictures', 'exhibitionpictures', 'stairspictures']
subm['Id'] = ids
subm['index'] = subm.Id.apply(lambda x: int(x.split('.')[0]))
subm.sort_values(by=['index'],inplace=True,ascending=True)
subm.drop('index',axis=1,inplace=True)
subm

Unnamed: 0,classroompictures,diningpictures,entrancepictures,exhibitionpictures,stairspictures,Id
3,0.2,0.2,0.2,0.2,0.2,1.jpg
2,0.2,0.2,0.2,0.2,0.2,2.jpg
0,0.2,0.2,0.2,0.2,0.2,3.jpg
1,0.2,0.2,0.2,0.2,0.2,4.jpg
4,0.2,0.2,0.2,0.2,0.2,5.jpg


In [12]:
test_labels = pd.read_csv('../test_labels/test_labels.csv')


In [None]:
from sklearn.metrics import log_loss
log_loss(y_pred=subm.loc[:,'classroompictures':'stairspictures'],
         y_true=test_labels.loc[:,'classroompictures':'stairspictures'])

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
class_pred = subm.loc[:,'classroompictures':'stairspictures'].idxmax(axis = 1)
class_labels = test_labels.loc[:,'classroompictures':'stairspictures'].idxmax(axis = 1)
print(classification_report(y_pred=class_pred,
                            y_true=class_labels))

In [None]:
print(confusion_matrix(y_pred=class_pred,y_true=class_labels))