In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import skimage
from skimage import transform as tf
from skimage import measure, feature, color, filters, draw, segmentation, morphology, exposure, transform
from scipy import misc
from sklearn.cross_validation import train_test_split
from keras.layers.core import Dense, Activation, Merge, Flatten, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.models import Sequential
import keras
from PIL import Image
import glob
import random, os
from sklearn.metrics.pairwise import cosine_similarity
import theano
from skimage.filters.rank import median
from skimage.morphology import disk
%matplotlib inline
theano.config.floatX = 'float32'
theano.config.device = 'gpu'

In [None]:
# Helpers for reading the training images into Python
def random_filename_generator(path):
    """Return the name of one randomly chosen regular file inside `path`.

    Only entries for which `os.path.isfile` is true are candidates, so
    sub-directories are skipped. The bare entry name (not the joined path)
    is returned. `random.choice` raises IndexError when there are no files.
    """
    candidates = []
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if os.path.isfile(full_path):
            candidates.append(entry)
    return random.choice(candidates)

def generate_images(max_images=5,
                    base_dir='/Users/thomas/Data Science Projects/Fish Identification/jpgs/train',
                    classes=('ALB','BET','DOL','LAG','NoF','OTHER','SHARK','YFT')):
    """Load a random sample of training images for every class.

    For each class name in `classes`, the `*.jpg` files under
    `<base_dir>/<class>/` are listed and up to `max_images` of them are drawn
    at random without replacement. Each image is read, resized to 360x640,
    converted to grayscale and paired with a one-hot target list.

    Parameters
    ----------
    max_images : int
        Upper bound on the number of images taken per class. Classes with
        fewer files contribute all of them; values <= 0 yield no images.
    base_dir : str
        Directory holding one sub-directory per class. Defaults to the
        location the notebook was originally written against.
    classes : sequence of str
        Class sub-directory names; the position in this sequence is the index
        of the 1 in the one-hot target.

    Returns
    -------
    (image_list, target_list) : tuple of lists
        `image_list[k]` is the processed image and `target_list[k]` its
        one-hot target (a list of len(classes) ints).
    """
    image_list, target_list = [], []
    for class_index, class_name in enumerate(classes):
        file_names = glob.glob(os.path.join(base_dir, class_name, '*.jpg'))

        target = [0] * len(classes)
        target[class_index] = 1

        # Sample without replacement in one step. The files are read directly:
        # re-globbing a concrete file name would silently skip names that
        # contain glob metacharacters such as '[' or ']'.
        n_pick = max(0, min(max_images, len(file_names)))
        for filename in random.sample(file_names, n_pick):
            im = misc.imread(filename)
            # The third positional argument (4) is forwarded to resize unchanged.
            im = tf.resize(im, (360, 640), 4)
            im = color.rgb2gray(im)
            image_list.append(im)
            # Append a copy so that targets never alias one another.
            target_list.append(list(target))
    return image_list, target_list


In [None]:
# Network definition: one input convolution (24 filters) followed by one
# convolution per entry of `conv_layers`, each followed by max pooling; then
# the dense stack in `dense_layers` with dropout, and an 8-way softmax.
conv_layers=[24, 24, 48] # Changed from 20, 40 to 24, 48
dense_layers=[1024,512]

model=Sequential()
# Input is channels-first ("th"): 1 channel, 360 rows, 640 columns.
model.add(Convolution2D(24,3,3, activation='relu', input_shape=(1,360,640), dim_ordering="th"))
model.add(MaxPooling2D(dim_ordering="th"))
for layer in conv_layers:
    model.add(Convolution2D(layer, 3, 3, activation='relu', dim_ordering="th"))
    # Must use the same channels-first ordering as the convolutions. With
    # "tf" the layer would treat the axes as (rows, cols, channels) and pool
    # over the channel/height axes of this channels-first tensor.
    model.add(MaxPooling2D(dim_ordering="th"))
model.add(Flatten())
for dl in dense_layers:
    model.add(Dense(dl, activation='relu'))
    model.add(Dropout(0.3))
# One output unit per class.
model.add(Dense(8, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer="adamax",metrics=['accuracy'])


In [None]:
# Draw up to 45 random images per class, add a singleton channel axis so the
# array is (n_images, 1, 360, 640), then hold out 15% of it for validation.
X, Y = generate_images(45)
X = np.asarray(X).reshape(len(X), 1, 360, 640)
x_train_0, x_test_0, y_train, y_test = train_test_split(X, Y, test_size=.15)

In [None]:
# # Making filtered images to extend training set

# x_train_f_1, x_test_f_1= [], []
# x_train_f_2, x_test_f_2= [], []
# for image in x_train_0:
#     x_train_f_1.append(filters.gaussian(image, sigma=.3))
# #     x_train_f_2.append(median(image, disk(.8)))
# for image in x_test_0:
#     x_test_f_1.append(filters.gaussian(image, sigma=.3))
# #     x_test_f_2.append(median(image, disk(.8)))
# x_train_0=list(x_train_0)
# x_test_0=list(x_test_0)
# x_train_0.extend(x_train_f_1)
# # x_train_0.extend(x_train_f_2)
# x_test_0.extend(x_test_f_1)
# # x_test_0.extend(x_test_f_2)

# x_train_f_1, x_test_f_1= [], []
# # x_train_f_2, x_test_f_2= [], []

# y_train=y_train*2
# y_test=y_test*2

# x_train_0=np.reshape(x_train_0,(len(x_train_0), 1, 360, 640))
# x_test_0=np.reshape(x_test_0,(len(x_test_0), 1, 360, 640))

In [None]:
# First training pass on the initial sample: 10 epochs, mini-batches of 21,
# validating on the held-out split after every epoch.
model.fit(
    np.array(x_train_0),
    y_train,
    batch_size=21,
    nb_epoch=10,
    shuffle=True,
    verbose=1,
    validation_data=(np.array(x_test_0), y_test)
)

In [None]:
# Five further rounds: each round draws a fresh random sample of up to 45
# images per class, re-splits it 85/15 and trains for 3 more epochs.
# NOTE(review): because every round re-samples and re-splits, images used for
# validation here may already have been trained on in an earlier round, so
# the reported validation accuracy can be optimistic.
for resample_round in range(5):
    X, Y = generate_images(45)
    X = np.asarray(X).reshape(len(X), 1, 360, 640)
    x_train_0, x_test_0, y_train, y_test = train_test_split(X, Y, test_size=.15)
    model.fit(
        np.array(x_train_0),
        y_train,
        batch_size=54,
        nb_epoch=3,
        shuffle=True,
        verbose=1,
        validation_data=(np.array(x_test_0), y_test)
    )

### Testing for Probabilities

In [None]:
def generate_testing_images(path='/Users/thomas/Data Science Projects/Fish Identification/jpgs/test_stg1/*.jpg'):
    """Load and preprocess every image matching `path`.

    Each matching file is read, resized to 360x640 and converted to
    grayscale, the same preprocessing the training loader applies.

    Parameters
    ----------
    path : str
        Glob pattern selecting the test images. Defaults to the location the
        notebook was originally written against.

    Returns
    -------
    (testing_image_list, testing_image_name) : tuple of lists
        The processed images and, at the same positions, the file paths they
        were read from. Files are processed in sorted path order so that the
        result is reproducible between runs.
    """
    testing_image_list, testing_image_name = [], []
    # glob gives no ordering guarantee; sort for a stable row order.
    for filename in sorted(glob.glob(path)):
        im = misc.imread(filename)
        # The third positional argument (4) is forwarded to resize unchanged.
        im = tf.resize(im, (360, 640), 4)
        im = color.rgb2gray(im)
        testing_image_list.append(im)
        testing_image_name.append(filename)
    return testing_image_list, testing_image_name

In [None]:
# Load the test images and add the singleton channel axis the model expects.
testing_images,testing_files=generate_testing_images()
# Size the batch axis from the data (as the training cell does) rather than
# assuming exactly 1000 test files.
testing_images=np.reshape(testing_images,(len(testing_images), 1, 360, 640))

In [None]:
# With 60 Images Per Class
# NOTE(review): the training cells visible in this notebook call
# generate_images(45); confirm whether "60" above is still accurate.
# Per-class probabilities for every test image, one row per image.
testing_results_proba = model.predict_proba(
    np.array(testing_images)
)

In [None]:
# Build the results table: the image file name followed by one probability
# column per class. The class order matches the one-hot index used when the
# training targets were generated.
ext_list=['ALB','BET','DOL','LAG','NoF','OTHER','SHARK','YFT']
df=pd.DataFrame(testing_results_proba, columns=ext_list)
# Keep only the file name; os.path.basename copes with either path separator.
df.insert(0, 'image', [os.path.basename(name) for name in testing_files])
# NOTE(review): the DataFrame index is written as an unnamed first column
# (pandas default). Pass index=False if the consumer expects only
# image + class columns.
df.to_csv('/Users/thomas/Data Science Projects/Fish Identification/jpgs/test_answers.csv')