# Hydromedusa Solmaris
*By Jan Jetze Beitler & Sinan Ersin*

## 0. Imports

In [None]:
from model import create_model
from file_handler import load_images
from functions import cross_validation
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras import optimizers
import random
import numpy as np
import csv

## 1. Globals

In [None]:
# --- data locations ---
root = "../Data/"
train_img_dir = "train_images/"
train_lbl_fle = "train_onelabel.csv"
test_img_dir = "test_images/"
submission_csv = "output.csv"

# --- loading / training settings ---
load_size = None  # None means all images are loaded
img_size = 64
batch_size = 80
num_epochs = 200
num_folds = 3  # number of folds used for cross validation

# A float here splits the data, the float being the fraction used as test
# data; a falsy value such as 0 skips the split.
split_test_train = 0

# Parameters for cross validation; see the Markdown below for instructions.
parameters = dict()


#### Parameter tuning
Fill the `parameters` dict to run cross validation over the given parameters.

- Each *key* is the name of a parameter to tune.
- Each *value* is a list of inputs to try for that parameter.

Multiple key-value pairs are possible.

## 2. Loading data

In [None]:
# Read the training set; the result is unpacked as (labels, images).
Y, X = load_images(
    root + train_img_dir,
    labels_file=root + train_lbl_fle,
    num=load_size,
    shape=img_size,
)

input_shape = X[0].shape

# Optionally hold out a random subset of the data as test data.
if split_test_train:
    # indices of the samples that go into the test part
    sample = random.sample(range(len(Y)), int(len(X) * split_test_train))

    x_test, x_train = X[sample], np.delete(X, sample, axis=0)

    # labels are converted to categorical form with 121 classes
    y_test = np_utils.to_categorical(Y[sample], 121)
    y_train = np_utils.to_categorical(np.delete(Y, sample, axis=0), 121)

## 3. Model

In [None]:
# Random image alterations (rotation, shifts, flips) so that training covers
# more possible appearances of plankton.
augmentation = dict(
    data_format='channels_first',
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = ImageDataGenerator(**augmentation)

### 3.1 Normal run

In [None]:
# Build the network for the shape of the loaded images.
model = create_model(input_shape=input_shape)

if split_test_train:
    # Train on the training part only, then evaluate on the held-out part.
    # steps_per_epoch has to be a whole number of batches; rounding up keeps
    # the last, partially filled batch in every epoch.
    hist = model.fit_generator(
        datagen.flow(x_train, y_train, batch_size=batch_size),
        steps_per_epoch=int(np.ceil(len(x_train) / batch_size)),
        epochs=num_epochs)
    score = model.evaluate(x_test, y_test, verbose=1)

else:
    # No split requested: train on every loaded image.
    temp_x = X
    temp_y = np_utils.to_categorical(Y, 121)
    print(temp_y.shape)

    # Same rounding as above so steps_per_epoch is an integer.
    hist = model.fit_generator(
        datagen.flow(temp_x, temp_y, batch_size=batch_size),
        steps_per_epoch=int(np.ceil(len(temp_y) / batch_size)),
        epochs=num_epochs)

### 3.2 Tests

In [None]:
# Run cross validation with the `parameters` dict on the full data set;
# the result is kept in `outcome`.
outcome = cross_validation(
    parameters,
    X,
    Y,
    folds=num_folds,
    epochs=num_epochs,
    datagen=datagen,
)

### 3.3 Submission

In [None]:
# Load the images to classify; the result is unpacked as (names, images).
submit_image_names, submit_images = load_images(root + test_img_dir, shape=img_size)

# Reduce every prediction row to the index of its largest value and shape the
# result as a column so it can be placed next to the image names.
predicted_labels = model.predict(submit_images, verbose=1)
predicted_labels = predicted_labels.argmax(axis=1).reshape(len(predicted_labels), 1)
predictions = np.concatenate((submit_image_names, predicted_labels), axis=1)

# newline='' lets the csv writer control line endings itself; without it,
# platforms that translate '\n' end up with an empty line after each row.
with open(submission_csv, 'w', newline='') as outpt:
    writer = csv.writer(outpt)
    writer.writerow(['image', 'class'])
    # one CSV row per (image name, predicted class) pair
    writer.writerows(predictions)