# Dataset Creation

In [None]:
import random
import numpy as np
import tensorflow

In [None]:
# Pin the seed of every random number generator this notebook uses
# (Python's `random`, NumPy and TensorFlow), all to the same value.
random.seed(123)
np.random.seed(123)
tensorflow.random.set_seed(123)

# Load Data

In [None]:
# Execute the data-preparation notebook in this kernel. `get_datasets`, used
# below, is presumably defined there (it is not defined in this notebook) — confirm.
%run DataPreparation.ipynb

# 224 x 224 is the input size for the VGG16 model;
# in the future this should match our CNN.
width = 100
scale = 0.75  # height-to-width ratio applied on the next line
height = round(width*scale)  # 75 for the values above
data = get_datasets(width=width, height=height, which='categories')

# Pull the two 'categories' entries out of the returned object
# (presumably the images and their labels — confirm against get_datasets).
X_categories = data['X_categories']
y_categories = data['y_categories']

print(len(X_categories), 'Category Pictures')

In [None]:
# Suppress warnings from keras
# (these are tensorflow warnings caused by the way keras uses tensorflow,
# not warnings from our implementation).
# NOTE: the 'ignore' filter below is installed without a category or module
# restriction, so it silences every Python warning raised after this cell,
# not only the keras/tensorflow ones.
import warnings
warnings.filterwarnings('ignore')

# Build Convolutional Neural Network for Object Categories Data

( Right now only on 100 train and 100 test )

In [None]:
import keras
from keras import layers
from keras import models
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img

In [None]:
# --- Split configuration -----------------------------------
# The first `test_split` shuffled samples form the test set.
test_prop = 0.3
test_split = round(test_prop * len(X_categories))
input_shape = X_categories[0].shape
num_classes = len(set(y_categories))

# --- Shuffle Data -----------------------------------
# One shared permutation keeps every sample aligned with its label.
idxs = np.arange(len(X_categories))
np.random.shuffle(idxs)
X_categories, y_categories = X_categories[idxs], y_categories[idxs]

# --- Data Manipulation -----------------------------------
# Map each label to its position in the sorted list of distinct labels,
# then one-hot encode those integer codes.
distinct_categories = sorted(set(y_categories))
label_to_index = {
    label: position for position, label in enumerate(distinct_categories)
}
y_categories_cat = keras.utils.to_categorical(
    [label_to_index[label] for label in y_categories],
    num_classes
)

# --- Train Test Split -----------------------------------
X_categories_test, X_categories_train = (
    X_categories[:test_split], X_categories[test_split:]
)
y_categories_test, y_categories_train = (
    y_categories_cat[:test_split], y_categories_cat[test_split:]
)

In [None]:
def fit_model(params):
    """Build, train and evaluate a small CNN on the object-category data.

    The function reads the notebook-level globals ``X_categories`` and
    ``y_categories`` (for the input shape and the number of classes) and
    trains on ``X_categories_train`` / ``y_categories_train`` while
    validating and scoring on ``X_categories_test`` / ``y_categories_test``.

    Parameters
    ----------
    params : dict
        Hyper-parameters. Required keys: ``'batch_size'``, ``'epochs'``,
        ``'kernel_size'``, ``'pool_size'``, ``'strides'`` and
        ``'learning_rate'``. Optional key: ``'momentum'`` (defaults to 0).

    Returns
    -------
    The compiled Keras ``Sequential`` model. If training is interrupted with
    a ``KeyboardInterrupt`` the model is returned in whatever state it had
    reached, so a long run can be stopped by hand without losing it.

    Raises
    ------
    KeyError
        If a required hyper-parameter is missing from ``params``.
    Exception
        Any other error raised while building or training the model is
        propagated instead of being silently swallowed.
    """
    # --- Hyper-parameters -----------------------------------
    # Read these first so that a missing key fails immediately with a clear
    # KeyError, before any seeding or model construction happens.
    batch_size = params['batch_size']
    epochs = params['epochs']

    # size of window
    kernel_size = params['kernel_size']

    # factor of reduction
    pool_size = params['pool_size']
    strides = params['strides']

    learning_rate = params['learning_rate']
    momentum = params.get('momentum', 0)

    # Re-seed on every call so that each fit starts from the same RNG state.
    np.random.seed(123)
    random.seed(123)
    tensorflow.random.set_seed(123)

    # --- Constants -----------------------------------
    input_shape = X_categories[0].shape
    num_classes = len(set(y_categories))

    model = None
    try:
        # --- Model Architecture -----------------------------------
        model = models.Sequential()
        model.add(layers.Conv2D(
            64, kernel_size=kernel_size,
            activation='relu',
            input_shape=input_shape,
            strides=strides
        ))
        model.add(layers.Conv2D(
            64, kernel_size=kernel_size,
            activation='relu',
            strides=strides
        ))
        model.add(layers.MaxPooling2D(pool_size=pool_size))
        model.add(layers.Conv2D(
            32, kernel_size=kernel_size,
            activation='relu',
            strides=strides
        ))
        model.add(layers.MaxPooling2D(pool_size=pool_size))
        model.add(layers.Flatten())
        model.add(layers.Dense(num_classes, activation='softmax'))

        # `learning_rate` is the current name of the argument formerly
        # spelled `lr`.
        model.compile(
            loss=keras.losses.categorical_crossentropy,
            optimizer=keras.optimizers.SGD(
                learning_rate=learning_rate,
                momentum=momentum,
                nesterov=False
            ),
            metrics=['categorical_accuracy']
        )

        # --- Fit Model -----------------------------------
        model.fit(
            X_categories_train, y_categories_train,
            batch_size=batch_size,
            epochs=epochs,
            verbose=1,
            validation_data=(X_categories_test, y_categories_test)
        )
        score = model.evaluate(
            X_categories_test, y_categories_test,
            verbose=0
        )
        print("Score:", score)
    except KeyboardInterrupt:
        # Best effort: a manual stop still hands back the partially trained
        # model. If nothing was built yet there is nothing to return.
        if model is None:
            raise
        print("Training interrupted; returning the model in its current state.")
    return model

### Example model: 0.113 train accuracy

In [None]:
# First example run: no 'momentum' entry, so fit_model trains with
# momentum 0.
params1 = dict(
    batch_size=100,
    learning_rate=0.0001,
    epochs=5,
    kernel_size=(3, 3),
    pool_size=(2, 2),
    strides=(1, 1),
)

model1 = fit_model(params1)

In [None]:
# Write model1 to disk with pickle.
# NOTE(review): open() does not create the models/ directory; it must already
# exist or this cell fails.
# NOTE(review): pickling Keras models is not supported by every
# Keras/TensorFlow version; model.save(...) is the documented way to persist
# a model — consider switching.
import pickle
with open("models/models_0.1126.pickle", "wb") as f:
    pickle.dump(model1, f)

### Example model 2: 35.05% accuracy

In [None]:
# Second example run: smaller batches, more epochs and an explicit
# momentum term passed through to fit_model.
params3 = dict(
    batch_size=50,
    learning_rate=0.0003,
    epochs=14,
    kernel_size=(3, 3),
    pool_size=(2, 2),
    strides=(1, 1),
    momentum=0.006,
)

model3 = fit_model(params3)

### Grid search

In [None]:
# Exhaustive search over batch size, learning rate and momentum; every other
# hyper-parameter is held fixed.
batch_sizes = [50,100,150]
learning_rates = [0.00007,0.0001,0.0002,0.0003]
momentums = [0.006,0.01,0.05]

# One row per configuration:
# [batch_size, learning_rate, momentum, test loss, test accuracy]
my_lists = []
for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        for momentum in momentums:
            np.random.seed(123)
            random.seed(123)
            tensorflow.random.set_seed(123)
            params2 = {
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'epochs': 15,
                'kernel_size': (3,3),
                'pool_size': (2,2),
                'strides': (1,1),
                'momentum': momentum
            }
            # fit_model returns only the trained model, so it cannot be
            # unpacked into two values. Score the model explicitly: the
            # model is compiled with a single metric, so evaluate() yields
            # [loss, categorical_accuracy].
            grid_model = fit_model(params2)
            test_loss, test_accuracy = grid_model.evaluate(
                X_categories_test, y_categories_test,
                verbose=0
            )
            my_lists.append(
                [batch_size, learning_rate, momentum, test_loss, test_accuracy]
            )

In [None]:
# stopped after 14 epochs
# NOTE(review): `model_final` is not assigned in any cell visible in this
# notebook, so on a fresh kernel this cell raises NameError — confirm which
# model was meant to be saved here.
# NOTE(review): open() does not create the models/ directory; it must already
# exist.
import pickle
with open("models/models_MARK.pickle", "wb") as f:
    pickle.dump(model_final, f)

In [None]:
# NOTE(review): `model2` is not assigned in any cell visible in this notebook,
# so on a fresh kernel this cell raises NameError — confirm which model was
# meant to be saved here.
# This cell also relies on `pickle` having been imported by an earlier cell.
with open("models/models_0.2383.pickle", "wb") as f:
    pickle.dump(model2, f)