In [1]:
import os

import numpy as np
import pandas as pd
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
#from imgaug import augmenters as iaa      --> used for Gaussian Blur that is not working right now.

## IMPORTANT: Change the values of num_train and num_validate below to the actual values you used.

In [20]:
# Run configuration shared by the generator-based training cells.
batch_size = 32
# Machine-specific absolute paths: point these at your own copies of the poster folders.
train_directory = '/Users/louissmidt/ELEC 301 Final Project/train_posters'
val_directory = '/Users/louissmidt/ELEC 301 Final Project/validation_posters'
num_train = 2894
num_validate = 200
# Steps per epoch use ceiling division so the final, partial batch is included.
# Floor division (200 // 32 = 6 steps = 192 images) silently left the last
# images out of every validation pass, and likewise for training.
nv = (num_validate + batch_size - 1) // batch_size
nt = (num_train + batch_size - 1) // batch_size
epochs = 5

#### Gaussian blur relies on the imgaug library and does not work right now. Skip this cell.

In [21]:
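#def gaussian_blur(img):
#    """
#    Gaussian blur of varying intensity applied to 30% of augmented images.
#    Called after resize and augmentation. Input rank = 3, output rank = 3;
#    imgaug requires rank = 4, hence the list wrapping below.
#    """
#    aug = iaa.Sometimes(0.3, iaa.GaussianBlur(sigma=(0.1, 0.3)))
#    # NOTE(review): augment_images returns a batch (here a one-element list), not a
#    # rank-3 image -- presumably the result needs [0]; confirm before re-enabling.
#    return aug.augment_images([img])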



___
### Define the CNN model

In [22]:
# Posters are 182 x 268 pixels (width x height) with 3 colour channels.
# The input shape is given channels-last as (height, width, channels).
model = Sequential()

# Three convolution blocks: 3x3 convolution -> ReLU -> 2x2 max pooling.
model.add(Conv2D(32, (3, 3), input_shape=(268, 182, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten, one hidden dense layer with dropout, then a
# 4-way softmax (one output per class).
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Dense(4))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 266, 180, 32)      896       
_________________________________________________________________
activation_21 (Activation)   (None, 266, 180, 32)      0         
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 133, 90, 32)       0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 131, 88, 32)       9248      
_________________________________________________________________
activation_22 (Activation)   (None, 131, 88, 32)       0         
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 65, 44, 32)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 63, 42, 64)        18496     
__________

### Define data generators that feed mini batches into the model for training and validation. The train_datagen uses Keras data augmentation

In [23]:
# Augmentation for training batches: small random rotations and zooms, with
# pixel values rescaled to [0, 1]. The Gaussian-blur preprocessing function,
# brightness_range, shear_range, vertical_flip and validation_split were tried
# earlier and are currently disabled.
augmentation_options = dict(
    rotation_range=15,
    zoom_range=0.3,
    featurewise_center=False,
    zca_whitening=False,
    horizontal_flip=False,
    fill_mode="nearest",
    rescale=1./255,
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# Both generators read class-labelled folders and yield one-hot labelled batches
# of images resized to the model's input size.
flow_options = dict(
    target_size=(268, 182),
    batch_size=batch_size,
    class_mode='categorical',
)
train_generator = train_datagen.flow_from_directory(train_directory, **flow_options)
validation_generator = test_datagen.flow_from_directory(val_directory, **flow_options)

Found 2894 images belonging to 4 classes.
Found 200 images belonging to 4 classes.


### Fit the model to the Generators --> it'll print accuracy against the validation set at the end of each epoch

In [24]:
# Train from the augmenting generator: nt batches per epoch for `epochs` epochs,
# scoring on nv validation batches at the end of each epoch.
model.fit_generator(
    train_generator,
    steps_per_epoch=nt,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nv,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1c52974128>

In [176]:
# Persist the trained weights to an HDF5 file in the working directory.
weights_path = 'model1.h5'
model.save_weights(weights_path)

****
### The next cells load the data into NumPy matrices and fit the model manually, in case the generator-based method does not work. This bypasses data augmentation.
#### Only run if you want the training data as a matrix in memory


In [194]:
## Import images and labels for training, then split the labels into train and validation parts.

def _load_jpgs_into(directory, out):
    """Fill ``out`` row by row with the .jpg images found directly in ``directory``.

    Args:
        directory: folder to scan (non-recursive); files without a ".jpg"
            suffix are skipped.
        out: pre-allocated array that receives one image per row, in place.

    Returns:
        The number of images written into ``out``.
    """
    count = 0
    # os.listdir returns names in arbitrary order; sort so row order is reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".jpg"):
            continue
        # Resize to the model's input size, as the generators do via target_size,
        # so an off-size poster cannot break the row assignment.
        img = load_img(os.path.join(directory, filename), target_size=(268, 182))
        out[count] = img_to_array(img)
        count += 1
    return count


meta = pd.read_csv("train_data.csv", index_col=0)
# Positional split: the first num_train rows are training labels and the next
# num_validate rows are validation labels. The old slice [2895:3094] skipped
# row 2894 and produced one label fewer than there are validation images.
# NOTE(review): nothing here ties an image's row to its row in train_data.csv --
# presumably the labels are expected in filename order; confirm.
trainLab = meta["Genre"].iloc[:num_train]
valLab = meta["Genre"].iloc[num_train:num_train + num_validate]

train_data = np.zeros((num_train, 268, 182, 3)) # rgb channels last
val_data = np.zeros((num_validate, 268, 182, 3))

# NOTE(review): flow_from_directory reports 4 classes for these same folders,
# which suggests the posters sit in per-class subfolders; a non-recursive listing
# would then find no .jpg files. Check the counts printed below.

# populate train_data (the loop previously built paths from an undefined `train_dir`)
tInd = _load_jpgs_into(train_directory, train_data)
print("Loaded %d of %d training images" % (tInd, len(train_data)))

# populate val_data (the loop previously listed and read an undefined `val_dir`)
vInd = _load_jpgs_into(val_directory, val_data)
print("Loaded %d of %d validation images" % (vInd, len(val_data)))

In [3]:
# Load every .jpg poster in the test folder into one in-memory matrix.

dir2 = '/Users/louissmidt/ELEC 301 Final Project/test_posters'

test_data = np.zeros((344, 268, 182, 3)) # rgb channels last

# Keep only the .jpg entries, in the order the directory listing returns them.
jpg_names = [name for name in os.listdir(dir2) if name.endswith(".jpg")]

ind2 = 0
for name in jpg_names:
    # Each image becomes one row of test_data; ind2 ends up as the number loaded.
    test_data[ind2] = img_to_array(load_img(dir2 + "/" + str(name)))
    ind2 += 1

#### Manual fit, not from Generator (Not working)

In [None]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Map the genre labels to integer class ids.
labelencoder_y_1 = LabelEncoder()
y = labelencoder_y_1.fit_transform(trainLab)

# The model ends in a softmax trained with categorical_crossentropy, which needs
# one-hot target rows. Previously the raw labels were passed to fit() and the
# encoded `y` was never used.
y_onehot = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()

# NOTE(review): train_data holds raw img_to_array output, while the generators feed
# the model pixels rescaled by 1/255 -- presumably this matrix should be scaled the
# same way before fitting; confirm.
model.fit(x=train_data, y=y_onehot, batch_size=batch_size, epochs=10, verbose=2, validation_split=0.1)