In [1]:
import shutil
import os
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mimg


from sklearn.model_selection import train_test_split

import cv2
import imgaug as aug
import imgaug.augmenters as iaa

from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Input, Dense, Flatten
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, Callback, EarlyStopping

In /Users/demetri/opt/miniconda3/envs/hackshulich/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /Users/demetri/opt/miniconda3/envs/hackshulich/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /Users/demetri/opt/miniconda3/envs/hackshulich/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In /Users/demetri/opt/miniconda3/envs/hackshulich/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will b

In [2]:
# Unpack the competition archive into the working directory, but only when
# the two image folders are not both present already.
already_extracted = all(os.path.exists(folder) for folder in ('TestImages', 'TrainImages'))
if not already_extracted:
    shutil.unpack_archive('sacm2021.zip', '.')

In [3]:
# Build the labelled sample table and split it into training / validation sets.
# `train.csv` holds ids and classifications, so each image's id is recovered
# from its file name (the name without the `.png` extension, parsed as int).

samples_path = Path('TrainImages/TrainImages/')
# Sort the listing: glob order depends on the filesystem, and an unstable row
# order would make the seeded split below differ from machine to machine.
samples_images = sorted(samples_path.glob('*.png'))

# `Path.stem` is the final path component without its suffix. Unlike string
# replacement on the whole path, it does not depend on the path separator.
samples_ids = [int(image_path.stem) for image_path in samples_images]

y = pd.read_csv('train.csv')
# merge() without `on=` joins on the columns the two frames have in common
samples_frame = pd.DataFrame({'img': samples_images, "id": samples_ids}).merge(y)

# 60% of the labelled samples for training, the rest for validation
train, val = train_test_split(samples_frame, train_size=0.6, random_state=0)

In [4]:
# Pool of corruptions that may be applied to a training image.
augmentations = [
    # horizontal flip
    iaa.Fliplr(),
    # rotation (rotate=20)
    iaa.Affine(rotate=20),
    # brightness change: multiply pixel values by a factor from (1.2, 1.5)
    iaa.Multiply(mul=(1.2, 1.5)),
    # blur with sigma from (0, 3)
    iaa.GaussianBlur(sigma=(0, 3)),
    # additive gaussian noise centred on 0, scale up to 0.5% of the 0-255 range
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.005 * 255)),
    # intensity inversion within the 0-255 range
    iaa.Invert(p=1.0, min_value=0, max_value=255),
]

# Apply between 0 and 3 of the augmenters listed above.
augmenting_squence = iaa.SomeOf(n=(0, 3), children=augmentations)

In [5]:
def data_gen(data, batch_size):
    """Yield an endless stream of augmented ``(images, labels)`` training batches.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide an ``img`` column (image file paths) and a
        ``classification`` column (class ids, one-hot encoded over 4 classes).
    batch_size : int
        Number of samples in every yielded batch.

    Yields
    ------
    tuple of numpy.ndarray
        ``(batch_data, batch_labels)``, both ``float32``, with shapes
        ``(batch_size, 224, 224, 3)`` and ``(batch_size, 4)``. Images are RGB,
        passed through ``augmenting_squence`` and then divided by 255.
        Fresh arrays are allocated for every batch.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive or ``data`` has fewer rows than
        ``batch_size`` (no full batch could be produced).
    FileNotFoundError
        If an image file cannot be read.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    # Total number of samples and number of full batches per pass over the data
    n = len(data)
    steps = n // batch_size
    if steps == 0:
        # Without this guard every batch would be padded with all-zero
        # images carrying all-zero labels.
        raise ValueError(
            "data has %d rows, which is fewer than batch_size=%d" % (n, batch_size)
        )

    indices = np.arange(n)

    # Index of the next batch within the current pass
    i = 0
    while True:
        # Reshuffle only at the start of a pass. Shuffling before every batch
        # would let samples repeat or be skipped within the same pass.
        if i == 0:
            np.random.shuffle(indices)

        # Allocate new arrays per batch so a consumer that keeps a reference
        # to an earlier batch never sees it overwritten.
        batch_data = np.zeros((batch_size, 224, 224, 3), dtype=np.float32)
        batch_labels = np.zeros((batch_size, 4), dtype=np.float32)

        next_batch = indices[i * batch_size:(i + 1) * batch_size]
        for count, idx in enumerate(next_batch):
            row = data.iloc[idx]
            img_name = row['img']

            # one hot encoding
            encoded_label = to_categorical(row['classification'], num_classes=4)

            # read the image and resize
            img = cv2.imread(str(img_name))
            if img is None:
                # cv2.imread signals failure by returning None, not by raising
                raise FileNotFoundError("could not read image: %s" % img_name)
            img = cv2.resize(img, (224, 224))

            # Defensive: expand a single-channel (2-D) image to three channels
            if img.ndim == 2:
                img = np.dstack([img, img, img])

            # cv2 reads in BGR mode by default; convert to RGB and keep the
            # converted image (the cast must apply to the converted result).
            orig_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)

            # augment in the 0-255 range, then scale by 1/255
            batch_data[count] = augmenting_squence.augment_image(orig_img) / 255
            batch_labels[count] = encoded_label

        i += 1
        yield batch_data, batch_labels

        # Start a new pass once all full batches have been served
        if i >= steps:
            i = 0



In [6]:
# Load the whole validation split into memory as RGB float32 images scaled
# by 1/255, together with their one-hot labels (4 classes).

valid_data, valid_labels = [], []

for row in val.to_dict(orient='records'):
    resized_bgr = cv2.resize(cv2.imread(str(row['img'])), (224, 224))
    img = cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.
    label = to_categorical(row['classification'], num_classes=4)
    valid_data.append(img)
    valid_labels.append(label)

# Turn the per-sample lists into single arrays
valid_data = np.array(valid_data)
valid_labels = np.array(valid_labels)

In [7]:
def build_model():
    """Assemble the (uncompiled) convolutional classifier.

    Architecture: a 224x224x3 input, two conv(3x3, relu) + max-pool(2x2)
    stages with 64 and then 32 filters, a flatten layer, three 128-unit relu
    dense layers and a final 4-unit softmax layer.

    Returns
    -------
    Model
        Keras model mapping the image input to the 4-way softmax output.
    """
    input_img = Input(shape=(224, 224, 3), name='ImageInput')

    # Convolutional feature extractor
    features = input_img
    for n_filters in (64, 32):
        features = Conv2D(n_filters, (3, 3), activation='relu')(features)
        features = MaxPooling2D((2, 2))(features)

    # Fully connected head
    features = Flatten(name='flatten')(features)
    for _ in range(3):
        features = Dense(128, activation='relu')(features)
    class_probs = Dense(4, activation='softmax')(features)

    return Model(inputs=input_img, outputs=class_probs)

# Instantiate the network and compile it for one-hot (categorical) targets.
model = build_model()
# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
opt = Adam(learning_rate=0.0001, decay=1e-5)
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=opt)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [8]:
# Training configuration
batch_size = 16
nb_epochs = 50

# One epoch consumes as many full batches as the training frame can supply
nb_train_steps = len(train) // batch_size

# Endless generator of training batches
train_data_gen = data_gen(data=train, batch_size=batch_size)

# Fit the model, using the in-memory validation arrays as validation data
history = model.fit_generator(
    generator=train_data_gen,
    steps_per_epoch=nb_train_steps,
    epochs=nb_epochs,
    validation_data=(valid_data, valid_labels),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [18]:

# Collect the test images and pair each with its id, taken from the file name
# without the `.png` extension (kept as a string).
test_samples_path = Path('TestImages/TestImages/')
# Sorted so that the row order does not depend on the filesystem listing order
test_samples_images = sorted(test_samples_path.glob('*.png'))

# `Path.stem` is the final path component without its suffix. Unlike string
# replacement on the whole path, it does not depend on the path separator.
test_samples_ids = [image_path.stem for image_path in test_samples_images]

test_samples_frame = pd.DataFrame({'img': test_samples_images, "id": test_samples_ids})

In [26]:
# Predict a class index for every test image, one image per predict() call.
# Each image is resized to 224x224, converted BGR -> RGB and scaled by 1/255.
lbl = []
for image_path in test_samples_images:
    resized_bgr = cv2.resize(cv2.imread(str(image_path)), (224, 224))
    img = cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.

    # predict() expects a batch axis; argmax picks the highest-scoring class
    pred = model.predict(img.reshape(1, 224, 224, 3)).argmax()

    lbl.append(pred)

In [25]:
# Ad-hoc check: predicted class index for the single image currently held in `img`.
# NOTE(review): relies on `img` left over from another cell — confirm execution order.
model.predict(img.reshape(1, 224,224,3)).argmax()

2

In [27]:
# Attach the predicted class to each test row, then write the submission
# file containing only the id and classification columns (no index column).
test_samples_frame['classification'] = lbl
submission_columns = ['id', 'classification']
test_samples_frame[submission_columns].to_csv('sub.csv', index=False)