In [62]:
#%tensorflow_version 2.x
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, callbacks
  
print(tf.version)

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


# Helper libraries
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
import scipy.io as sio
from sklearn.model_selection import train_test_split
from sklearn.utils import compute_class_weight

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from pathlib import Path
import os.path
#import datetime, os

from keras.applications import VGG16, VGG19, ResNet50, InceptionV3, InceptionResNetV2, Xception, MobileNet, DenseNet121, \
    NASNetMobile, EfficientNetB0, MobileNetV2, MobileNetV3Large
from tensorflow.python.keras.layers import Conv2D, Flatten, Dropout, Dense, MaxPooling2D


<module 'tensorflow._api.v2.version' from '/home/jarrad_foley135/miniconda3/envs/mlis_cw_car/lib/python3.9/site-packages/tensorflow/_api/v2/version/__init__.py'>
Num GPUs Available:  1


In [63]:
batch_size = 16
img_size = [240, 320]

df = pd.read_csv(r'machine-learning-in-science-ii-2023/training_norm.csv')
df['filename'] = df["image_id"].astype(str) + ".png"
print(df)


x_train = df[0:int(len(df) * 0.6)]
x_validate = df[int(len(df) * 0.6):int(len(df) * 0.8)]
x_evaluate = df[int(len(df) * 0.8):]

# x_train = df[0:int(len(df) * 0.8)]
# x_validate = df[int(len(df) * 0.8):]


       image_id   angle  speed   filename
0             1  0.4375    0.0      1.png
1             2  0.8125    1.0      2.png
2             3  0.4375    1.0      3.png
3             4  0.6250    1.0      4.png
4             5  0.5000    0.0      5.png
...         ...     ...    ...        ...
13788     13794  0.6250    1.0  13794.png
13789     13795  0.4375    1.0  13795.png
13790     13796  0.5625    0.0  13796.png
13791     13797  0.6250    0.0  13797.png
13792     13798  0.6875    1.0  13798.png

[13793 rows x 4 columns]


In [64]:
training_datagen = ImageDataGenerator(
    rescale = 1./255,
    fill_mode='nearest')

train_generator = training_datagen.flow_from_dataframe(
    dataframe=x_train,
    directory="machine-learning-in-science-ii-2023/training_data/training_data",
    x_col="filename",
    y_col=["angle","speed"],
    target_size=img_size,
    batch_size=16,
    shuffle=False,
    class_mode='other')

print(train_generator)

validation_datagen = ImageDataGenerator(rescale = 1./255)

val_generator = validation_datagen.flow_from_dataframe(
    dataframe=x_validate,
    directory="machine-learning-in-science-ii-2023/training_data/training_data",
    x_col="filename",
    y_col=["angle","speed"],
    target_size=img_size,
    shuffle=False,
    class_mode='other')

evaluate_datagen = ImageDataGenerator(rescale = 1./255)

eval_generator = evaluate_datagen.flow_from_dataframe(
    dataframe=x_evaluate,
    directory="machine-learning-in-science-ii-2023/training_data/training_data",
    x_col="filename",
    y_col=["angle","speed"],
    target_size=img_size,
    shuffle=False,
    class_mode='other')


Found 8275 validated image filenames.
<keras.preprocessing.image.DataFrameIterator object at 0x7f5a206c5e20>
Found 2759 validated image filenames.
Found 2759 validated image filenames.


In [65]:
def create_CNN_model(input_shape, hidden_layers, pretrained_model=None, num_non_trainable_layers=1,
                     output_layer={'BC': [1, 'sigmoid', 'binary_crossentropy'],
                                   'MC': [17, 'softmax', 'categorical_crossentropy']},
                     init='normal', optimize='adam', metrics=['accuracy', 'mse']):
    if pretrained_model:
        if pretrained_model == 'vgg16':
            base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
        elif pretrained_model == 'vgg19':
            base_model = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)
        elif pretrained_model == 'resnet50':
            base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
        elif pretrained_model == 'inceptionv3':
            base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)
        elif pretrained_model == 'inceptionresnetv2':
            base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=input_shape)
        elif pretrained_model == 'xception':
            base_model = Xception(weights='imagenet', include_top=False, input_shape=input_shape)
        elif pretrained_model == 'mobilenet':
            base_model = MobileNet(weights='imagenet', include_top=False, input_shape=input_shape)
        elif pretrained_model == 'densenet':
            base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=input_shape)
        elif pretrained_model == 'nasnet':
            base_model = NASNetMobile(weights='imagenet', include_top=False, input_shape=input_shape)
        elif pretrained_model == 'efficientnet':
            base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)
        else:
            raise ValueError('Invalid pretrain_model parameter. Please select from vgg16, vgg19, resnet50, '
                             'inceptionv3, inceptionresnetv2, xception, mobilenet, densenet, nasnet, or efficientnet')

        # Set layers to be non-trainable
        for layer in base_model.layers[:-num_non_trainable_layers]:
            layer.trainable = False

        # create model
        model = Sequential()
        model.add(base_model)

    else:
        # create model
        model = Sequential()

    # print(hidden_layers)
    k = 3
    s = 1
    p = 2
    r = 0.2

    for i, layer in enumerate(hidden_layers):
        #         print(layer)
        #         print(type(layer))
        layer = str(layer)
        if i == 0:
            if '_' in layer:
                params = layer.split('_')
                for i, param in enumerate(params):
                    if i == 0:
                        filters = int("".join(filter(str.isdigit, param)))
                    #                     print(filters)
                    elif i == 1:
                        k = int("".join(filter(str.isdigit, param)))
                    #                     print(k)
                    elif i == 2:
                        s = int("".join(filter(str.isdigit, param)))
            #                     print(s)
            else:
                filters = int("".join(filter(str.isdigit, layer)))
            #               print(filters)

            model.add(Conv2D(filters, kernel_size=(k, k), strides=(s, s), activation='relu', input_shape=input_shape))

        elif 'C' in layer:
            if '_' in layer:
                params = layer.split('_')
                for i, param in enumerate(params):
                    if i == 0:
                        filters = int("".join(filter(str.isdigit, param)))
                    #                     print(filters)
                    elif i == 1:
                        k = int("".join(filter(str.isdigit, param)))
                    #                     print(k)
                    elif i == 2:
                        s = int("".join(filter(str.isdigit, param)))
            #                     print(s)
            else:
                filters = int("".join(filter(str.isdigit, layer)))
            #               print(filters)

            model.add(Conv2D(filters, (k, k), strides=(s, s), activation='relu'))


        elif 'MP' in layer:
            stride_flag = False
            if '_' in layer:
                params = layer.split('_')
                for i, param in enumerate(params):
                    if i == 1:
                        p = int("".join(filter(str.isdigit, param)))
                    #                     print(p)
                    elif i == 2:
                        s = int("".join(filter(str.isdigit, param)))
                        stride_flag = True
            #                     print(s)

            model.add(MaxPooling2D(pool_size=(p, p), strides=(s, s) if stride_flag else None))

        elif 'BN' in layer:

            model.add(BatchNormalization())


        elif 'F' in layer:

            model.add(Flatten())
        #             print('F')

        elif 'D' in layer:
            if '_' in layer:
                params = layer.split('_')
                for i, param in enumerate(params):
                    if i == 1:
                        r = int("".join(filter(str.isdigit, param)))

            model.add(Dropout(r))

        else:

            model.add(Dense(int(layer), kernel_initializer=init, activation='relu'))

    loss_functions = []
    for layer, params in output_layer.items():
        model.add(Dense(params[0], activation=params[1], kernel_initializer=init))
        loss_functions.append(params[2])

    model.compile(loss=loss_functions, optimizer=optimize, metrics=metrics)

    return model


In [66]:
shape = (*img_size, 3) # inherited image size with 3 color filters
input_shape = [240, 320, 3]
mbnet = MobileNetV2(
    input_shape=input_shape, 
    include_top=False, 
    weights='imagenet',
    classifier_activation="relu"
)

vgg = MobileNetV2(
    input_shape=input_shape, 
    include_top=False, 
    weights='imagenet',
    classifier_activation="relu"
)

for layer in vgg.layers[:-1]:
    layer.trainable = False

#vgg.trainable = False # freeze the first layers to the imagenet weights

# CNN base
model = models.Sequential()

# model.add(layers.Conv2D(16, (3, 3), activation='relu', input_shape=shape))
# model.add(layers.BatchNormalization())
# model.add(layers.MaxPooling2D((2, 2)))

model.add(vgg)
model.build()
        
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
#model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
# model.add(layers.MaxPooling2D((2, 2), padding='same'))

# model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
# model.add(layers.BatchNormalization())
# model.add(layers.MaxPooling2D((2, 2), padding='same'))

# model.add(layers.Conv2D(256, (3, 3), activation='relu', padding='same'))
# model.add(layers.BatchNormalization())
# model.add(layers.MaxPooling2D((2, 2), padding='same'))

#model.add(layers.Dropout(0.2))
model.add(layers.Flatten()),

model.add(layers.Dense(256, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.2))

model.add(layers.Dense(128, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.2))

model.add(layers.Dense(64, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.2))

model.add(layers.Dense(32, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.2))

# model.add(layers.Dense(16, activation='relu'))
# model.add(layers.BatchNormalization())
# model.add(layers.Dropout(0.2))

# Output layer
model.add(layers.Flatten())
model.add(layers.Dense(2,  activation='relu', kernel_initializer='normal'))



In [67]:
model.build()


In [68]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Funct  (None, 8, 10, 1280)      2257984   
 ional)                                                          
                                                                 
 conv2d_10 (Conv2D)          (None, 6, 8, 32)          368672    
                                                                 
 batch_normalization_30 (Bat  (None, 6, 8, 32)         128       
 chNormalization)                                                
                                                                 
 conv2d_11 (Conv2D)          (None, 4, 6, 64)          18496     
                                                                 
 batch_normalization_31 (Bat  (None, 4, 6, 64)         256       
 chNormalization)                                                
                                                      

In [69]:
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    loss='mean_squared_error'
)

es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0015,
    patience=20,
    verbose=1,
    mode='min',
    restore_best_weights=True
)

history = model.fit(
    train_generator, 
    batch_size=batch_size,
    validation_data=val_generator,
    callbacks=[es],
    epochs=100
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 45: early stopping


In [70]:
filename = os.listdir("machine-learning-in-science-ii-2023/test_data/test_data") 

df = pd.DataFrame(filename)
df.columns = ["filename"]

df[['file', 'type']] = df.filename.str.split(".", expand = True)
df["file"] = df["file"].astype(str).astype(int)

df.sort_values(by=['file'], inplace=True)
df.reset_index(drop=True, inplace=True)
print(df)


test_datagen = ImageDataGenerator(rescale = 1./255)

test_images = test_datagen.flow_from_dataframe(
    dataframe=df,
    directory="machine-learning-in-science-ii-2023/test_data/test_data",
    x_col="filename",
    target_size=img_size,
    shuffle=False,
    class_mode=None)

print(test_images)

prediction = model.predict(test_images)
print(prediction.shape)
import math

a= prediction[:,0]
b= prediction[:,1]
b+=0.5
b= np.floor(b)

submissiondata={"angle":a , "speed":b} 
df = pd.DataFrame(submissiondata)
df.index += 1 
df.to_csv('Submission.csv')


      filename  file type
0        1.png     1  png
1        2.png     2  png
2        3.png     3  png
3        4.png     4  png
4        5.png     5  png
...        ...   ...  ...
1015  1016.png  1016  png
1016  1017.png  1017  png
1017  1018.png  1018  png
1018  1019.png  1019  png
1019  1020.png  1020  png

[1020 rows x 3 columns]
Found 1020 validated image filenames.
<keras.preprocessing.image.DataFrameIterator object at 0x7f5ad414ffd0>
(1020, 2)


In [71]:
from sklearn.metrics import classification_report

test_loss = model.evaluate(
    eval_generator,
    verbose=1
)

test_datagen = ImageDataGenerator(rescale = 1./255)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=x_evaluate,
    directory="machine-learning-in-science-ii-2023/training_data/training_data",
    x_col="filename",
    target_size=img_size,
    shuffle=False,
    class_mode=None)


prediction = model.predict(test_generator, verbose =1)

a= prediction[:,0]
b= prediction[:,1]
b+=0.5
b= np.floor(b)

submissiondata={"angle":a , "speed":b} 
df = pd.DataFrame(submissiondata)
df.index += 1 
df.to_csv('evaluate.csv')



Found 2759 validated image filenames.


In [72]:
diff = abs(x_evaluate["angle"] - a)

results = pd.DataFrame({"Train":x_evaluate["angle"], "Predicted":a, "Difference":diff})
results

#results.to_csv('evaluate.csv')

Unnamed: 0,Train,Predicted,Difference
11034,0.6250,0.777702,0.152702
11035,0.6875,0.564685,0.122815
11036,0.6250,0.626850,0.001850
11037,0.5000,0.509193,0.009193
11038,0.6250,0.582860,0.042140
...,...,...,...
13788,0.6250,0.677906,0.052906
13789,0.4375,0.473269,0.035769
13790,0.5625,0.506670,0.055830
13791,0.6250,0.643749,0.018749


In [73]:
results = pd.DataFrame({"Train":x_evaluate["speed"],"Predicted":b})
results

#results.to_csv('evaluate.csv')

Unnamed: 0,Train,Predicted
11034,1.0,1.0
11035,1.0,1.0
11036,1.0,1.0
11037,0.0,0.0
11038,0.0,0.0
...,...,...
13788,1.0,1.0
13789,1.0,1.0
13790,0.0,0.0
13791,0.0,0.0


In [74]:
model.save("model.h5")