In [None]:
# import modules

import numpy as np 
import pandas as pd
import os
import csv
import matplotlib.pyplot as plt
from PIL import Image
import sys
import keras
from keras.preprocessing import image_dataset_from_directory
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.utils import to_categorical, Sequence
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Input
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

#os.chdir("../input/uos-com2028")

In [None]:
# Notebook-wide settings used by the data generators and the model

seed = 42                # passed as `seed=` to both flow_from_dataframe calls
split = 0.0              # handed to ImageDataGenerator as `validation_split`
image_dimensions = 224   # images are resized to image_dimensions x image_dimensions
batch_size = 32          # number of images per generator batch

In [None]:
# Image data generator shared by the training and validation flows.
# Pixel values are multiplied by 1/255 and the augmentation settings below are
# applied; `validation_split` comes from the `split` constant.

augmentation_options = dict(
    rescale=1./255,
    brightness_range=(0.2,1.5),
    horizontal_flip=True,
    fill_mode="nearest",
    zoom_range=0.2,
    shear_range=0.2,
    rotation_range=45,
)

data_gen = ImageDataGenerator(validation_split=split, **augmentation_options)

In [None]:
# Load the training labels.
# flow_from_dataframe only worked once every class label had the same length, so
# this is a re-uploaded copy of train.txt in which single-digit classes are
# zero-padded (e.g. 'train/0.jpg 5' became 'train/0.jpg 05').
# Every column is read as a string so the padded labels keep their leading zero.

train_labels_path = "../input/trainbutterflyzero/train_zero.csv"
traindf = pd.read_csv(train_labels_path, dtype=str)

In [None]:
# Training flow: batches of resized images and one-hot ("categorical") labels,
# taken from the "training" subset of the labels dataframe

train_flow_options = {
    "dataframe": traindf,
    "directory": "../input/uos-com2028/train/",
    "x_col": "id",
    "y_col": "label",
    "subset": "training",
    "class_mode": "categorical",
    "target_size": (image_dimensions, image_dimensions),
    "batch_size": batch_size,
    "seed": seed,
}
train_generator = data_gen.flow_from_dataframe(**train_flow_options)

# Print the label -> class index mapping to check the classes were picked up correctly

print(train_generator.class_indices)

In [None]:
# Validation flow: same dataframe, image folder and settings as the training
# flow, but reading the "validation" subset of the data generator

valid_flow_options = {
    "dataframe": traindf,
    "directory": "../input/uos-com2028/train/",
    "x_col": "id",
    "y_col": "label",
    "subset": "validation",
    "class_mode": "categorical",
    "target_size": (image_dimensions, image_dimensions),
    "batch_size": batch_size,
    "seed": seed,
}
valid_generator = data_gen.flow_from_dataframe(**valid_flow_options)

# Print the label -> class index mapping to check the classes were picked up correctly

print(valid_generator.class_indices)

In [None]:
# Build the classifier: VGG19 with ImageNet weights and without its original
# top layers, followed by a Flatten layer and a 23-unit softmax Dense layer

base_model = VGG19(
    include_top=False,
    weights='imagenet',
    pooling="avg",
    input_shape=(image_dimensions, image_dimensions, 3),
)

# Output tensor of the last VGG19 layer, which the new head is attached to
base_features = base_model.layers[-1].output
flatten = Flatten()(base_features)
output = Dense(23, activation='softmax')(flatten)

model = keras.Model(inputs=base_model.inputs, outputs=output)

model.summary()

In [None]:
# Compile the model and train it with early stopping and best-model checkpointing

# SGD optimizer with its default settings
opt = keras.optimizers.SGD()

# With `split = 0.0` the validation generator contains no images. Only pass
# validation data to `fit` (and only monitor validation metrics) when there is
# something to validate on; otherwise fall back to the training metrics.
has_validation = valid_generator.samples > 0
metric_prefix = "val_" if has_validation else ""

# callback parameters: stop after 8 epochs without a lower loss, and keep only
# the checkpoint with the highest accuracy seen so far

early_stop = EarlyStopping(monitor=metric_prefix + 'loss', mode='min', patience=8)
checkpoint = ModelCheckpoint("/kaggle/working/model_checkpointed", monitor=metric_prefix + 'accuracy', verbose=1, save_best_only=True, mode='max')

model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# `Model.fit` accepts generators directly; `fit_generator` is deprecated
model_training = model.fit(
    train_generator,
    validation_data=valid_generator if has_validation else None,
    epochs=50,
    callbacks=[early_stop, checkpoint]
)

In [None]:
# Persist the trained model so it can be reloaded later to build a confusion matrix

saved_model_path = 'model_saved'
model.save(saved_model_path)

In [None]:
# Predict the class of every test image; `ids` and `pred` are filled in step
# so that pred[i] is the prediction for the image whose id is ids[i]

# One directory for both listing and loading, so the files that are listed are
# exactly the files that are read
test_dir = "../input/uos-com2028/test/test"

# Sorted so the order of the results is the same on every run
files = sorted(os.listdir(test_dir))

ids = []
pred = []

for file in files:
    ids.append(file.split(".jpg")[0])
    image = img_to_array(load_img(os.path.join(test_dir, file), color_mode='rgb', target_size=(image_dimensions, image_dimensions), interpolation='nearest'))
    # The model was trained on pixels scaled by 1/255 (the `rescale` argument of
    # the training data generator), so test pixels must be scaled the same way
    image = image * (1./255)
    # argmax over the class axis gives an array holding one class index.
    # NOTE(review): this index is written out as the label, which presumably
    # relies on the zero-padded labels mapping to indices in numeric order;
    # confirm against the printed class_indices.
    pred.append(np.argmax(model.predict(image.reshape(-1, image_dimensions, image_dimensions, 3), verbose=0), axis=1))

In [None]:
# Write the predictions to a CSV file with an "id,label" header row, one row per image

with open('attempt_x.csv', 'w', newline='') as csv_file:
    submission = csv.writer(csv_file)
    submission.writerow(["id", "label"])
    # each entry of `pred` is a one-element array; its single value is the label
    submission.writerows(
        [image_id, labels[0]] for image_id, labels in zip(ids, pred)
    )