# **Necessary Imports**

In [None]:
import shutil
import os
import random

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
from tensorflow.keras.saving import load_model
from tensorflow.keras.models import Model
from tensorflow.keras.applications import RegNetX040
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# **Prepare Data**

In [None]:
# Work from the competition input folder so that the relative paths used in
# later cells ('train.csv', './images/...', 'sample_submission.csv') resolve.
%cd '/kaggle/input/ukraine-ml-bootcamp-2023'

In [None]:
# Training table; later cells read its 'class_6' column as the class label.
data_table = pd.read_csv('train.csv')

In [None]:
def arrange_data(img_folder, tgt_folder, class_table):
    """
    Copy images into one sub-folder per class so that they can be fed
    into ImageDataGenerator.flow_from_directory.

    img_folder:  folder that holds the source images
    tgt_folder:  folder in which the per-class sub-folders are created
    class_table: DataFrame with exactly two columns, the image file name
                 first and the class label second; the label column must
                 be named 'class_6'

    The source images are copied, not moved, so img_folder is left intact.
    """
    # exist_ok avoids the check-then-create race and lets the call be repeated
    for val in class_table['class_6'].unique():
        os.makedirs(os.path.join(tgt_folder, str(val)), exist_ok=True)

    # itertuples keeps every column's own dtype, whereas iterrows squeezes
    # each row into a single-dtype Series before unpacking
    for img_id, class_val in class_table.itertuples(index=False, name=None):
        shutil.copy(os.path.join(img_folder, img_id),
                    os.path.join(tgt_folder, str(class_val)))

In [None]:
# A single call is enough: makedirs also creates the missing parent
# '/kaggle/working/images', and exist_ok lets this cell be re-run without
# raising FileExistsError.
os.makedirs('/kaggle/working/images/train', exist_ok=True)

In [None]:
# Copy every training image into /kaggle/working/images/train/<class label>/
arrange_data('./images/train_images', '/kaggle/working/images/train', data_table)

In [None]:
# One validation sub-folder per class label found in the training table.
# exist_ok lets this cell be re-run without raising FileExistsError.
os.makedirs('/kaggle/working/images/validation', exist_ok=True)
for val in data_table['class_6'].unique():
    os.makedirs(os.path.join('/kaggle/working/images/validation', str(val)),
                exist_ok=True)

In [None]:
def train_test_split(train_folder, validation_folder, split_size=0.7, seed=None):
    """
    Perform train-test splitting so that each class maintains its
    representation in both categories.

    For every class sub-folder of train_folder a random subset of its
    files is moved into the sub-folder of the same name under
    validation_folder (created if missing).

    train_folder:      folder containing one sub-folder per class
    validation_folder: folder that receives the held-out files
    split_size:        fraction of each class that STAYS in train_folder,
                       between 0 and 1; the remaining
                       n - int(split_size * n) files are moved
    seed:              optional seed for a reproducible split; when None
                       the module-level random generator is used

    Raises ValueError if split_size is outside [0, 1].
    """
    if not 0 <= split_size <= 1:
        raise ValueError(f"split_size must be within [0, 1], got {split_size}")

    rng = random if seed is None else random.Random(seed)

    # sorted listings make the outcome depend only on the seed, not on the
    # arbitrary order in which the file system returns directory entries
    for class_fldr in sorted(os.listdir(train_folder)):
        folder = os.path.join(train_folder, class_fldr)
        images = sorted(os.listdir(folder))
        num_validation_imgs = len(images) - int(split_size * len(images))

        # Without an existing destination folder shutil.move would rename each
        # image to a file called <class_fldr>, every move overwriting the last.
        target = os.path.join(validation_folder, class_fldr)
        os.makedirs(target, exist_ok=True)

        for img in rng.sample(images, num_validation_imgs):
            shutil.move(os.path.join(folder, img), target)

In [None]:
# Keep 80% of each class in the train folder and move the rest to validation.
# NOTE: the split reads whatever is currently in the train folder, so running
# this cell again moves a further share of the remaining training images.
train_test_split(train_folder='/kaggle/working/images/train',
                 validation_folder='/kaggle/working/images/validation',
                 split_size=0.8)

In [None]:
# important constants
NUM_CLASSES = 6   # width of the softmax output layer
EPOCHS = 10       # epochs per fit() call (fit is called once per learning-rate stage)
BATCH_SIZE = 32   # batch size shared by the train, validation and test generators

In [None]:
# Augmentation is applied to training images only.
train_datagen = ImageDataGenerator(
    rotation_range=30,           # Random rotation up to 30 degrees
    width_shift_range=0.2,       # Random horizontal shift
    height_shift_range=0.2,      # Random vertical shift
    shear_range=0.2,             # Shear transformations
    zoom_range=0.3,              # Random zoom
    horizontal_flip=True,        # Random horizontal flip
    fill_mode='nearest'          # Fill strategy for new pixels
)
# Labels come from the sub-folder names; 'categorical' yields one-hot targets,
# matching the categorical_crossentropy loss used for training.
train_generator = train_datagen.flow_from_directory(
    '/kaggle/working/images/train',
    target_size=(400, 400),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

In [None]:
# No augmentation for validation: images are only resized to the model input size.
validation_datagen = ImageDataGenerator()
validation_generator = validation_datagen.flow_from_directory(
    '/kaggle/working/images/validation',
    target_size=(400, 400),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# **Create the Model**

In [None]:
# Must match the target_size used by the image generators (400x400, 3 channels).
input_shape = (400, 400, 3)

# ImageNet-pretrained RegNetX040 backbone without its original classification head.
base_model = RegNetX040(weights='imagenet', include_top=False, input_shape=input_shape)

# New head: global average pooling followed by a softmax over NUM_CLASSES.
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(NUM_CLASSES, activation='softmax')(x)

# No layer is frozen in this cell, so the backbone is trained together with the head.
model_regnet = Model(inputs=base_model.input, outputs=predictions)

model_regnet.summary()

# **Train the Model**

In [None]:
# Save the full model (not only the weights) to checkpoint_path, and only when
# validation accuracy improves; the same callback object is passed to every
# fit() call below.
checkpoint_path = "/kaggle/working/model.best.keras"
checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=False,
                             mode='max')

In [None]:
# Stage 1: train for EPOCHS epochs with Adam at learning rate 1e-3.
model_regnet.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, weight_decay=1e-2),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
model_regnet.fit(train_generator, epochs=EPOCHS, validation_data=validation_generator, callbacks=[checkpoint])

In [None]:
# Continue from the checkpointed model (best val_accuracy) rather than from the last epoch.
model_regnet = load_model('/kaggle/working/model.best.keras')

In [None]:
# Stage 2: recompile with a 10x smaller learning rate (1e-4) and train again.
model_regnet.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4, weight_decay=1e-2),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
model_regnet.fit(train_generator, epochs=EPOCHS, validation_data=validation_generator, callbacks=[checkpoint])

In [None]:
# Continue from the checkpointed model (best val_accuracy) rather than from the last epoch.
model_regnet = load_model('/kaggle/working/model.best.keras')

In [None]:
# Stage 3: final pass at learning rate 1e-5.
model_regnet.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5, weight_decay=1e-2),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
model_regnet.fit(train_generator, epochs=EPOCHS, validation_data=validation_generator, callbacks=[checkpoint])

In [None]:
# Use the checkpointed model (best val_accuracy) for inference on the test set.
model_regnet = load_model('/kaggle/working/model.best.keras')

# **Submit the Model**

In [None]:
# The sample submission supplies the test image ids ('image_id') and the output layout.
submission = pd.read_csv('sample_submission.csv')

In [None]:
# Build the test file paths in the same order as the submission rows.
test_dir = './images/test_images'
image_paths = [os.path.join(test_dir, filename) for filename in submission['image_id']]

# flow_from_dataframe reads the paths from this single-column frame.
test_df = pd.DataFrame({'image_path': image_paths})

In [None]:
# No augmentation at inference time.
test_datagen = ImageDataGenerator()

# class_mode=None / y_col=None: the generator yields images only (no labels).
# shuffle=False keeps the batches in test_df order, so the predictions can be
# written back to the submission rows by position.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='image_path',
    y_col=None,
    target_size=(400, 400),
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)

# NOTE: this rebinds `predictions`, which earlier named the model's output tensor.
predictions = model_regnet.predict(test_generator)

In [None]:
# argmax yields the generator's class *index* (the position assigned to each
# class folder in train_generator.class_indices), which equals the label itself
# only when the sorted folder names happen to be 0..N-1.
classes_preds = np.argmax(predictions, axis=1)

# Translate index -> class folder name -> original label value from train.csv.
label_by_folder = {str(label): label for label in data_table['class_6'].unique()}
index_to_label = {index: label_by_folder[folder]
                  for folder, index in train_generator.class_indices.items()}

# A plain list (instead of a pd.Series, which is aligned on the index) makes a
# length mismatch between predictions and submission rows raise an error
# instead of silently filling NaN.
submission['class_6'] = [index_to_label[index] for index in classes_preds]

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv('/kaggle/working/submission.csv', index=False)