# **Necessary Imports**

In [1]:
import shutil
import os
import random

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator



In [4]:
from tensorflow.keras.saving import load_model
from tensorflow.keras.models import Model
from tensorflow.keras.applications import RegNetX040
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# **Prepare Data**

In [6]:
# Work from the competition input folder: the relative paths used later in the
# notebook ('train.csv', 'sample_submission.csv', './images/test_images')
# are resolved against this directory.
%cd '/kaggle/input/ml-bootcamp'

/kaggle/input/ml-bootcamp


In [7]:
# Training labels table; arrange_data later reads an image file name and a
# class value from every row.
data_table = pd.read_csv(filepath_or_buffer='train.csv')

In [8]:
def arrange_data(img_folder, tgt_folder, class_table, class_col='class_6', id_col=None):
    """
    Arrange images into one sub-folder per class so the resulting tree can be
    fed to ImageDataGenerator.flow_from_directory.

    Parameters
    ----------
    img_folder : str
        Folder that holds the source images.
    tgt_folder : str
        Folder under which one sub-folder per class value is created.
    class_table : pandas.DataFrame
        One row per image: a column of image file names and a class column.
        Additional columns are ignored.
    class_col : str, optional
        Name of the class column (default 'class_6').
    id_col : str, optional
        Name of the column with the image file names. When omitted, the first
        column of ``class_table`` is used.

    Notes
    -----
    Images are copied, not moved, so ``img_folder`` is left untouched.
    Existing class folders are reused, which makes the call safe to repeat.
    """
    for val in class_table[class_col].unique():
        # exist_ok avoids the check-then-create race and allows re-runs.
        os.makedirs(os.path.join(tgt_folder, str(val)), exist_ok=True)

    # Select the two needed columns explicitly instead of unpacking whole rows,
    # which fails as soon as the table carries more than two columns.
    img_ids = class_table.iloc[:, 0] if id_col is None else class_table[id_col]
    for img_id, class_val in zip(img_ids, class_table[class_col]):
        shutil.copy(os.path.join(img_folder, img_id),
                    os.path.join(tgt_folder, str(class_val)))

In [9]:
# Create the working tree for the per-class training folders. makedirs also
# builds the missing parent ('/kaggle/working/images'), and exist_ok=True lets
# this cell be re-run without raising FileExistsError.
os.makedirs('/kaggle/working/images/train', exist_ok=True)

In [10]:
# Copy every labelled training image into its class sub-folder.
arrange_data(
    img_folder='/kaggle/input/ml-bootcamp/images/train_images',
    tgt_folder='/kaggle/working/images/train',
    class_table=data_table,
)

In [11]:
# One validation sub-folder per class value. exist_ok=True keeps the cell
# re-runnable instead of failing with FileExistsError on the second execution.
os.makedirs('/kaggle/working/images/validation', exist_ok=True)
for val in data_table['class_6'].unique():
    os.makedirs(os.path.join('/kaggle/working/images/validation', str(val)), exist_ok=True)

In [12]:
def train_test_split(train_folder, validation_folder, split_size=0.7, seed=None):
    """
    Move a random share of every class folder from ``train_folder`` into
    ``validation_folder`` so that each class keeps its representation in
    both sets.

    Parameters
    ----------
    train_folder : str
        Folder containing one sub-folder of images per class.
    validation_folder : str
        Destination root; a sub-folder per class is created when missing.
    split_size : float, optional
        Fraction of each class that stays in ``train_folder`` (default 0.7).
        The remaining ``n - int(split_size * n)`` images are moved.
    seed : int, optional
        Seed for a private random generator, making the split reproducible.
        When omitted, the global ``random`` module state is used.

    Raises
    ------
    ValueError
        If ``split_size`` is outside the range [0, 1].

    Notes
    -----
    Files are moved, not copied, so calling the function again moves a
    further share of the remaining training images.
    """
    if not 0 <= split_size <= 1:
        raise ValueError("split_size must be between 0 and 1, got %r" % (split_size,))

    rng = random if seed is None else random.Random(seed)
    for class_fldr in sorted(os.listdir(train_folder)):
        folder = os.path.join(train_folder, class_fldr)
        if not os.path.isdir(folder):
            # Stray files next to the class folders are not classes.
            continue

        # List once and sort so the selection depends only on the RNG state.
        imgs = sorted(os.listdir(folder))
        num_imgs = len(imgs)
        num_validation_imgs = num_imgs - int(split_size * num_imgs)

        # Without an existing target directory shutil.move would rename each
        # image onto the same path, silently overwriting the previous one.
        target = os.path.join(validation_folder, class_fldr)
        os.makedirs(target, exist_ok=True)

        for img in rng.sample(imgs, num_validation_imgs):
            shutil.move(os.path.join(folder, img), target)

In [13]:
# Keep roughly 80% of each class for training and move the rest to validation.
# The files are moved, so running this cell again shifts more images out of train.
train_test_split(
    '/kaggle/working/images/train',
    '/kaggle/working/images/validation',
    split_size=0.8,
)

In [14]:
# Shared hyper-parameters: number of target classes, epochs per fit call,
# and the batch size used by every data generator.
NUM_CLASSES, EPOCHS, BATCH_SIZE = 6, 10, 32

In [15]:
# Augmentation applied on the fly to the training images.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,        # random left-right mirroring
    zoom_range=0.3,              # random zoom
    shear_range=0.2,             # shear transformations
    height_shift_range=0.2,      # random vertical shift
    width_shift_range=0.2,       # random horizontal shift
    rotation_range=30,           # random rotation of up to 30 degrees
    fill_mode='nearest',         # fill strategy for pixels exposed by a transform
)
# Labels come from the class sub-folders and are one-hot encoded ('categorical').
train_generator = train_datagen.flow_from_directory(
    directory='/kaggle/working/images/train',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    target_size=(400, 400),
)

Found 1887 images belonging to 6 classes.


In [16]:
# Validation images get no augmentation; they are only resized to the model input size.
validation_datagen = ImageDataGenerator()
validation_generator = validation_datagen.flow_from_directory(
    directory='/kaggle/working/images/validation',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    target_size=(400, 400),
)

Found 473 images belonging to 6 classes.


# **Create the Model**

In [17]:
# Image size must match the generators' target_size, plus 3 colour channels.
input_shape = (400, 400, 3)

# ImageNet-pretrained RegNetX040 backbone without its classification head.
base_model = RegNetX040(include_top=False, weights='imagenet', input_shape=input_shape)

# New head: global average pooling followed by a softmax over the classes.
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(units=NUM_CLASSES, activation='softmax')(x)

model_regnet = Model(inputs=base_model.input, outputs=predictions)

model_regnet.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/regnet/regnetx040_notop.h5
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 400, 400, 3  0           []                               
                                )]                                                                
                                                                                                  
 regnetx040_prestem_rescaling (  (None, 400, 400, 3)  0          ['input_1[0][0]']                
 Rescaling)                                                                                       
                                                                                                  
 regnetx040_stem_conv (Conv2D)  (None, 200, 200, 32  864         ['regnetx040_prest

# **Train the Model**

In [18]:
# Save the full model (not just weights) whenever validation accuracy reaches
# a new maximum, always overwriting the same file.
checkpoint_path = "/kaggle/working/model.best.keras"
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

In [19]:
# First training pass, using a learning rate of 1e-3.
model_regnet.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(weight_decay=1e-2, learning_rate=1e-3),
)
model_regnet.fit(
    train_generator,
    validation_data=validation_generator,
    callbacks=[checkpoint],
    epochs=EPOCHS,
)

Epoch 1/10
Epoch 1: val_accuracy improved from -inf to 0.60888, saving model to /kaggle/working/model.best.keras
Epoch 2/10
Epoch 2: val_accuracy improved from 0.60888 to 0.65751, saving model to /kaggle/working/model.best.keras
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.65751
Epoch 4/10
Epoch 4: val_accuracy improved from 0.65751 to 0.67442, saving model to /kaggle/working/model.best.keras
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.67442
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.67442
Epoch 7/10
Epoch 7: val_accuracy improved from 0.67442 to 0.70613, saving model to /kaggle/working/model.best.keras
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.70613
Epoch 9/10
Epoch 9: val_accuracy improved from 0.70613 to 0.78013, saving model to /kaggle/working/model.best.keras
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.78013


<keras.callbacks.History at 0x7f475046b220>

In [20]:
# Continue from the best checkpoint written so far rather than from the
# weights of the last epoch.
model_regnet = load_model(filepath='/kaggle/working/model.best.keras')

In [21]:
# Second training pass with the learning rate lowered to 1e-4; recompiling
# installs a fresh optimizer for the reloaded model.
model_regnet.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(weight_decay=1e-2, learning_rate=1e-4),
)
model_regnet.fit(
    train_generator,
    validation_data=validation_generator,
    callbacks=[checkpoint],
    epochs=EPOCHS,
)

Epoch 1/10
Epoch 1: val_accuracy improved from 0.78013 to 0.86892, saving model to /kaggle/working/model.best.keras
Epoch 2/10
Epoch 2: val_accuracy improved from 0.86892 to 0.88795, saving model to /kaggle/working/model.best.keras
Epoch 3/10
Epoch 3: val_accuracy improved from 0.88795 to 0.89641, saving model to /kaggle/working/model.best.keras
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.89641
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.89641
Epoch 6/10
Epoch 6: val_accuracy improved from 0.89641 to 0.89852, saving model to /kaggle/working/model.best.keras
Epoch 7/10
Epoch 7: val_accuracy did not improve from 0.89852
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.89852
Epoch 9/10
Epoch 9: val_accuracy did not improve from 0.89852
Epoch 10/10
Epoch 10: val_accuracy improved from 0.89852 to 0.90275, saving model to /kaggle/working/model.best.keras


<keras.callbacks.History at 0x7f46bf6b1600>

In [22]:
# Continue from the best checkpoint written so far rather than from the
# weights of the last epoch.
model_regnet = load_model(filepath='/kaggle/working/model.best.keras')

In [23]:
# Third training pass with the learning rate lowered to 1e-5; recompiling
# installs a fresh optimizer for the reloaded model.
model_regnet.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(weight_decay=1e-2, learning_rate=1e-5),
)
model_regnet.fit(
    train_generator,
    validation_data=validation_generator,
    callbacks=[checkpoint],
    epochs=EPOCHS,
)

Epoch 1/10
Epoch 1: val_accuracy did not improve from 0.90275
Epoch 2/10
Epoch 2: val_accuracy did not improve from 0.90275
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.90275
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.90275
Epoch 5/10
Epoch 5: val_accuracy did not improve from 0.90275
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.90275
Epoch 7/10
Epoch 7: val_accuracy did not improve from 0.90275
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.90275
Epoch 9/10
Epoch 9: val_accuracy did not improve from 0.90275
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.90275


<keras.callbacks.History at 0x7f46bdf0a5c0>

In [24]:
# Use the best checkpoint (highest validation accuracy) for the test predictions.
model_regnet = load_model(filepath='/kaggle/working/model.best.keras')

# **Submit the Model**

In [25]:
# Submission template; its 'image_id' column lists the test image file names.
submission = pd.read_csv(filepath_or_buffer='sample_submission.csv')

In [26]:
# Build the path of every test image, in the same row order as the submission template.
test_dir = './images/test_images'
image_paths = list(map(lambda name: os.path.join(test_dir, name), submission['image_id']))

test_df = pd.DataFrame(data={'image_path': image_paths})

In [28]:
# Test images get no augmentation, matching the validation pipeline.
test_datagen = ImageDataGenerator()

# There are no labels (class_mode=None), and shuffle=False keeps the batches
# in the row order of test_df.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='image_path',
    y_col=None,
    class_mode=None,
    shuffle=False,
    batch_size=BATCH_SIZE,
    target_size=(400, 400),
)

predictions = model_regnet.predict(test_generator)

Found 756 validated image filenames.


In [29]:
# flow_from_directory numbers the classes by the sorted order of the folder
# names, so argmax yields an index into train_generator.class_indices, not
# necessarily the original label. Translate each index back to its folder name
# (the str() of the label used when the folders were created).
index_to_label = {index: label for label, index in train_generator.class_indices.items()}
classes_preds = np.argmax(predictions, axis=1)

# flow_from_dataframe drops rows whose file is missing; a shorter prediction
# array would otherwise be misaligned with the submission rows.
if len(classes_preds) != len(submission):
    raise ValueError(
        "got %d predictions for %d submission rows" % (len(classes_preds), len(submission))
    )
submission['class_6'] = [index_to_label[index] for index in classes_preds]

In [30]:
# Write the predictions to the working directory, without the DataFrame index column.
submission.to_csv(path_or_buf='/kaggle/working/submission.csv', index=False)