# Copying training-set images into folders according to their class IDs

In [1]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import cv2
import shutil

In [2]:
# Load the label table. Because `names=` is given without `header=`, pandas
# treats the first line of the file as data rather than as column titles.
df = pd.read_csv(
    r'D:\datathonindoml-2022\train_labels.csv',
    names=['Filename', 'Class'],
)

In [3]:
# Display the label table (pandas elides the middle rows in the rendered output).
df

Unnamed: 0,Filename,Class
0,0,1
1,1,13
2,2,13
3,3,14
4,4,6
...,...,...
15995,15995,2
15996,15996,15
15997,15997,3
15998,15998,9


In [4]:
# Order the rows by class so that the unique ids come out ascending.
labels = df.sort_values(by='Class')
# Distinct class ids, in the order they appear in the sorted frame.
class_names = [class_id for class_id in labels['Class'].unique()]

In [5]:
# Show the ascending list of unique class ids.
class_names

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

In [6]:
# Create one output folder per class id listed in the label sheet.
for i in class_names:
    try:
        # exist_ok=True keeps the cell re-runnable when the folders already exist.
        os.makedirs(os.path.join('D:/datathonindoml-2022/train_', str(i)), exist_ok=True)
    except OSError as error:
        # Report why creation failed instead of discarding the exception detail;
        # the loop still carries on with the remaining classes.
        print("Folder ", str(i), "couldn't be made:", error)
    else:
        print("Folder ", str(i), "completed successfully")

Folder  0 completed successfully
Folder  1 completed successfully
Folder  2 completed successfully
Folder  3 completed successfully
Folder  4 completed successfully
Folder  5 completed successfully
Folder  6 completed successfully
Folder  7 completed successfully
Folder  8 completed successfully
Folder  9 completed successfully
Folder  10 completed successfully
Folder  11 completed successfully
Folder  12 completed successfully
Folder  13 completed successfully
Folder  14 completed successfully
Folder  15 completed successfully


In [7]:
# Separate the images by copying each one into the folder named after its class id.
# A single pass over the label rows replaces the per-class filtering of the frame;
# the originals stay in place because shutil.copy copies rather than moves.
for image_id, class_id in zip(df['Filename'], df['Class']):
    source_path = 'D:/datathonindoml-2022/train/train/' + str(image_id) + '.tif'
    # The trailing slash makes shutil.copy fail loudly if the class folder is
    # missing instead of writing a plain file with the class id as its name.
    target_dir = 'D:/datathonindoml-2022/train_/' + str(class_id) + '/'
    shutil.copy(source_path, target_dir)

# Generating Training set

In [8]:
import tensorflow as tf
from tensorflow.keras import datasets,layers,models
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

In [9]:
import random

# One shared seed for Python's RNG, NumPy's legacy global RNG and TensorFlow.
seed_constant = 27
random.seed(seed_constant)
np.random.seed(seed_constant)
tf.random.set_seed(seed_constant)

In [10]:
# Class folder names "0" .. "15"; a name's position in the list is its integer label.
CLASSES_LIST = [str(class_id) for class_id in range(16)]
# Root directory holding one sub-folder of images per class.
DATASET_DIR = "D:/datathonindoml-2022/train_"

In [11]:
# Side length, in pixels, of the square that every image is resized to.
IMG_SIZE = 224

In [12]:
# creating a function to create training set
def create_dataset(dataset_dir="D:/datathonindoml-2022/train_", classes=None, img_size=None):
    """Load every class folder into one image array and one label array.

    Parameters
    ----------
    dataset_dir : str
        Root folder containing one sub-folder per class. The default is the
        literal training path rather than ``DATASET_DIR`` because that global
        is re-pointed at the validation images further down the notebook.
    classes : sequence of str, optional
        Sub-folder names; the position of a name is the integer label given
        to its images. Defaults to ``CLASSES_LIST``.
    img_size : int, optional
        Width and height passed to ``cv2.resize``. Defaults to ``IMG_SIZE``.

    Returns
    -------
    features : np.ndarray
        Resized images stacked along the first axis, grouped by class.
    labels : np.ndarray
        One integer class index per image, aligned with ``features``.

    Raises
    ------
    ValueError
        If ``cv2.imread`` cannot decode one of the files.
    """
    if classes is None:
        classes = CLASSES_LIST
    if img_size is None:
        img_size = IMG_SIZE

    images = []
    labels = []
    for class_index, class_name in enumerate(classes):
        class_dir = os.path.join(dataset_dir, str(class_name))
        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore the seeded split downstream) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            path = os.path.join(class_dir, file_name)
            image = cv2.imread(path)
            if image is None:
                # imread signals failure with None; fail here with the offending
                # path instead of an opaque error from inside cv2.resize.
                raise ValueError("cv2.imread could not read image: " + repr(path))
            images.append(cv2.resize(image, (img_size, img_size)))
            labels.append(class_index)
    features = np.asarray(images)
    labels = np.array(labels)
    return features, labels
        

In [13]:
# Build the full training arrays. Note that this rebinds `labels`, which
# earlier in the notebook held the label DataFrame sorted by class.
features, labels = create_dataset()

In [14]:

# Sanity check: image array shape first, label vector shape second.
print(features.shape, labels.shape, sep="\n")

(16000, 224, 224, 3)
(16000,)


In [15]:
# Convert integer labels to one-hot rows. The width is pinned to the number of
# classes so it always matches the network's 16-way softmax, even if the
# highest-numbered class folder happened to be empty (to_categorical would
# otherwise infer the width from the largest label present).
one_hot_encoded_labels = to_categorical(labels, num_classes=len(CLASSES_LIST))

In [16]:
# Integer class index per image; the values are grouped by class folder.
labels

array([ 0,  0,  0, ..., 15, 15, 15])

In [17]:
# Shuffle the data and split it into training and test parts.
from sklearn.model_selection import train_test_split

# A test_size of 0.00001 on this dataset holds out a single image, so the call
# is effectively a seeded shuffle that keeps nearly everything for training.
(features_train, features_test,
 labels_train, labels_test) = train_test_split(
    features,
    one_hot_encoded_labels,
    test_size=0.00001,
    random_state=seed_constant,
    shuffle=True,
)

In [18]:
# Recover integer class ids from the one-hot rows of the held-out split.
y_classes = list(map(np.argmax, labels_test))
y_classes[:5]

[5]

In [19]:
# The label count and the image count of the training split should agree.
print(len(labels_train), len(features_train), sep="\n")

15999
15999


In [20]:
# Normalise pixel values to [0, 1].
# - The integer check makes the cell safe to re-run: once the array is
#   floating point it has already been scaled and is left alone.
# - Computing directly in float32 avoids the float64 array that plain
#   `uint8 / 255` would allocate (twice the memory for the same training input).
if np.issubdtype(features_train.dtype, np.integer):
    features_train = np.divide(features_train, 255.0, dtype=np.float32)

In [21]:
 # Confirm the training tensor shape after normalisation.
 # NOTE(review): this cell is indented by one stray space; IPython appears to
 # tolerate it, but plain Python would reject the leading indent — confirm.
 features_train.shape

(15999, 224, 224, 3)

# Model

In [22]:
# Three convolution/pooling stages followed by a funnel of dense layers that
# ends in a 16-way softmax (one unit per class).
cnn = models.Sequential([
    # Stage 1: the strided convolution halves the 224x224 input before pooling.
    layers.Conv2D(32, (3, 3), strides=(2, 2), padding="same", activation="relu",
                  input_shape=(224, 224, 3)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(rate=0.10),

    # Stage 2
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(rate=0.10),

    # Stage 3
    layers.Conv2D(filters=128, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(rate=0.10),

    # Classifier head
    layers.Flatten(),
    layers.Dense(units=512, activation="relu"),
    layers.Dropout(rate=0.10),
    # NOTE(review): 254 looks like a typo for 256 — confirm before changing,
    # since any saved weights depend on this width.
    layers.Dense(units=254, activation="relu"),
    layers.Dropout(rate=0.10),
    layers.Dense(units=128, activation="relu"),
    layers.Dropout(rate=0.10),
    layers.Dense(units=64, activation="relu"),
    layers.Dropout(rate=0.10),
    layers.Dense(units=16, activation="softmax"),
])

In [23]:
# Print the layer-by-layer output shapes and parameter counts.
cnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 112, 112, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 56, 56, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 56, 56, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 54, 54, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 27, 27, 64)       0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 27, 27, 64)        0

In [24]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has gone 5 epochs without improving, then roll the model
# back to the weights from its best epoch.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=True,
)

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [18]:
# Train for up to 40 epochs; 20% of the training split is set aside by Keras
# to compute val_loss, which the early-stopping callback monitors.
cnn.fit(
    features_train,
    labels_train,
    batch_size=32,
    epochs=40,
    validation_split=0.20,
    shuffle=True,
    callbacks=[early_stopping_callback],
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40


<keras.callbacks.History at 0x211482751f0>

# saving and loading the model

In [None]:
# Save the trained network to an HDF5 file in the working directory.
# NOTE(review): this cell has no execution count in the saved notebook, so the
# file loaded in the next cell may come from an earlier run — confirm.
cnn.save('model_final.h5')

In [25]:
# Import the loader from tensorflow.keras, like every other Keras import in this
# notebook, so the model is restored by the same package that built and saved it.
from tensorflow.keras.models import load_model

model = load_model('model_final.h5')


# Generating results on validation set

In [26]:
# Re-point DATASET_DIR at the validation images (it previously named the training root).
DATASET_DIR = "D:/datathonindoml-2022/validation/validation"

In [27]:
def create_dataset_val(dataset_dir=None, img_size=None):
    """Load every image in the validation folder into one array.

    Parameters
    ----------
    dataset_dir : str, optional
        Folder whose files are read. Defaults to the global ``DATASET_DIR``.
    img_size : int, optional
        Width and height passed to ``cv2.resize``. Defaults to ``IMG_SIZE``.

    Returns
    -------
    np.ndarray
        Resized images stacked along the first axis, in sorted file-name order.

    Raises
    ------
    ValueError
        If ``cv2.imread`` cannot decode one of the files.
    """
    if dataset_dir is None:
        dataset_dir = DATASET_DIR
    if img_size is None:
        img_size = IMG_SIZE

    images = []
    # os.listdir order is arbitrary. The submission cell pairs predictions with
    # an ascending id list, so rows must come out in a stable, sorted order.
    # NOTE(review): assumes the file names sort the same way as those ids — confirm.
    for file_name in sorted(os.listdir(dataset_dir)):
        path = os.path.join(dataset_dir, file_name)
        image = cv2.imread(path)
        if image is None:
            # Silently skipping a file would shift every later prediction onto
            # the wrong id, so fail with the offending path instead.
            raise ValueError("cv2.imread could not read image: " + repr(path))
        images.append(cv2.resize(image, (img_size, img_size)))
    features = np.asarray(images)
    return features
        

In [28]:
# Load the validation images. This rebinds `features`, which previously held
# the training image array.
features = create_dataset_val()

In [29]:
# Submission ids 17801 .. 18700 inclusive, one per validation image.
file = list(range(17801, 18701))

In [30]:
# The network was trained on pixels scaled to [0, 1] (features_train / 255), so
# the validation images need the same scaling before prediction; feeding raw
# 0-255 values would give the model inputs far outside its training range.
y_pred = model.predict(features.astype(np.float32) / 255.0)



In [31]:
# Pick the most probable class index from each row of softmax scores.
y_classes = list(map(np.argmax, y_pred))

In [32]:
import pandas as pd

In [33]:
# Pair each submission id with its predicted class. This rebinds `df`, which
# earlier held the training label table.
df=pd.DataFrame({'id':file,'label':y_classes})

In [34]:
# Write the submission file to the working directory, without the index column.
df.to_csv('submission_final.csv',index=False)