In [1]:
%%capture
# !unzip '/kaggle/input/street-view-getting-started-with-julia/train.zip'
# !unzip '/kaggle/input/street-view-getting-started-with-julia/test.zip'

!unzip '/kaggle/input/street-view-getting-started-with-julia/trainResized.zip'
!unzip '/kaggle/input/street-view-getting-started-with-julia/testResized.zip'

In [2]:
import gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L1,L2
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.layers import Conv2D,Dense,BatchNormalization,Dropout,Flatten,MaxPool2D


from sklearn.model_selection import train_test_split



In [3]:
import os
from PIL import Image
from skimage.io import imread

def read_image(folder_path):
    image_list = []
    files = os.listdir(folder_path)
    files = sorted(files, key=lambda x: int(x[:-4]))
    for filename in files:
        file_path = os.path.join(folder_path, filename)
        image = imread(file_path)
        if len(image.shape)==2:
            image=np.repeat(image[:, :, np.newaxis], 3, axis=2)
        image_list.append(image)
    return image_list

train_image=read_image('/kaggle/working/trainResized')
test_image=read_image('/kaggle/working/testResized')

# train_image=read_image('/kaggle/working/train')
# test_image=read_image('/kaggle/working/test')
print('Number of train image: ',len(train_image))
print('Number of test image: ',len(test_image))

Number of train image:  6283
Number of test image:  6220


In [4]:
from skimage.transform import resize

new_shape=(20,20)
# using pillow
# train_image=[image.resize(new_shape) for image in train_image]
# test_image=[image.resize(new_shape) for image in test_image]
# using skimage
# train_image=[resize(image,new_shape) for image in train_image]
# test_image=[resize(image,new_shape) for image in test_image]

train_image=np.asarray(train_image,dtype=np.float32)
test_image=np.asarray(test_image,dtype=np.float32)

In [5]:
# data augmentaion
from tensorflow.keras.preprocessing.image import ImageDataGenerator

data_generator=ImageDataGenerator(
    zoom_range = 0.2,
    rescale = 1./255,
    rotation_range = 5.0,
    shear_range = 3.0,
    brightness_range = [0.0, 3.0]
)

In [6]:
label=pd.read_csv('/kaggle/input/street-view-getting-started-with-julia/trainLabels.csv')
unique_label=np.sort(label['Class'].unique())
print('Number of label: ',len(unique_label))
label_to_id={}
for i in range(len(unique_label)):
    label_to_id[unique_label[i]]=i

ids_lable=np.asarray([label_to_id[i] for i in label['Class'].values],dtype=np.float32)
train_image,val_image,train_label,val_label=train_test_split(train_image,ids_lable,train_size=0.9,shuffle=True)

Number of label:  62


In [7]:
model = tf.keras.models.Sequential([
    BatchNormalization(),
    tf.keras.layers.Conv2D(32, (3,3),padding='same', activation = "gelu"),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Conv2D(64, (3,3),padding='same', activation = "gelu"),
    tf.keras.layers.MaxPool2D(2,2),
    BatchNormalization(),
    tf.keras.layers.Dropout(0.1),
    
    tf.keras.layers.Conv2D(128, (3,3),padding='same', activation = "gelu"),
#     tf.keras.layers.MaxPool2D(2,1),
    tf.keras.layers.Dropout(0.1),
    
    tf.keras.layers.Conv2D(128, (3,3),padding='same', activation = "gelu"),
    tf.keras.layers.MaxPool2D(2,2),
    
    BatchNormalization(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.3),
    
    tf.keras.layers.Dense(432, activation = "gelu"),
    BatchNormalization(),
    Dropout(0.2),
    tf.keras.layers.Dense(62, activation='softmax')
])

In [8]:
early_stopping=EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=15,
    restore_best_weights=True)

lr_schedule=ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=100,
    decay_rate=0.9,
    staircase=True
)
lr_reduce=ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7,
    min_delta=0.001,
    patience=10,
    min_lr=1e-8,
    verbose=1)

In [9]:
# training
optimizer=Adam(learning_rate=lr_schedule,beta_1=0.9, beta_2=0.98,epsilon=1e-9)
model.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
model.fit(data_generator.flow(train_image,train_label,batch_size=32),
        validation_data=(val_image,val_label),
         callbacks=[early_stopping,lr_reduce],
         epochs=75)

Epoch 1/75


2023-12-19 13:53:13.142696: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 21: ReduceLROnPlateau reducing learning rate to 1.4192879280017222e-05.
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75


<keras.src.callbacks.History at 0x7f484b5ed930>

In [10]:
# training
optimizer=Adam(learning_rate=lr_schedule,beta_1=0.9, beta_2=0.98,epsilon=1e-9)
model.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
model.fit(train_image,train_label,batch_size=256,
        validation_data=(val_image,val_label),
         callbacks=[early_stopping,lr_reduce],
         epochs=75)

Epoch 1/75


2023-12-19 13:55:17.614877: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 17: ReduceLROnPlateau reducing learning rate to 0.000510300014866516.
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75


<keras.src.callbacks.History at 0x7f484af44160>

In [11]:
#write output to csv
pred=model.predict(test_image)
pred_label=list(map(lambda x: unique_label[x],np.argmax(pred,axis=1)))
submission=pd.read_csv('/kaggle/input/street-view-getting-started-with-julia/sampleSubmission.csv')

submission['Class']=pred_label
submission.to_csv('submission.csv',index=False)

