<a href="https://colab.research.google.com/github/sumanjitmoshat/stakxtest/blob/master/intel_image_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
### importing required packages
import pathlib
import PIL
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from keras import regularizers

In [None]:
# connecting with kaggle to download the dataset 
! pip install -q kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets list
!kaggle datasets download -d puneet6060/intel-image-classification
! mkdir train
! unzip intel-image-classification.zip -d train

In [None]:
### root folders of the training and test images (one sub-folder per class is expected below each)
data_dir_train, data_dir_test = (
    pathlib.Path("train/seg_train/seg_train"),
    pathlib.Path("train/seg_test/seg_test"),
)

In [None]:
### counting the .jpg files found exactly one folder below each dataset root
image_count_train = sum(1 for _ in data_dir_train.glob('*/*.jpg'))
image_count_test = sum(1 for _ in data_dir_test.glob('*/*.jpg'))

In [None]:
### printing the train and test image counts, one per line
print(image_count_train, image_count_test, sep='\n')

In [None]:
### setting the batch size to 32 and the image height and width to 150 pixels
batch_size = 32
img_height, img_width = 150, 150

In [None]:
### initializing the training dataset, keeping 20 % of the data as validation set
### (this is the 'training' part of that split; the fixed seed makes the split reproducible)
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir_train,
    validation_split=0.2,
    subset='training',
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

In [None]:
### initializing the validation dataset
### It must be drawn from the SAME directory and with the SAME seed and
### validation_split as train_ds, so that it is exactly the 20 % of the
### training images that train_ds leaves out. Reading it from the test
### directory would discard that 20 % and use only a fifth of the test images.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir_train,
    seed=123,
    validation_split=0.2,
    subset='validation',
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

In [None]:
### reading the class names attached to the training dataset and displaying them
### (done before train_ds is re-assigned by the caching/prefetching cell)
class_names = train_ds.class_names
print(class_names)

In [None]:
### showing the first 9 images of one training batch in a 3x3 grid, titled with their class
plt.figure(figsize=(10, 10))
for batch_images, batch_labels in train_ds.take(1):
    for idx in range(9):
        plt.subplot(3, 3, idx + 1)
        # pixel data is converted to uint8 for display
        plt.imshow(batch_images[idx].numpy().astype("uint8"))
        plt.title(class_names[batch_labels[idx]])
        plt.axis('off')

`AUTOTUNE` lets the tf.data runtime tune the prefetch buffer size dynamically at runtime. `train_ds.cache()` keeps the images in memory after they have been loaded off disk during the first epoch. `train_ds.prefetch()` overlaps data preprocessing and model execution while training.

In [None]:
# Input-pipeline setup: cache the datasets, shuffle them with a buffer of 1000
# and prefetch with a buffer size chosen by tf.data.
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = (
    train_ds
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = (
    val_ds
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)

Creating the model with 3 convolution layers and 2 dense layers after flattening. The inputs are tensors of shape 150 × 150 × 3. They are normalized by the first (rescaling) layer, because each pixel value lies in the range 0 to 255.

In [None]:
# Training budget (number of epochs) and width of the final softmax layer.
epochs = 20
num_classes = 6

In [None]:
# Baseline CNN: rescale pixels by 1/255, three Conv2D + MaxPooling2D stages
# (16, 32 and 64 filters), then Flatten, a 128-unit dense layer and a softmax
# output with one unit per class.
model = Sequential()
model.add(
    layers.experimental.preprocessing.Rescaling(
        1./255, input_shape=(img_height, img_width, 3)
    )
)
for n_filters in (16, 32, 64):
    model.add(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    model.add(layers.MaxPooling2D())
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_classes, activation='softmax'))

Using Adam (which combines ideas from AdaGrad and RMSProp) as the optimizer and SparseCategoricalCrossentropy as the loss function, with accuracy as the metric.

In [None]:
# The model ends in a softmax layer, so the loss receives probabilities
# (from_logits=False) together with integer class labels.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print a summary of the baseline model's architecture.
model.summary()

In [None]:
### training the baseline model for `epochs` (20) epochs, evaluating on val_ds after each epoch;
### the returned History object keeps the per-epoch metrics for plotting
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=val_ds,
)

In [None]:
# Plot the baseline model's learning curves: accuracy on the left panel,
# loss on the right, training vs. validation in each.
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(8, 8))

panels = [
    (acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'Training and Validation Accuracy'),
    (loss, 'Training Loss', val_loss, 'Validation Loss',
     'Training and Validation Loss'),
]
for position, (train_curve, train_label, val_curve, val_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc='lower right')
    plt.title(title)

plt.show()

As we can see, the model is clearly overfitting, so we introduce dropout to reduce the overfitting.

In [None]:
# Stop training once validation accuracy has not improved for 3 consecutive epochs.
earlystop = EarlyStopping(
    patience=3,
    monitor='val_accuracy',
)

In [None]:
# Random image augmentation (horizontal flip, small rotation and zoom).
# It is defined here because the model below uses it as its first layer;
# as the first layer it also carries the input shape.
data_augmentation = Sequential([
    layers.experimental.preprocessing.RandomFlip(
        'horizontal', input_shape=(img_height, img_width, 3)
    ),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
])

# Same architecture as the baseline model, with the augmentation block in
# front and two Dropout(0.2) layers (after the last pooling stage and after
# the 128-unit dense layer) to reduce overfitting.
model_dropout = Sequential([
    data_augmentation,
    layers.experimental.preprocessing.Rescaling(1./255),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax'),
])

In [None]:
# Same optimizer, loss and metric as the baseline model; the softmax output
# means the loss is given probabilities (from_logits=False).
model_dropout.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print a summary of the dropout model's architecture.
model_dropout.summary()

In [None]:
# Train the dropout model for at most `epochs` epochs; the early-stopping
# callback may end training sooner.
history_dropout = model_dropout.fit(
    train_ds,
    epochs=epochs,
    validation_data=val_ds,
    callbacks=[earlystop],
)

In [None]:
# Plot the dropout model's learning curves: accuracy on the left panel,
# loss on the right, training vs. validation in each.
acc = history_dropout.history['accuracy']
val_acc = history_dropout.history['val_accuracy']

loss = history_dropout.history['loss']
val_loss = history_dropout.history['val_loss']

# Early stopping makes the number of completed epochs variable, so the x-axis
# is derived from the recorded history instead of a fixed count (a fixed
# range would raise a shape-mismatch error whenever training stops elsewhere).
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='lower right')
plt.title('Training and Validation Loss')

plt.show()