In [None]:
# Attach Google Drive to the Colab VM; the dataset and output paths used
# below all live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Input, BatchNormalization, Dropout, MaxPool2D
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import torch
import os

# specify GPU device
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# n_gpu = torch.cuda.device_count()
# torch.cuda.get_device_name(0)

In [None]:
# Build a filename/category table for the image folder and split it
# 70% train / 15% validation / 15% test, stratified by category.
path = "/content/drive/MyDrive/Colab Notebooks/DAEN690/NEW"

# os.listdir returns entries in arbitrary order; sorting makes the row order
# stable so that random_state=42 produces the same split on every run.
filenames = sorted(os.listdir(path))

df = pd.DataFrame({'filename': filenames})
# The category is the text before the first space in the file name.
df["category"] = df["filename"].str.split(' ').str[0]

# First carve off 30% of the rows, then halve that part into validation and test.
train_df, validate_df = train_test_split(
    df, test_size=0.30, random_state=42, stratify=df["category"]
)
validate_df, test_df = train_test_split(
    validate_df, test_size=0.5, random_state=42, stratify=validate_df["category"]
)

# Reset the index so each split is numbered 0..n-1.
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

In [None]:
# Per-class row counts for the train, validation and test splits (in that order).
for split_df in (train_df, validate_df, test_df):
    print(split_df["category"].value_counts())

china    7000
India    7000
usa      7000
Name: category, dtype: int64
china    1500
usa      1500
India    1500
Name: category, dtype: int64
usa      1501
china    1500
India    1500
Name: category, dtype: int64


In [None]:
# Sequential CNN: four Conv -> BatchNorm -> MaxPool -> Dropout stages,
# followed by a dense head with one softmax unit per category.
cnn = Sequential()

# Filter count of each convolution stage, in order.
conv_filters = (256, 512, 512, 256)

for stage, n_filters in enumerate(conv_filters):
    # Only the very first layer declares the input image shape.
    first_layer_args = {'input_shape': [224, 224, 3]} if stage == 0 else {}
    cnn.add(Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu',
                   **first_layer_args))
    cnn.add(BatchNormalization())
    cnn.add(MaxPool2D(pool_size=2, strides=2))
    cnn.add(Dropout(0.2))

# Flatten the feature maps before the dense head.
cnn.add(Flatten())
cnn.add(Dropout(0.2))

# Fully connected layer
cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.5))

# Output layer: one unit per distinct category in the training split.
n_classes = train_df["category"].nunique()
cnn.add(Dense(units=n_classes, activation='softmax'))

In [None]:
# Arguments shared by every flow_from_dataframe call below.
flow_args = dict(directory=path,
                 x_col='filename',
                 y_col='category',
                 target_size=(224, 224),
                 class_mode='categorical',
                 batch_size=32)

# Training images are rescaled to [0, 1] and randomly augmented.
augmentation_args = dict(rescale=1./255,
                         rotation_range=15,
                         shear_range=0.1,
                         zoom_range=0.2,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         horizontal_flip=True,
                         vertical_flip=True)
train_datagen = ImageDataGenerator(**augmentation_args)
train_set = train_datagen.flow_from_dataframe(train_df, **flow_args)

# Validation images are only rescaled.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_set = validation_datagen.flow_from_dataframe(validate_df, **flow_args)

# Test images are only rescaled; shuffle=False keeps batches in test_df row order.
test_datagen = ImageDataGenerator(rescale=1./255)
test_set = test_datagen.flow_from_dataframe(test_df, shuffle=False, **flow_args)

Found 21000 validated image filenames belonging to 3 classes.
Found 4500 validated image filenames belonging to 3 classes.
Found 4501 validated image filenames belonging to 3 classes.


In [None]:
# Compile with Adam (learning rate 1e-4), categorical cross-entropy and an
# accuracy metric, then print the layer summary.
adam = tf.keras.optimizers.Adam(learning_rate=1e-04)
cnn.compile(optimizer=adam,
            loss='categorical_crossentropy',
            metrics=['accuracy'])
cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 222, 222, 256)     7168      
_________________________________________________________________
batch_normalization (BatchNo (None, 222, 222, 256)     1024      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 111, 111, 256)     0         
_________________________________________________________________
dropout (Dropout)            (None, 111, 111, 256)     0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 109, 109, 512)     1180160   
_________________________________________________________________
batch_normalization_1 (Batch (None, 109, 109, 512)     2048      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 54, 54, 512)       0

In [None]:
# Stop training when val_loss has not improved for 5 consecutive epochs and
# roll the model back to the weights of the best epoch.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min',
                                            patience=5, restore_best_weights=True)

# Train for up to 30 epochs; `r.history` holds the per-epoch metrics that the
# plotting cells read.
r = cnn.fit(train_set,
            validation_data=validation_set,
            epochs=30,
            steps_per_epoch=len(train_set),
            validation_steps=len(validation_set),
            # `callbacks` is documented as a list of callback instances.
            callbacks=[callback])

Epoch 1/30
Epoch 2/30
 79/657 [==>...........................] - ETA: 10:07 - loss: 0.9768 - accuracy: 0.5124

In [None]:
# Epoch 1/30
# 657/657 [==============================] - 12250s 19s/step - loss: 1.2800 - accuracy: 0.4232 - val_loss: 1.0492 - val_accuracy: 0.5429
# Epoch 2/30
# 657/657 [==============================] - 728s 1s/step - loss: 0.9972 - accuracy: 0.5047 - val_loss: 0.8920 - val_accuracy: 0.6049
# Epoch 3/30
# 657/657 [==============================] - 717s 1s/step - loss: 0.9488 - accuracy: 0.5532 - val_loss: 0.8614 - val_accuracy: 0.6224
# Epoch 4/30
# 657/657 [==============================] - 698s 1s/step - loss: 0.9111 - accuracy: 0.5751 - val_loss: 0.9004 - val_accuracy: 0.6289
# Epoch 5/30
# 657/657 [==============================] - 699s 1s/step - loss: 0.8875 - accuracy: 0.6039 - val_loss: 0.8056 - val_accuracy: 0.6600
# Epoch 6/30
# 657/657 [==============================] - 702s 1s/step - loss: 0.8648 - accuracy: 0.6100 - val_loss: 0.8728 - val_accuracy: 0.6178
# Epoch 7/30
# 657/657 [==============================] - 700s 1s/step - loss: 0.8393 - accuracy: 0.6310 - val_loss: 0.8106 - val_accuracy: 0.6538
# Epoch 8/30
# 657/657 [==============================] - 703s 1s/step - loss: 0.8289 - accuracy: 0.6301 - val_loss: 0.8457 - val_accuracy: 0.6451
# Epoch 9/30
# 657/657 [==============================] - 723s 1s/step - loss: 0.8153 - accuracy: 0.6391 - val_loss: 0.7906 - val_accuracy: 0.6618
# Epoch 10/30
# 657/657 [==============================] - 728s 1s/step - loss: 0.8102 - accuracy: 0.6442 - val_loss: 0.7666 - val_accuracy: 0.6853
# Epoch 11/30
# 136/657 [=====>........................] - ETA: 8:22 - loss: 0.7919 - accuracy: 0.6563

In [None]:
# Plot training vs. validation loss per epoch and save the figure to Drive.
loss_plot_path = '/content/drive/MyDrive/Colab Notebooks/DAEN690/CNN_3/CNN_ValLoss_3.png'
# savefig raises if the target folder is missing, so create it first.
os.makedirs(os.path.dirname(loss_plot_path), exist_ok=True)

plt.plot(r.history['loss'], label='train loss')
plt.plot(r.history['val_loss'], label='val loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.savefig(loss_plot_path)
plt.show()

In [None]:
# Plot training vs. validation accuracy per epoch and save the figure to Drive.
acc_plot_path = '/content/drive/MyDrive/Colab Notebooks/DAEN690/CNN_3/CNN_ValAcc_3.png'
# savefig raises if the target folder is missing, so create it first.
os.makedirs(os.path.dirname(acc_plot_path), exist_ok=True)

plt.plot(r.history['accuracy'], label='train acc')
plt.plot(r.history['val_accuracy'], label='val acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.savefig(acc_plot_path)
plt.show()

In [None]:
# Persist the trained model to Drive in HDF5 format.
model_path = '/content/drive/MyDrive/Colab Notebooks/DAEN690/CNN_3/CNN_3.h5'
# Make sure the output folder exists before writing the file.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
cnn.save(model_path)

In [None]:
# Display the (class name, integer index) pairs assigned by the training
# generator; these indices are what np.argmax on the model output refers to.
train_set.class_indices.items()

In [None]:
# Predict class probabilities for the test generator and attach the decoded
# label to test_df. This relies on test_set being built with shuffle=False,
# so prediction rows line up with test_df rows.
pred = cnn.predict(test_set)

# Invert the generator's own name -> index mapping instead of hard-coding it,
# so the decoding cannot drift if the class set or its ordering changes.
index_to_label = {idx: name for name, idx in test_set.class_indices.items()}

test_df["pred"] = np.argmax(pred, axis=1)
test_df["pred"] = test_df["pred"].map(index_to_label)
test_df.tail()

In [None]:
# Evaluate the model on the test generator; the returned values (loss and the
# metrics given to compile) are shown as the cell output.
cnn.evaluate(test_set, verbose=2)

In [None]:
# test_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/DAEN690/CNN_3/test_df.csv")
# path = "/content/drive/MyDrive/Colab Notebooks/DAEN690/NEW"

In [None]:
# Confusion matrix of true category (rows) vs. predicted label (columns).
class_labels = sorted(test_df['category'].unique())
cm = confusion_matrix(test_df['category'], test_df['pred'], labels=class_labels)

# fmt='d' prints integer counts instead of scientific notation; the tick
# labels make each row/column identifiable.
ax = sns.heatmap(cm, annot=True, fmt='d',
                 xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('predicted')
ax.set_ylabel('actual')
plt.show()

In [None]:
# Per-class precision / recall / F1 on the test split.
# Derive the names from the data (sorted, which is the order passed as
# `labels`) rather than typing them by hand, so names and rows cannot mismatch.
target_names = sorted(test_df['category'].unique())

# classification_report returns a multi-line string; print it so it renders
# as a table instead of a repr with escaped newlines.
print(classification_report(test_df['category'], test_df['pred'],
                            labels=target_names, target_names=target_names))

In [None]:
from keras.preprocessing import image

# Show a 5x7 grid of test images, each labelled "category(prediction)".
# Cap the sample at the number of available rows (sample() raises otherwise)
# and seed it so the same images are shown on every run.
n_preview = min(35, len(test_df))
sample_test = test_df.sample(n=n_preview, random_state=42).reset_index(drop=True)

plt.figure(figsize=(20, 20))
for index, row in sample_test.iterrows():
    filename = row['filename']
    pred = row['pred']
    img = image.load_img(os.path.join(path, filename), target_size=(224, 224))
    plt.subplot(5, 7, index + 1)
    plt.imshow(img)
    # The category is the part of the file name before the first space.
    plt.xlabel(filename.split(' ')[0] + '(' + "{}".format(pred) + ')')
plt.tight_layout()
plt.show()

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Image Augmentation
# Draw one random training image and display 15 augmented variants of it
# produced by train_datagen.
example_df = train_df.sample(n=1).reset_index(drop=True)
example_set = train_datagen.flow_from_dataframe(
    example_df,
    path,
    x_col='filename',
    y_col='category',
    target_size=(224, 224),
    class_mode='categorical'
)

plt.figure(figsize=(12, 12))
for i in range(0, 15):
    plt.subplot(5, 3, i + 1)
    # Each draw from the generator yields a freshly augmented batch.
    X_batch, Y_batch = next(example_set)
    # Named `augmented_img` rather than `image` so the imported
    # keras.preprocessing `image` module is not shadowed.
    augmented_img = X_batch[0]
    plt.imshow(augmented_img)
plt.tight_layout()
plt.show()