In [None]:
from google.colab import drive

# Attach Google Drive; the dataset folder and all output files used below
# live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Input, Lambda, Flatten
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import torch
import os

# Report which accelerator this runtime exposes. `device` and `n_gpu` are not
# referenced elsewhere in the visible notebook, so this is informational.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda" if has_cuda else "cpu")
n_gpu = torch.cuda.device_count()
# get_device_name(0) raises when no CUDA device exists, so only query it when
# one is present; otherwise show a readable placeholder as the cell output.
torch.cuda.get_device_name(0) if has_cuda else "No CUDA device available (running on CPU)"

'Tesla P100-PCIE-16GB'

In [None]:
# Height and width every image is resized to before it enters the network.
IMAGE_SIZE = [224, 224]

# ImageNet-pretrained ResNet50 backbone without its original classification
# head (include_top=False); a new head is attached in a later cell.
resnet = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=IMAGE_SIZE + [3],  # height, width, channels
)

# Freeze the pretrained backbone so that only the new head is trained.
for backbone_layer in resnet.layers:
    backbone_layer.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Folder holding the images. File names are expected to start with the class
# label followed by a space, which is what the parsing below relies on.
path = "/content/drive/MyDrive/Colab Notebooks/DAEN690/NEW"
# os.listdir returns entries in arbitrary order; sorting makes the seeded
# splits below reproducible across runs and machines.
filenames = sorted(os.listdir(path))

df = pd.DataFrame({'filename': filenames})
# The label is the first space-separated token of the file name.
df["category"] = df["filename"].str.split(' ').str[0]

# Keep only the two classes of interest ('china' rows first, then 'usa').
df = pd.concat(
    [df[df.category == label] for label in ('china', 'usa')],
    ignore_index=True,
    axis=0,
)
# df.category.value_counts()

# 70 % train; the remaining 30 % is halved into validation and test (15 % each).
# Both splits are stratified on the label and seeded for repeatability.
train_df, holdout_df = train_test_split(
    df, test_size=0.30, random_state=42, stratify=df["category"])
validate_df, test_df = train_test_split(
    holdout_df, test_size=0.5, random_state=42, stratify=holdout_df["category"])

# Reset the indices so each split is numbered 0..n-1.
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

In [None]:
# Per-class row counts of the train, validation and test splits, in that order.
for split_df in (train_df, validate_df, test_df):
    print(split_df.category.value_counts())

In [None]:
# Options shared by all three generators: images are read from `path`, file
# names come from the 'filename' column, labels from 'category' (one-hot).
flow_options = dict(
    directory=path,
    x_col='filename',
    y_col='category',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
)

# NOTE(review): pixels are scaled to [0, 1] here, whereas Keras documents
# resnet50.preprocess_input as the preprocessing for the ImageNet ResNet50
# weights. Consider switching; left unchanged because it affects training
# and the augmentation preview cell.

# Training images get random geometric augmentation on top of the rescaling.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)
train_set = train_datagen.flow_from_dataframe(dataframe=train_df, **flow_options)

# Validation and test images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_set = validation_datagen.flow_from_dataframe(dataframe=validate_df, **flow_options)

test_datagen = ImageDataGenerator(rescale=1./255)
# shuffle=False keeps the prediction order aligned with the rows of test_df.
test_set = test_datagen.flow_from_dataframe(dataframe=test_df, shuffle=False, **flow_options)

In [None]:
# Classification head: flatten the backbone's output feature maps and feed
# them to one softmax layer with a unit per class found in the training split.
n_classes = train_df.category.nunique()
features = Flatten()(resnet.output)
prediction = Dense(n_classes, activation='softmax')(features)

# Full network: frozen ResNet50 backbone followed by the new head.
model = Model(inputs=resnet.input, outputs=prediction)

model.summary()

In [None]:
# Adam with a small learning rate; labels are one-hot, hence categorical cross-entropy.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-05)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once validation loss has not improved for 5 epochs and roll back to
# the weights of the best epoch.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=True,
)

# One pass over each generator per epoch, for at most 30 epochs.
r = model.fit(
    train_set,
    epochs=30,
    steps_per_epoch=len(train_set),
    validation_data=validation_set,
    validation_steps=len(validation_set),
    callbacks=[callback],
)

In [None]:
# Training vs. validation loss per epoch; the figure is also written to Drive.
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'train loss'), ('val_loss', 'val loss')):
    ax.plot(r.history[history_key], label=curve_label)
ax.legend()
fig.savefig('/content/drive/MyDrive/Colab Notebooks/DAEN690/RESNET50_2/RESNET50_ValLoss_2.png')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch; the figure is also written to Drive.
fig, ax = plt.subplots()
for history_key, curve_label in (('accuracy', 'train acc'), ('val_accuracy', 'val acc')):
    ax.plot(r.history[history_key], label=curve_label)
ax.legend()
fig.savefig('/content/drive/MyDrive/Colab Notebooks/DAEN690/RESNET50_2/RESNET50_ValACC_2.png')
plt.show()

In [None]:
# Persist the trained model to Drive as a single .h5 file.
saved_model_path = '/content/drive/MyDrive/Colab Notebooks/DAEN690/RESNET50_2/RESNET50_2.h5'
model.save(saved_model_path)

In [None]:
# Show the label -> output-index pairs assigned by the training generator;
# these indices are what argmax over the model's softmax output refers to.
train_set.class_indices.items()

In [None]:
# Reload the saved model and predict on the (unshuffled) test generator.
model = tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/DAEN690/RESNET50_2/RESNET50_2.h5')
test_set.reset()  # restart from the first batch so predictions line up with test_df rows
pred = model.predict(test_set)

# Decode argmax indices with the mapping the model was trained against,
# rather than a hard-coded table: the dataset is filtered to 'china'/'usa',
# so a fixed {0: 'India', 1: 'china', 2: 'usa'} table mislabels every row.
index_to_label = {index: label for label, index in train_set.class_indices.items()}
test_df["pred"] = [index_to_label[index] for index in np.argmax(pred, axis=1)]
test_df.tail()

In [None]:
# Write the test split (including any 'pred' column added earlier) to Drive.
predictions_csv = "/content/drive/MyDrive/Colab Notebooks/DAEN690/RESNET50_2/test_df.csv"
test_df.to_csv(predictions_csv)

In [None]:
# Evaluate on the held-out test generator; the result is the loss followed by
# the metrics the model was compiled with.
model.evaluate(test_set, verbose=2)

In [None]:
# Confusion matrix of true vs. predicted labels on the test split.
# Use every label that occurs in either column (sorted), which is the same
# ordering scikit-learn applies by default, and show it on both axes.
class_labels = sorted(set(test_df['category']) | set(test_df['pred']))
cm = confusion_matrix(test_df['category'], test_df['pred'], labels=class_labels)

# fmt='d' prints raw integer counts instead of scientific notation.
ax = sns.heatmap(cm, annot=True, fmt='d',
                 xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()

In [None]:
# Derive the class names from the data instead of hard-coding them.
# scikit-learn orders labels by sorted value, so a fixed list in another order
# mislabels the rows, and a list of the wrong length raises a ValueError.
target_names = sorted(set(test_df['category']) | set(test_df['pred']))
# classification_report returns a preformatted string; print it so the table
# renders with real line breaks instead of escaped '\n'.
print(classification_report(test_df['category'], test_df['pred'],
                            labels=target_names, target_names=target_names))

In [None]:
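# NOTE: recorded output, apparently from an earlier run that still included an
# 'india' class; the notebook now keeps only 'china' and 'usa', so a fresh run
# will produce a different (two-class) report.
#               precision    recall  f1-score   support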

#        india       0.65      0.58      0.61      1500
#        china       0.56      0.75      0.64      1500
#          usa       0.58      0.45      0.51      1501

#     accuracy                           0.59      4501
#    macro avg       0.60      0.59      0.59      4501
# weighted avg       0.60      0.59      0.59      4501

In [None]:
from keras.preprocessing import image

# Show up to 35 random test images in a 5x7 grid, each captioned
# "<true label>(<predicted label>)".
# Cap the sample size so a test split with fewer than 35 rows does not raise.
sample_test = test_df.sample(n=min(35, len(test_df))).reset_index(drop=True)
plt.figure(figsize=(20, 20))
for position, row in enumerate(sample_test.itertuples(index=False), start=1):
    img = image.load_img(os.path.join(path, row.filename), target_size=(224, 224))
    plt.subplot(5, 7, position)
    plt.imshow(img)
    # The true label is the file-name prefix. The prediction is read straight
    # from the row so the global `pred` array is not overwritten.
    plt.xlabel(row.filename.split(' ')[0] + '(' + "{}".format(row.pred) + ')')
plt.tight_layout()
plt.show()

In [None]:
# Image Augmentation: preview 15 random augmentations of one training image.
example_df = train_df.sample(n=1).reset_index(drop=True)
example_set = train_datagen.flow_from_dataframe(
    example_df,
    path,
    x_col='filename',
    y_col='category',
    target_size=(224,224),
    class_mode='categorical'
)

plt.figure(figsize=(12, 12))
for i in range(15):
    plt.subplot(5, 3, i + 1)
    # Each batch drawn from the generator re-applies the random augmentation
    # to the single image. The name `image` is deliberately avoided: it is the
    # imported Keras module and must stay usable by other cells.
    augmented_batch = next(example_set)[0]
    plt.imshow(augmented_batch[0])
plt.tight_layout()
plt.show()