<a href="https://colab.research.google.com/github/pavi-ninjaac/SARS_CoV_2_DNA_Analysis/blob/main/Model_training/Covid19_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Covid19 Image training

In [33]:
#import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import itertools
import cv2
from PIL import Image

from sklearn.model_selection import train_test_split
#keras 
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping,ReduceLROnPlateau
from keras.models import load_model
from keras.preprocessing import image
from sklearn.metrics import confusion_matrix

#keras layers
from keras.models import Sequential 
from keras.layers import Conv2D,MaxPooling2D,BatchNormalization,AveragePooling2D
from keras.layers import Flatten,Dropout,Dense

In [41]:
# The image pixels and their labels are stored as NumPy array files (.npy);
# keep each location in a named variable and load from there.
features_path = '/content/drive/MyDrive/Colab Notebooks/dataFiles/DNA_Genome_Analysis_covid19/generated_FCGR_Images/dna_image_pixel_x.npy'
labels_path = '/content/drive/MyDrive/Colab Notebooks/dataFiles/DNA_Genome_Analysis_covid19/generated_FCGR_Images/dna_image_pixcel_y.npy'

X = np.load(features_path)
y = np.load(labels_path)
X.shape

(1557, 128, 128)

In [42]:
# First split: 80% of the samples for training, 20% held out.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Second split of the held-out part: 80% becomes the test set, 20% the validation set.
X_test, X_val, y_test, y_val = train_test_split(
    X_holdout, y_holdout, test_size=0.2, random_state=42
)

In [43]:
# Print the shape of every split; a None entry stands for the blank separator line.
shape_report = (
    ("The size of the train data-------------------->", X_train),
    ("The size of the train target data------------->", y_train),
    None,
    ("The size of the test data--------------------->", X_test),
    ("The size of the test target data-------------->", y_test),
    None,
    ("The size of the validation data--------------->", X_val),
    ("The size of the validation target data-------->", y_val),
)
for row in shape_report:
    if row is None:
        print()
    else:
        caption, split = row
        print(caption, split.shape)

The size of the train data--------------------> (1245, 128, 128)
The size of the train target data-------------> (1245,)

The size of the test data---------------------> (249, 128, 128)
The size of the test target data--------------> (249,)

The size of the validation data---------------> (63, 128, 128)
The size of the validation target data--------> (63,)


In [46]:

def clean_data(X_train,y_train,X_test,y_test,X_val,y_val):
  """Prepare the train / test / validation splits for the network.

  Every stage is applied to all three splits before the next stage starts:

  1. cast the inputs and targets to float32 NumPy arrays,
  2. divide the inputs by 255.0,
  3. one-hot encode the targets into 2 classes,
  4. reshape the inputs to (n_samples, 128, 128, 1).

  Returns the processed arrays in the same order as the parameters:
  X_train, y_train, X_test, y_test, X_val, y_val.
  """
  print("Collecting all data...................")

  print("Converting to numpy array>>>>>>>>>>>>>>>>>>")
  inputs = [np.array(part, 'float32') for part in (X_train, X_test, X_val)]
  targets = [np.array(part, 'float32') for part in (y_train, y_test, y_val)]

  print("Normalizing the data>>>>>>>>>>>>>>>>>>>>>>")
  inputs = [part / 255.0 for part in inputs]

  print("Converting target to one hot encoded values>>>>>>>>>>>>>>>>>>>>>>")
  targets = [to_categorical(part, num_classes=2) for part in targets]

  print("reshaping the data>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
  # Append the single channel axis: (n, 128, 128) -> (n, 128, 128, 1).
  inputs = [part.reshape(part.shape[0], 128, 128, 1) for part in inputs]

  print("Preprocessing  completed!!!!!!!!!! stay happy :)")
  train_x, test_x, val_x = inputs
  train_y, test_y, val_y = targets
  return train_x, train_y, test_x, test_y, val_x, val_y

In [47]:
# Preprocess all three splits. The results are bound back to the same names, so
# run this cell only once per fresh split: a second run would divide the pixels
# by 255 again and one-hot encode the already encoded targets.
(X_train, y_train,
 X_test, y_test,
 X_val, y_val) = clean_data(X_train, y_train, X_test, y_test, X_val, y_val)

Collecting all data...................
Converting to numpy array>>>>>>>>>>>>>>>>>>
Normalizing the data>>>>>>>>>>>>>>>>>>>>>>
Converting target to one hot encoded values>>>>>>>>>>>>>>>>>>>>>>
reshaping the data>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
Preprocessing  completed!!!!!!!!!! stay happy :)


In [49]:
# Print the shape of every preprocessed split; a None entry stands for the
# blank separator line.
processed_shape_report = (
    ("The size of the train data-------------------->", X_train),
    ("The size of the train target data------------->", y_train),
    None,
    ("The size of the test data--------------------->", X_test),
    ("The size of the test target data-------------->", y_test),
    None,
    ("The size of the validation data--------------->", X_val),
    ("The size of the validation target data-------->", y_val),
)
for row in processed_shape_report:
    if row is None:
        print()
    else:
        caption, split = row
        print(caption, split.shape)

The size of the train data--------------------> (1245, 128, 128, 1)
The size of the train target data-------------> (1245, 2)

The size of the test data---------------------> (249, 128, 128, 1)
The size of the test target data--------------> (249, 2)

The size of the validation data---------------> (63, 128, 128, 1)
The size of the validation target data--------> (63, 2)


# Plotting helper functions


In [52]:
def plot_confueion_matrix(y_test , y_pred , title = "Confusion Matrix" , classes = None):
  """Draw the confusion matrix of ``y_pred`` against ``y_test`` as a heat map.

  Parameters
  ----------
  y_test : array-like
      True labels. A 2-D array with several columns (one-hot rows, like the
      targets built with ``to_categorical`` in this notebook) is reduced to
      class indices with argmax; a single-column 2-D array is flattened.
  y_pred : array-like
      Predicted labels; 2-D per-class scores are reduced the same way.
  title : str
      Figure title.
  classes : sequence of str, optional
      Tick labels, one per row/column of the matrix, in sorted label order.
      When omitted, the labels that occur in the data are used.

  Raises
  ------
  ValueError
      If ``classes`` is given but its length differs from the matrix size.
  """
  def _as_labels(values):
    # confusion_matrix needs 1-D class labels.
    values = np.asarray(values)
    if values.ndim > 1:
      values = values.argmax(axis=-1) if values.shape[-1] > 1 else values.ravel()
    return values

  y_true = _as_labels(y_test)
  y_hat = _as_labels(y_pred)

  cmap = plt.cm.Blues
  cm = confusion_matrix(y_true , y_hat)

  if classes is None:
    # Without an explicit `labels` argument confusion_matrix orders its rows and
    # columns by the sorted labels seen in either array; mirror that here.
    classes = [str(label) for label in np.unique(np.concatenate([y_true , y_hat]))]
  elif len(classes) != cm.shape[0]:
    raise ValueError(
        "classes has %d entries but the confusion matrix is %d x %d"
        % (len(classes), cm.shape[0], cm.shape[1]))

  #plot the cm
  plt.figure(figsize=(7,7))
  plt.imshow(cm , interpolation='nearest' , cmap= cmap)
  plt.colorbar()

  # Cells darker than the midpoint of the colour range get white text.
  thresh = cm.min() + (cm.max() - cm.min()) / 2.
  for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j,i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

  tick_marks = np.arange(len(classes))
  plt.xticks(tick_marks, classes, rotation=45,fontsize=10)
  plt.yticks(tick_marks, classes,fontsize = 10)

  plt.title(title)
  plt.xlabel('Predicted value')
  plt.ylabel('True values')

  plt.tight_layout()
  plt.show()


def plot_accuracy_loss_graph(result):
  """Plot training and validation accuracy and loss side by side.

  Parameters
  ----------
  result : object with a ``history`` mapping
      ``result.history`` must provide the keys 'accuracy', 'val_accuracy',
      'loss' and 'val_loss', each a per-epoch sequence (such as the object
      returned by ``model.fit`` when validation data is supplied).
  """
  plt.figure(figsize=(20,8))

  # (subplot position, history key, title, y-axis label)
  panels = (
      (1, 'accuracy', 'VGG Model Accuracy', 'Accuracy'),
      (2, 'loss', 'VGG  Model Loss', 'Loss'),
  )
  for position, metric, panel_title, y_label in panels:
    plt.subplot(1,2,position)
    plt.plot(result.history[metric])
    plt.plot(result.history['val_' + metric])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    # The val_* curve is measured on the validation split, not on the
    # separate test split, so label it accordingly.
    plt.legend(['Train', 'Validation'], loc='upper left')

  plt.show()

# Model training

In [58]:
#building the model
def net(input_shape , num_classes , ):
  """Build and compile the convolutional classifier.

  Architecture: three convolutional stages (two same-padded ReLU convolutions,
  batch normalization, 2x2 max pooling) with 64 filters of 5x5, then 128 and
  256 filters of 3x3, followed by dense layers of 1024, 512 and 128 units,
  each with dropout 0.2, and an output layer of ``num_classes`` units.

  Parameters
  ----------
  input_shape : tuple
      Shape of one input sample, e.g. (128, 128, 1).
  num_classes : int
      Number of output units. For more than one unit the targets are expected
      to be one-hot encoded, so the output uses softmax with categorical
      cross-entropy. A single unit keeps sigmoid with binary cross-entropy.

  Returns
  -------
  The compiled model (Adam optimizer, 'accuracy' metric). The layer summary
  is printed as a side effect.
  """
  model = Sequential()

  # Stage 1: 5x5 convolutions; the first layer also fixes the input shape.
  model.add(Conv2D(64, (5, 5), activation='relu', padding='same', input_shape=input_shape))
  model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
  model.add(BatchNormalization())
  model.add(MaxPooling2D(pool_size=(2,2)))

  # Stages 2 and 3: 3x3 convolutions with a growing number of filters.
  for filters in (128, 256):
    model.add(Conv2D(filters, (3, 3), activation='relu', padding='same'))
    model.add(Conv2D(filters, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))

  model.add(Flatten())

  # Fully connected head.
  for units in (1024, 512, 128):
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(0.2))

  # One-hot targets describe mutually exclusive classes: softmax makes the
  # outputs a probability distribution and categorical cross-entropy scores it.
  # Independent sigmoids with binary cross-entropy would treat the task as
  # multi-label and make 'accuracy' an element-wise binary accuracy.
  if num_classes == 1:
    output_activation, loss = 'sigmoid', 'binary_crossentropy'
  else:
    output_activation, loss = 'softmax', 'categorical_crossentropy'
  model.add(Dense(num_classes, activation=output_activation))

  # compile model
  model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

  # summary() prints the table itself and returns None, so it is not wrapped
  # in print().
  model.summary()

  return model

In [59]:
# Single-channel 128x128 inputs, two output classes.
model = net(
    input_shape=(128, 128, 1),
    num_classes=2,
)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 128, 128, 64)      1664      
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 128, 128, 64)      102464    
_________________________________________________________________
batch_normalization_6 (Batch (None, 128, 128, 64)      256       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 64, 64, 64)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 64, 64, 128)       73856     
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 64, 64, 128)       147584    
_________________________________________________________________
batch_normalization_7 (Batch (None, 64, 64, 128)      

In [None]:
# set callbacks
# Stop when val_loss has not improved by at least min_delta for `patience`
# consecutive epochs, and restore the weights of the best epoch.
early_stoppping = EarlyStopping(monitor = 'val_loss',
                                min_delta = 0.001,
                                patience = 10,
                                restore_best_weights=True)

#set the global values
epoches = 40
batch_size = 64

#fit the model
# The callback has to be passed to fit(); creating it alone has no effect.
history = model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=epoches,
          verbose=1,
          validation_data=(X_val, y_val),
          shuffle=True,
          callbacks=[early_stoppping])

Epoch 1/40