# Deep Learning SGP WEEK 3 Convolutional Neural Networks

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
import tqdm
from socket import socket

### Our dataset is visible at the top right of the screen. Both the 'train' and 'validation' folders contain two image categories: covid and healthy.

In [None]:

CATEGORIES = ['covid', 'healthy']
DATADIR = '../input/covidistesgp/CovidDataset/train'

# Preview one grayscale sample per class so the two categories can be compared visually.
for label in CATEGORIES:
    class_dir = os.path.join(DATADIR, label)
    # Slicing to the first directory entry shows exactly one image per class
    # (and nothing at all if the class directory is empty).
    for file_name in os.listdir(class_dir)[:1]:
        sample = cv2.imread(os.path.join(class_dir, file_name), cv2.IMREAD_GRAYSCALE)
        plt.imshow(sample, cmap='gray')
        plt.xlabel(label)
        plt.show()

### Now we use a Python library called OpenCV to read the input images and perform some operations on them, such as GRAYSCALING and RESIZING

In [None]:
IMG_SIZE = 50      # every image is resized to IMG_SIZE x IMG_SIZE pixels
train_data = []    # [image_array, class_index] pairs from the training directory
test_data = []     # [image_array, class_index] pairs from any other directory


def create_data(data_dir):
    """Load every image under ``data_dir`` into ``train_data`` or ``test_data``.

    Each sub-directory named after an entry of ``CATEGORIES`` is scanned; every
    file in it is read as a grayscale image, resized to
    ``IMG_SIZE x IMG_SIZE`` and appended as ``[image_array, class_index]``,
    where ``class_index`` is the position of the category in ``CATEGORIES``.

    Args:
        data_dir: Directory containing one sub-directory per category. When it
            equals the training path the samples go to ``train_data``;
            otherwise they go to ``test_data``.

    Files that cannot be decoded or resized are skipped, and the number of
    skipped files is printed so that data problems are not hidden.
    """
    # The destination list depends only on data_dir, so pick it once up front.
    if data_dir == '../input/covidistesgp/CovidDataset/train':
        target = train_data
    else:
        target = test_data

    skipped = 0
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(data_dir, category)

        for img in os.listdir(path):      ## We use os to iterate over all our files in the directory
            try:
                img_arr = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)   ## GRAYSCALING
                # imread reports an unreadable file by returning None rather than raising.
                if img_arr is not None:
                    img_arr = cv2.resize(img_arr, (IMG_SIZE, IMG_SIZE))               ## RESIZING
            except cv2.error:
                img_arr = None

            if img_arr is None:
                skipped += 1
                continue
            target.append([img_arr, class_num])

    if skipped:
        print(f'Skipped {skipped} unreadable file(s) in {data_dir}')

In [None]:
# Fill train_data / test_data from both dataset splits, then report how many
# samples each one holds.
for split_dir in (
    '../input/covidistesgp/CovidDataset/train',
    '../input/covidistesgp/CovidDataset/validation',
):
    create_data(split_dir)

for split in (train_data, test_data):
    print(len(split))

In [None]:
# Show the class index of the first ten training samples (before shuffling).
for _, class_index in train_data[:10]:
    print(class_index)

### It's pretty important that we randomise the order of our images rather than having all covid images together and all healthy images together

In [None]:
import random

# Shuffle the training samples in place so the classes are interleaved.
random.shuffle(train_data)

# The first ten class indices should now be a mix of both classes.
for _, class_index in train_data[:10]:
    print(class_index)

In [None]:
# Split the [image, label] pairs into parallel feature / label lists.
x_train = [image for image, _ in train_data]
y_train = [class_index for _, class_index in train_data]

x_test = [image for image, _ in test_data]
y_test = [class_index for _, class_index in test_data]

# Stack the images into arrays of shape (length, IMG_SIZE, IMG_SIZE, 1);
# the trailing 1 is the single grayscale channel.
image_shape = (-1, IMG_SIZE, IMG_SIZE, 1)
x_train = np.array(x_train).reshape(image_shape)
x_test = np.array(x_test).reshape(image_shape)

for images in (x_train, x_test):
    print(len(images))

### After we've done this preprocessing work, it's handy to store our final arrays instead of repeating the work every time we want to use these values
### For this, we use the Python library called pickle to store all the values and load them in directly later

In [None]:
import pickle

# Write each dataset to "<name>.pickle" in the working directory.
# The `with` block closes the file even if pickle.dump raises.
for file_name, payload in (
    ("x_train.pickle", x_train),
    ("y_train.pickle", y_train),
    ("x_test.pickle", x_test),
    ("y_test.pickle", y_test),
):
    with open(file_name, "wb") as pickle_out:    # open/create the file for binary writing
        pickle.dump(payload, pickle_out)         # dump the contents of the array / list

print(len(x_train))
print(len(x_test))

In [None]:
# Reload the preprocessed datasets written by the previous cell.
# NOTE: pickle.load can execute arbitrary code, so only load files this
# notebook created itself.
with open("x_train.pickle", "rb") as pickle_in:    # open the file; it is closed automatically
    trainX = pickle.load(pickle_in)                # load its contents into a python variable

with open("y_train.pickle", "rb") as pickle_in:
    trainY = pickle.load(pickle_in)

with open("x_test.pickle", "rb") as pickle_in:
    testX = pickle.load(pickle_in)

with open("y_test.pickle", "rb") as pickle_in:
    testY = pickle.load(pickle_in)

print(f"{len(trainX)}, {len(testX)}")

Expected: 2000, 200


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

# Number of test samples (first axis of the reshaped x_test array).
print(x_test.shape[0])

### Now fill out the code in the below 2 cells following the instructions

In [None]:
### NORMALIZE the image data (trainX, testX) from 0-255 to 0-1, and convert trainX,trainY,testX,testY to np arrays
### approx. (1 x 4) lines of code
# NOTE: this cell overwrites trainX/testX, so running it twice divides by 255
# twice; reload the pickles before re-running it.
trainX = np.array(trainX) / 255
testX = np.array(testX) / 255

# Labels become column vectors of shape (n_samples, 1).
trainY = np.array(trainY).reshape(-1, 1)
testY = np.array(testY).reshape(-1, 1)

print(trainX.shape)
print(trainY.shape)
print(testX.shape)
print(testY.shape)







In [None]:
model = Sequential()

### Use model.add to add layers (example: conv2D layers, then Maxpooling2D layers, Dense)
### Experiment with tf keras documentation to complete the model
### approx 5-12 lines of code, feel free to experiment with different model structures

# Four convolution + 2x2 max-pooling stages. 'same' padding keeps the spatial
# size through each convolution, so only the pooling layers shrink the image.
model.add(Conv2D(filters=16, kernel_size=(6, 6), padding='same', activation='tanh', input_shape=(50, 50, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(filters=18, kernel_size=(5, 5), padding='same', activation='tanh'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(filters=22, kernel_size=(4, 4), padding='same', activation='tanh'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(filters=24, kernel_size=(4, 4), padding='same', activation='tanh'))
model.add(MaxPooling2D((2, 2)))

# Classifier head: two hidden dense layers, then a single sigmoid unit that
# outputs the probability of class index 1.
model.add(Flatten())
model.add(Dense(100, activation='tanh'))
model.add(Dense(100, activation='tanh'))
model.add(Dense(1, activation='sigmoid'))

# `metrics` must be a list. Accuracy comes first so that score[1] from
# model.evaluate really is the accuracy printed by the cells below; AUC is
# kept as an additional metric.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', tf.keras.metrics.AUC()])
model.summary()

In [None]:
# Train for 5 epochs in mini-batches of 32, with 30% of the training data
# set aside for validation.
model.fit(
    x=trainX,
    y=trainY,
    batch_size=32,
    epochs=5,
    validation_split=0.3,
)

In [None]:
# Evaluate on the training set. return_dict=True pairs each value with the
# name of the metric it belongs to, so the printed labels always match what
# the model was compiled with instead of assuming score[1] is accuracy.
score = model.evaluate(trainX, trainY, verbose=1, return_dict=True)

for metric_name, value in score.items():
    print(f'Train {metric_name}:', value)

#### Expected training accuracy 90-98% 

In [None]:
# Evaluate on the held-out validation images. return_dict=True pairs each
# value with the name of the metric it belongs to, so the printed labels
# always match what the model was compiled with.
score = model.evaluate(testX, testY, verbose=0, return_dict=True)

for metric_name, value in score.items():
    print(f'Test {metric_name}:', value)

#### Expected Test Accuracy 70-80%