In [1]:
# !python --version
# python 3.7.9

In [11]:
# import all the packages that we need for now.
import os                   
import pandas as pd          
import numpy as np          
import matplotlib.pyplot as plt   
import cv2    
from tqdm.notebook import tqdm
from sklearn.utils import class_weight, shuffle
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D
from keras.layers import Dense,Activation,Flatten,Dropout

# Loading & Organizing Data 

In [3]:
# All dataset locations are resolved relative to the notebook's working directory.
path = os.getcwd()
labels = pd.read_csv(os.path.join(path, 'covid_image_data', 'Training_set_covid.csv'))

# Pair every labelled file name with the full path of its training image.
train_dir = os.path.join(path, 'covid_image_data', 'train')
file_paths = [[file, os.path.join(train_dir, file)] for file in labels['filename']]

# Convert the file names and paths we just collected into a dataframe.
imagesDF = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])

# Attach the labels to the paths. The join key is spelled out so the merge
# cannot silently pick up another shared column.
train_data = pd.merge(imagesDF, labels, on='filename')

# The generators built from this frame use class_mode="categorical",
# which expects the label column to hold strings.
train_data['label'] = train_data['label'].astype(str)

# Preview the first rows.
train_data.head()

Unnamed: 0,filename,filepaths,label
0,Image_1.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...,1
1,Image_2.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...,0
2,Image_3.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...,0
3,Image_4.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...,0
4,Image_5.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...,0


# Data Pre-processing

In [4]:
# Settings shared by the training and validation pipelines: multiply pixel
# values by 1/255 and reserve 20% of the rows for the validation subset.
split_kwargs = dict(rescale=1./255, validation_split=0.2)

# Only the training pipeline applies random shear / zoom / horizontal flips.
train_datagen = ImageDataGenerator(
    shear_range=0.2, zoom_range=0.2, horizontal_flip=True, **split_kwargs
)
valid_datagen = ImageDataGenerator(**split_kwargs)

# Test images are only rescaled.
test_datagen = ImageDataGenerator(rescale=1./255)

In [5]:
# Directory that the file names in train_data['filename'] are relative to.
direct = os.path.join(path, 'covid_image_data', 'train')

# Both generators read the same dataframe. The train/validation split is
# configured by validation_split on the ImageDataGenerator objects; `subset`
# selects which side of that split each generator yields.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory=direct,
    x_col="filename",
    y_col="label",
    subset="training",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(224, 224),
)

# validation_split is not an argument of flow_from_dataframe (it was being
# swallowed by **kwargs), so it is no longer passed here.
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory=direct,
    x_col="filename",
    y_col="label",
    subset="validation",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(224, 224),
)

# Total number of images = 3479 (2784 training + 695 validation)

Found 2784 validated image filenames belonging to 2 classes.
Found 695 validated image filenames belonging to 2 classes.


# making our model

In [12]:
# Optimizer hyper-parameters. `epochs` is only used here to derive the
# learning-rate decay.
learning_rate = 1e-4
epochs = 10

# Small CNN: three conv -> ReLU -> 2x2 max-pool stages followed by a dense head.
model = Sequential()

# Stage 1: 'same' padding keeps the 224x224 spatial size before pooling.
model.add(Conv2D(32, (3, 3), input_shape=(224, 224, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 2
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 3
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten, one hidden layer with dropout, 2-way softmax.
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('softmax'))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
opt = Adam(learning_rate=learning_rate, decay=learning_rate / epochs)

# The generators yield one-hot labels (class_mode="categorical") and the
# network ends in a 2-unit softmax, so categorical cross-entropy is the
# matching loss.
model.compile(loss="categorical_crossentropy", optimizer=opt,
              metrics=["accuracy"])

# Summary of our model
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 224, 224, 32)      896       
_________________________________________________________________
activation (Activation)      (None, 224, 224, 32)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 110, 110, 32)      9248      
_________________________________________________________________
activation_1 (Activation)    (None, 110, 110, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 55, 55, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 53, 53, 64)       

In [13]:
# Batches per pass, rounded UP so the final partial batch is not skipped.
# With floor division the last 695 % 32 validation images were never scored.
STEP_SIZE_TRAIN = (train_generator.n + train_generator.batch_size - 1) // train_generator.batch_size
STEP_SIZE_VALID = (valid_generator.n + valid_generator.batch_size - 1) // valid_generator.batch_size

# NOTE(review): this trains for 3 epochs, while `epochs` (used for the
# optimizer decay when the model is compiled) is 10 -- confirm which is intended.
history = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=3,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


# Saving test data

In [14]:
# Load the list of test image file names (this reuses `labels` / `file_paths`,
# replacing the training versions).
labels = pd.read_csv(os.path.join(path, 'covid_image_data', 'Testing_set_covid.csv'))

# Pair every test file name with the full path of its image.
test_dir = os.path.join(path, 'covid_image_data', 'test')
file_paths = [[file, os.path.join(test_dir, file)] for file in labels['filename']]
test_df = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])

# Join back onto the CSV contents using an explicit key.
test_data = pd.merge(test_df, labels, on='filename')

# Preview the first rows.
test_data.head()

Unnamed: 0,filename,filepaths
0,Image_1.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...
1,Image_2.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...
2,Image_3.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...
3,Image_4.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...
4,Image_5.jpg,/Users/boo/Desktop/2Datathon/covid_image_data/...


In [17]:
# Directory that the file names in test_data['filename'] are relative to.
testPath = path + '/covid_image_data/test'

# Test images are only rescaled; no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255.)

# No label column (y_col=None, class_mode=None): the generator yields image
# batches only. shuffle=False keeps the batches in dataframe order.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_data,
    directory=testPath,
    x_col="filename",
    y_col=None,
    class_mode=None,
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
    seed=42,
)

Found 870 validated image filenames.


In [20]:
# Run the network over the test generator, then keep the index of the
# highest-scoring class for every image.
class_scores = model.predict(test_generator)
predicted_labels = np.argmax(class_scores, axis=1)

In [28]:
#print(predicted_labels)

In [26]:
# Copy the predicted class indices into a plain Python list.
final_labels = list(predicted_labels)

In [27]:
# Wrap the predictions in a single-column frame and write it to final.csv
# in the working directory, without the index column.
final_labels = pd.DataFrame(data=final_labels, columns=['prediction'])
output_csv = path+'/final.csv'
final_labels.to_csv(output_csv, index=False)