# Covid vs. Non-Covid CT scan classification

### ICCV 2021 Workshop: MIA-COV19D Competition
### Test dataset analysis


#### Author: Mohammad Nayeem Teli

 ### Import libraries

In [95]:

import os,csv
import numpy as np
import pandas as pd
import math

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model,models
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.preprocessing import image
import tensorflow_addons as tfa


from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.image as mpimg


### Data directories and listing some files

In [197]:
# Root folder that holds the eight test subsets (subset1 ... subset8).
base_dir = 'ICCV2021Data/COV19DCompetition/Data/test_subsets/'

# Subset analysed in this run. Change this to any of 'subset1' ... 'subset8'
# and re-run the notebook to process the other subsets.
subset_name = 'subset1'

# Directory that the cells below scan for images. Defining it here (instead
# of leaving every assignment commented out) keeps the notebook runnable
# from a fresh kernel.
test_dir = os.path.join(base_dir, subset_name + '/')

## Test Data Analysis

### Number of test files in the different subfolders

In [None]:
# Walk the test directory once, tallying the sub-directories and the .jpg
# images found at every level.
totalFiles = 0
totalDir = 0

for _root, subdirs, names in os.walk(test_dir):
    totalDir += len(subdirs)
    totalFiles += sum(
        1 for name in names if os.path.splitext(name)[1] == ".jpg"
    )

print('Total number of files',totalFiles)
print('Total Number of directories',totalDir)
print('Total:',(totalDir + totalFiles))

In [199]:
def get_image_names(folder):
    """Return the paths of all ``.jpg`` files found anywhere under *folder*.

    The tree is walked top-down with ``os.walk``; each returned path is the
    visited directory joined with the file name. Only the exact, lower-case
    extension ``.jpg`` matches. A folder without matching files (or one that
    does not exist) yields an empty list.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(folder, topdown=True)
        for name in names
        if os.path.splitext(name)[1] == ".jpg"
    ]

# Gather the path of every .jpg image below the test directory.
test_image_list = get_image_names(folder=test_dir)

# Sanity check: the list must agree with the count from the directory walk.
assert len(test_image_list) == totalFiles




### Load the trained model

In [200]:
# Load the previously trained Keras model stored in "model.h5"
# (the path is relative to the current working directory).
model = models.load_model("model.h5")

In [201]:
# Print the layer-by-layer architecture and parameter counts of the loaded model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 256, 256, 16)      448       
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 256, 256, 16)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 32)      4640      
_________________________________________________________________
batch_normalization (BatchNo (None, 128, 128, 32)      128       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 128, 128, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 64, 32)        0

### Data generator for test set

In [None]:
# Test-time preprocessing: only scale the pixel values by 1/255.
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream the test images from disk in batches of 32, resized to the 256x256
# input size shown in the model summary.
# - class_mode=None: the sub-folders of the test directory are not classes,
#   so no labels are generated; the generator yields image batches only,
#   which is the form intended for model.predict.
# - shuffle=False: keeps the batches in the same order as
#   test_generator.filenames, so predictions can be matched to files.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(256, 256),
    batch_size=32,
    class_mode=None,
    shuffle=False)

In [203]:
# test_generator.reset()

### Make Predictions on the test set

In [None]:

# Run inference over the whole test set: one step per batch of the generator.
preds = model.predict(
    test_generator,
    steps=len(test_generator),
    workers=8,
    verbose=1,
)

#### Get the class labels

In [205]:
# Get class labels
cls = np.round(preds)
file_names = test_generator.filenames

#### Collect the per-image predictions in a data frame (class labels: covid = 0, non-covid = 1)

In [206]:
# Final results in a Data Frame that can be saved in .csv file
# Per-image results (can be saved to a .csv file): relative file path,
# predicted score and rounded class label.
results = pd.DataFrame(
    {
        "file": file_names,
        "pr": preds[:, 0],
        "class": cls[:, 0],
    }
)

In [207]:
# List the distinct class labels that occur among the predictions.
np.unique(results['class'])

array([0., 1.], dtype=float32)

#### Save the predicted labels for each subset 

In [208]:
# results.to_csv('subset1.csv', index=False)
# results.to_csv('subset2.csv', index=False)
# results.to_csv('subset3.csv', index=False)
# results.to_csv('subset4.csv', index=False)
# results.to_csv('subset5.csv', index=False)
# results.to_csv('subset6.csv', index=False)
# results.to_csv('subset7.csv', index=False)
# results.to_csv('subset8.csv', index=False)

#### Make a copy of the dataframe and split the file path into the folder name and the image name

In [209]:
# Work on a copy so that `results` itself stays unmodified.
df = results.copy()

In [211]:
# Split each relative path at the FIRST "/" only: the part before it is the
# top-level folder, the remainder is the image name. Limiting the split to
# one cut (n=1) always yields exactly two columns, even when an image sits
# in a nested sub-folder; an unlimited split would produce extra columns
# there and the two-column assignment would fail.
df[['folder', 'filename']] = df['file'].str.split("/", n=1, expand=True)

#### Map each folder name to its predicted class in a dictionary

In [213]:
# Every folder contains many images, so the folder names are not unique.
# Turning the frame into a dict keyed on 'folder' directly would keep only
# the LAST image's label per folder. Aggregate over all images instead.

# Share of images in each folder that were labelled 1.0 (non-covid).
# sort=False keeps the folders in their order of first appearance.
non_covid_share = df.groupby('folder', sort=False)['class'].mean()

# Majority vote per folder: 1.0 (non-covid) only when more than half of the
# images are non-covid; an exact tie is resolved as 0.0 (covid).
d = (non_covid_share > 0.5).astype(float).to_dict()

#### Append each folder name to covid.csv or non-covid.csv according to its predicted class

In [215]:
# Append the folders predicted as covid (class 0.0) to covid.csv; every
# folder name is followed by a comma. Append mode keeps earlier entries.
with open('covid.csv','a') as f:
    f.writelines(
        "%s," % folder for folder, label in d.items() if label == 0.0
    )

In [216]:
# Append the folders predicted as non-covid (class 1.0) to non-covid.csv;
# every folder name is followed by a comma. Append mode keeps earlier entries.
with open('non-covid.csv','a') as f:
    f.writelines(
        "%s," % folder for folder, label in d.items() if label == 1.0
    )