### IMPORTS

In [203]:
import os
import kagglehub
import pandas as pd
import numpy as np
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split
from PIL import Image
import numpy as np



### HELPER FUNCTIONS

In [None]:
def get_subdirectories(folder_path):
    subdirectory_path_list = []
    subdirectory_names = os.listdir(folder_path)

    for subdirectories in subdirectory_names:
        full_path = os.path.join(folder_path, subdirectories)
        subdirectory_path_list.append(full_path)
    
    return subdirectory_path_list

def get_image(image_file):
    # Load the image
    image = Image.open(image_file)
    # Convert the image to a NumPy array
    image_array = np.array(image)
    return image_array
    

def preprocess_dataset(class_files):
    label_list  = []

    for i in range(len(class_files)):
        subdirectories = get_subdirectories(class_files[i])
        subdirectories.sort()

        class_files[i] = subdirectories
        
        for sub in subdirectories: 
            label_list.append(i)

    #Combining the classes together
    concated_list = []
    for classes in class_files:
        concated_list.extend(classes)

    #Shuffling the classes
    concated_list = np.array(concated_list)
    label_list = np.array(label_list)

    # Generate a random permutation of indices
    indices = np.random.permutation(len(concated_list))

    # Apply the permutation to both lists
    shuffled_class_list = concated_list[indices]
    shuffled_label_list = label_list[indices]
    
    for i in range(len(shuffled_class_list)):
        shuffled_class_list[i] = get_image(shuffled_class_list[i])

    return shuffled_class_list, shuffled_label_list

In [205]:
path = kagglehub.dataset_download("tawsifurrahman/covid19-radiography-database")
files = os.listdir(path)
print("Path to files", path)
print(files)

Path to files /home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5
['COVID-19_Radiography_Dataset']


In [206]:
main_folder = os.path.join(path, files[0])
files_list = get_subdirectories(main_folder)
print(np.array(files_list))

['/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/Viral Pneumonia'
 '/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/COVID'
 '/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/Viral Pneumonia.metadata.xlsx'
 '/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/Lung_Opacity.metadata.xlsx'
 '/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/COVID.metadata.xlsx'
 '/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/Normal'
 '/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/Lung_O

In [207]:
#Getting the files from the dataset
normal_file = files_list[5]
normal_subdirectories = get_subdirectories(normal_file)
print(normal_subdirectories)

covid_file = files_list[1]
coivd_subdirectories = get_subdirectories(covid_file)
print(coivd_subdirectories)

covid_images = coivd_subdirectories[1]
covid_mask = coivd_subdirectories[0]

normal_images = normal_subdirectories[1]
normal_mask = normal_subdirectories[0]

['/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/Normal/masks', '/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/Normal/images']
['/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/COVID/masks', '/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/COVID/images']


In [208]:
class_files = [covid_images, normal_images]
class_files, label_list = preprocess_dataset(class_files)

X, y = class_files, label_list

[[116  94  76 ... 100 101 102]
 [112  91  75 ...  67  68  70]
 [111  89  73 ...  47  48  49]
 ...
 [ 79  60  48 ...  13  19  27]
 [ 78  59  48 ...  13  19  28]
 [ 81  61  49 ...  13  19  28]]
[[ 9  6  5 ...  6  6  5]
 [ 6  6  6 ...  7  7  6]
 [ 5  6  6 ...  6  6  6]
 ...
 [ 2 22 47 ...  8  9  8]
 [ 3 24 47 ...  9  9  8]
 [ 4 20 35 ...  7  7  8]]
[[18 41 42 ... 26 27 20]
 [22 39 54 ... 29 30 23]
 [19 36 59 ... 34 34 25]
 ...
 [21 36 51 ...  3  9 11]
 [22 36 51 ...  4 10 13]
 [22 39 53 ...  6 11 16]]
[[235 233 231 ... 231 230 215]
 [228 226 223 ... 228 229 214]
 [218 216 213 ... 225 225 211]
 ...
 [ 22  22  22 ...  22  27  32]
 [ 22  22  22 ...  22  28  33]
 [ 22  22  22 ...  22  27  32]]
[[ 34  14   3 ...   4  12  22]
 [ 32  12   2 ...   1   4   7]
 [ 31  12   3 ...   1   2   3]
 ...
 [  1   1   1 ... 147 191 188]
 [  2   2   2 ... 150 195 192]
 [  3   3   3 ... 155 200 196]]
[[  6   4   3 ...   3   6  11]
 [  6   4   3 ...   3   3   4]
 [  5   3   3 ...   3   3   2]
 ...
 [ 43  31  17 

In [209]:
print(class_files[0])

/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/Normal/images/Normal-8482.png


In [210]:
print(X[0])
print(y.shape)

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=2/3, random_state=42)

/home/smg0092/.cache/kagglehub/datasets/tawsifurrahman/covid19-radiography-database/versions/5/COVID-19_Radiography_Dataset/Normal/images/Normal-8482.png
(13808,)


In [211]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.optimizers import Adam

