## Importing Libraries and Dependencies

In [1]:
import glob
import os
import random as rd
import time

import cv2
import joblib
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import *
from sklearn.model_selection import train_test_split

## Dataset Description and Preprocessing

In [2]:
# Target size handed to cv2.resize for every loaded image; the same tuple is
# used to reshape a flattened image back to 2-D when it is displayed.
RESIZE_SHAPE = (128, 128)

In [3]:
def normalized_read(paths):
    """Load images as grayscale, resize them and return them as flat rows.

    Parameters
    ----------
    paths : iterable of sequences of str
        Groups of image file paths (for example one list per class). Groups
        and the paths inside them are processed in the order given, so the
        row order of the result follows the order of ``paths``.

    Returns
    -------
    numpy.ndarray
        One row per image, each row being the flattened image after it was
        resized to ``RESIZE_SHAPE``. An empty array when no paths are given.

    Raises
    ------
    OSError
        If a file cannot be read as an image. ``cv2.imread`` signals that by
        returning ``None``; failing here names the offending path instead of
        surfacing later as an obscure error inside ``cv2.resize``. The image
        is not skipped, because labels built from the path counts would no
        longer line up with the returned rows.
    """
    all_images = []
    for group in paths:
        for image_path in group:
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                raise OSError(f"Could not read image: {image_path!r}")
            all_images.append(cv2.resize(img, RESIZE_SHAPE).flatten())
    return np.array(all_images)

def show_img(img):
    """Display a flattened image in grayscale.

    ``img`` is reshaped to ``RESIZE_SHAPE`` and drawn with the 'gray'
    colormap; the value returned by ``plt.imshow`` is passed back.
    """
    image_2d = np.reshape(img, RESIZE_SHAPE)
    rendered = plt.imshow(image_2d, cmap='gray')
    return rendered

In [4]:
def results(y_test, y_pred):
    """Print accuracy, F1, recall and the classification report.

    Each metric is called as ``metric(y_test, y_pred)`` with its default
    arguments and printed after its caption. Nothing is returned.
    """
    metric_table = (
        ("Accuracy Score: ", accuracy_score),
        ("F1 Score: ", f1_score),
        ("Recall Score: ", recall_score),
        ("Classification Report:\n", classification_report),
    )
    for caption, metric_fn in metric_table:
        print(caption, metric_fn(y_test, y_pred))

In [5]:
# Root folder of the dataset; the layout is <DATA_DIR>/<split>/<class>/<files>.
# A raw string keeps every backslash literal, so the path does not depend on
# invalid escape sequences such as "\Y" or "\S" (which Python warns about).
# Set the COVID_DATA_DIR environment variable to use a copy stored elsewhere.
DATA_DIR = os.environ.get(
    "COVID_DATA_DIR",
    r"D:\YEAR 2\SEMESTER 2\PATTERN RECOGNITION & ML\ML PROJECT\Dataset Main",
)


def list_images(split, label, root=DATA_DIR):
    """Return the sorted paths of every entry in ``<root>/<split>/<label>``.

    Sorting makes the order independent of the file system's listing order,
    so repeated runs see the images in the same sequence. A missing folder
    yields an empty list.
    """
    return sorted(glob.glob(os.path.join(root, split, label, "*")))


train_covid = list_images("train", "COVID")
test_covid = list_images("test", "COVID")
val_covid = list_images("val", "COVID")

train_non_covid = list_images("train", "Non-COVID")
test_non_covid = list_images("test", "Non-COVID")
val_non_covid = list_images("val", "Non-COVID")

In [6]:
# Read every split; within a split the COVID images come first, followed by
# the Non-COVID images.
train_dataset_w, val_dataset_w, test_dataset_w = (
    normalized_read([covid_paths, non_covid_paths])
    for covid_paths, non_covid_paths in (
        (train_covid, train_non_covid),
        (val_covid, val_non_covid),
        (test_covid, test_non_covid),
    )
)

In [7]:
# Label vectors: 1.0 for each COVID path followed by 0.0 for each Non-COVID
# path, matching the order in which the images were read.
train_labels = np.concatenate(
    (np.ones(len(train_covid)), np.zeros(len(train_non_covid)))
)
val_labels = np.concatenate(
    (np.ones(len(val_covid)), np.zeros(len(val_non_covid)))
)
test_labels = np.concatenate(
    (np.ones(len(test_covid)), np.zeros(len(test_non_covid)))
)

In [8]:
# One frame per split: the pixel columns plus a trailing "label" column.
train_dataset = pd.DataFrame(train_dataset_w).assign(label=train_labels)
val_dataset = pd.DataFrame(val_dataset_w).assign(label=val_labels)
test_dataset = pd.DataFrame(test_dataset_w).assign(label=test_labels)

In [9]:
# Convert each labelled frame to a plain NumPy array; the label stays in the
# last column.
train_dataset, val_dataset, test_dataset = (
    np.array(frame) for frame in (train_dataset, val_dataset, test_dataset)
)

## Splitting The Dataset

In [10]:
# Features are every column except the last; the last column is the label.
X_train, y_train = train_dataset[:, :-1], train_dataset[:, -1]
X_val, y_val = val_dataset[:, :-1], val_dataset[:, -1]
X_test, y_test = test_dataset[:, :-1], test_dataset[:, -1]

## Applying LGBM Classifier Without LDA/PCA

In [11]:
# Baseline: LightGBM trained directly on the flattened pixel features.
# `lgb` comes from the import cell at the top of the notebook.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments.
start = time.perf_counter()
lgb_model = lgb.LGBMClassifier()
lgb_model.fit(X_train, y_train)
end = time.perf_counter()
print("Training time: %s" % str(end-start))
# Score the model on the validation split.
lgb_pred = lgb_model.predict(X_val)
results(y_val, lgb_pred)

Training time: 95.73385286331177
Accuracy Score:  0.883
F1 Score:  0.8851815505397449
Recall Score:  0.902
Classification Report:
               precision    recall  f1-score   support

         0.0       0.90      0.86      0.88       500
         1.0       0.87      0.90      0.89       500

    accuracy                           0.88      1000
   macro avg       0.88      0.88      0.88      1000
weighted avg       0.88      0.88      0.88      1000



## Applying LGBM Classifier With PCA

In [12]:
# Label-free frames of the raw pixel rows, used as input for PCA and LDA.
train_data, test_data, val_data = (
    pd.DataFrame(pixels)
    for pixels in (train_dataset_w, test_dataset_w, val_dataset_w)
)

In [13]:
# PCA is imported in the import cell at the top of the notebook.
# The projection is fitted on the training pixels only and then applied
# unchanged to every split. A fractional n_components asks scikit-learn to
# keep as many components as needed to explain that share of the variance.
pca = PCA(n_components= 0.99)
pca.fit(train_data)

reduced_train_data = pca.transform(train_data)
reduced_test_data = pca.transform(test_data)
reduced_val_data = pca.transform(val_data)

# Wrap the projected arrays in DataFrames for the classifier cells below.
pca_train_data = pd.DataFrame(reduced_train_data)
pca_test_data = pd.DataFrame(reduced_test_data)
pca_val_data = pd.DataFrame(reduced_val_data)

In [14]:
# LightGBM trained on the PCA-reduced features.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments.
start = time.perf_counter()
lgb_model_pca = lgb.LGBMClassifier()
lgb_model_pca.fit(pca_train_data, y_train)
end = time.perf_counter()
print("Training time: %s" % str(end-start))
# Score the model on the PCA-projected validation split.
lgb_pred_pca = lgb_model_pca.predict(pca_val_data)
results(y_val, lgb_pred_pca)

Training time: 9.354799032211304
Accuracy Score:  0.834
F1 Score:  0.8394584139264991
Recall Score:  0.868
Classification Report:
               precision    recall  f1-score   support

         0.0       0.86      0.80      0.83       500
         1.0       0.81      0.87      0.84       500

    accuracy                           0.83      1000
   macro avg       0.84      0.83      0.83      1000
weighted avg       0.84      0.83      0.83      1000



## Applying LGBM Classifier With LDA

In [15]:
# LDA is imported in the import cell at the top of the notebook.
# The discriminant projection is fitted on the training pixels and labels
# only, then applied unchanged to every split. n_components=None leaves the
# number of output components at scikit-learn's default (with the two
# classes used here that is presumably a single axis - TODO confirm).
lda = LDA(n_components=None)
lda.fit(train_data, y_train)

rtrain_data = lda.transform(train_data)
rtest_data = lda.transform(test_data)
rval_data = lda.transform(val_data)

# Wrap the projected arrays in DataFrames for the classifier cells below.
lda_train_data = pd.DataFrame(rtrain_data)
lda_test_data = pd.DataFrame(rtest_data)
lda_val_data = pd.DataFrame(rval_data)

In [16]:
# LightGBM trained on the LDA-projected features.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments.
start = time.perf_counter()
lgb_model_lda = lgb.LGBMClassifier()
lgb_model_lda.fit(lda_train_data, y_train)
end = time.perf_counter()
print("Training time: %s" % str(end-start))
# Score the model on the LDA-projected validation split.
lgb_pred_lda = lgb_model_lda.predict(lda_val_data)
results(y_val, lgb_pred_lda)

Training time: 0.10960626602172852
Accuracy Score:  0.763
F1 Score:  0.7622868605817452
Recall Score:  0.76
Classification Report:
               precision    recall  f1-score   support

         0.0       0.76      0.77      0.76       500
         1.0       0.76      0.76      0.76       500

    accuracy                           0.76      1000
   macro avg       0.76      0.76      0.76      1000
weighted avg       0.76      0.76      0.76      1000



## Save The Model

In [17]:
# joblib is imported in the import cell at the top of the notebook.
# `lgb_model` is the baseline classifier trained on the raw pixel features
# (no PCA/LDA); it is written to LGBM.pkl in the current working directory.
joblib.dump(lgb_model, 'LGBM.pkl')    # Save the model as a pickle in a file

['LGBM.pkl']