In [1]:
import os # to read and load data
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
def preprocess_images(img):
    img = cv2.resize(img, (224, 224))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = img / 255.0
    img = img.flatten()
    return img

In [3]:
data = []
labels = []

covid_folder_path = "C:/Users/User/ML project/COVID19-xray/COVID"
for filename in os.listdir(covid_folder_path):
    img_path = os.path.join(covid_folder_path, filename)
    img = cv2.imread(img_path)
    img = preprocess_images(img)
    data.append(img)
    labels.append(1)

In [4]:
normal_folder_path = "C:/Users/User/ML project/NORMALXray/NORMALc"
for filename in os.listdir(normal_folder_path):
    img_path = os.path.join(normal_folder_path, filename)
    img = cv2.imread(img_path)
    img = preprocess_images(img)
    data.append(img)
    labels.append(0)



In [5]:
# counting zeros and ones
one = 0
zero = 0
for i in labels:
  if i == 1:
    one += 1
  else:
    zero += 1
print(one, zero)

1060 1977


In [6]:
data = np.array(data)
labels = np.array(labels)

In [7]:
data.shape

(3037, 50176)

In [8]:
labels.shape

(3037,)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=44)

In [10]:
print("Train", X_train.shape, y_train.shape)
print('Test', X_test.shape, y_test.shape)

Train (2429, 50176) (2429,)
Test (608, 50176) (608,)


### KNN

In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import SelectKBest,chi2
from sklearn.pipeline import Pipeline


In [13]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_neighbors': [3, 5, 7],
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5)
grid_search.fit(X_train, y_train)


In [15]:
from sklearn.metrics import accuracy_score
best_params = grid_search.best_params_
knn_best = grid_search.best_estimator_

y_pred = knn_best.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", best_params)
print("Accuracy on Test Set:", accuracy)


Best Hyperparameters: {'n_neighbors': 3, 'p': 2, 'weights': 'distance'}
Accuracy on Test Set: 0.9851973684210527


In [17]:
from sklearn.metrics import classification_report
print('KNN classification report /n', classification_report(y_test, y_pred))

KNN classification report /n               precision    recall  f1-score   support

           0       0.99      0.99      0.99       405
           1       0.98      0.97      0.98       203

    accuracy                           0.99       608
   macro avg       0.99      0.98      0.98       608
weighted avg       0.99      0.99      0.99       608



### Logistic Regression

In [19]:
from sklearn.linear_model import LogisticRegression
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [0.1, 1.0, 10.0]
}
logreg = LogisticRegression()
grid_search_lr = GridSearchCV(logreg, param_grid, cv=5)
grid_search_lr.fit(X_train, y_train)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [20]:
best_params_lr = grid_search.best_params_
logreg_best_lr = grid_search.best_estimator_

ypred_1 = logreg_best_lr.predict(X_test)
accuracy_lr = accuracy_score(y_test, ypred_1)

print("Best Hyperparameters:", best_params_lr)
print("Accuracy on Test Set:", accuracy_lr)


Best Hyperparameters: {'n_neighbors': 3, 'p': 2, 'weights': 'distance'}
Accuracy on Test Set: 0.9851973684210527


### Naive Bayes

In [21]:
from sklearn.naive_bayes import GaussianNB
param_grid = {
    'var_smoothing': [1e-9, 1e-8, 1e-7]
}
naive_bayes = GaussianNB()
grid_search_nb = GridSearchCV(naive_bayes, param_grid, cv=5)
grid_search_nb.fit(X_train, y_train)
 
    

In [24]:
best_params_nb = grid_search.best_params_
naive_bayes_best_nb = grid_search.best_estimator_

ypred_2 = naive_bayes_best_nb.predict(X_test)
accuracy_nb = accuracy_score(y_test, ypred_2)

print("Best Hyperparameters:", best_params_nb)
print("Accuracy on Test Set:", accuracy_nb)


Best Hyperparameters: {'n_neighbors': 3, 'p': 2, 'weights': 'distance'}
Accuracy on Test Set: 0.9851973684210527


### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}
dt = DecisionTreeClassifier()

grid_search_dt = GridSearchCV(dt, param_grid, cv=5)
grid_search_dt.fit(X_train, y_train)
















In [None]:
best_params_dt = grid_search.best_params_
decision_tree_best = grid_search.best_estimator_

ypred_3 = decision_tree_best.predict(X_test)
accuracy_dt = accuracy_score(y_test, ypred_3)

print("Best Hyperparameters:", best_params_dt)
print("Accuracy on Test Set:", accuracy_dt)
