In [3]:
import os
import cv2
import numpy as np
import pywt

In [4]:
root_directory = "./Data/"

train_data=[] # X_train
test_data=[] # X_test
train_label=[] # y_train
test_label=[] # y_test

img_row,img_col = 32,32

num_of_classes = 2

data_type =["train","test"]
folder_names=["covid_without_PNEUMONIA","covid_with_PNEUMONIA"]
label_mapping={"covid_without_PNEUMONIA":0,"covid_with_PNEUMONIA":1}

In [5]:
def w2d(img, mode='haar', level=1): #skipped
    imArray = img

    imArray = cv2.cvtColor( imArray,cv2.COLOR_RGB2GRAY )
    #convert to float
    imArray =  np.float32(imArray)   
    imArray /= 255;

    coeffs=pywt.wavedec2(imArray, mode, level=level)


    coeffs_H=list(coeffs)  
    coeffs_H[0] *= 0;  

    imArray_H=pywt.waverec2(coeffs_H, mode);
    imArray_H *= 255;
    imArray_H =  np.uint8(imArray_H)

    return imArray_H

In [8]:
def process_img(dataset,folder,filename): 
    file_path = root_directory+"/"+dataset+"/"+folder+"/"+filename
    img = cv2.imread(file_path)
    scalled_raw_img = cv2.resize(img, (32, 32))
    img_har = w2d(img,'db1',5) #dlt
    scalled_img_har = cv2.resize(img_har, (32, 32)) #dlt 
    combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1))) #dlt 
    return combined_img  #scalled_raw_img

In [10]:
for dataset in data_type:
    for folder in folder_names:
        for filename in os.listdir(root_directory+"/"+dataset+"/"+folder+"/"):
            if dataset == "train":
                train_data.append((process_img(dataset,folder,filename))/255.0) #rescale+resize
                train_label.append(label_mapping[folder])
            else:
                test_data.append((process_img(dataset,folder,filename))/255.0)
                test_label.append(label_mapping[folder])

In [11]:
train_data = np.asarray(train_data)
train_label = np.asarray(train_label)

test_data = np.asarray(test_data)
test_label = np.asarray(test_label)

X_train = train_data
X_test = test_data

y_train = train_label 
y_test = test_label 

X_train = np.array(X_train).reshape(len(X_train),4096).astype(float)
X_test = np.array(X_test).reshape(len(X_test),4096).astype(float) # 4096 is vectorized lenght of an image

In [12]:
import pandas as pd
import pandas_profiling as pp
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [29]:
scores=[] 

best_estimators = {}

model_params = {

'KNeighborsClassifier': {
    'model': KNeighborsClassifier(),
    'params': {
        'n_neighbors': [4,5,6],
        'algorithm' : ['auto','ball_tree','kd_tree'],
        'weights' : ['uniform','distance'],
        'leaf_size' : [27,26,25,24,23,22,21,20,19]
    }
},

'DecisionTreeClassifier': {
    'model': DecisionTreeClassifier(),
    'params': {
        'criterion': ['gini','entropy'],
        'max_depth' : [9,10,11,12,13,14]
    }
},


'AdaBoostClassifier': {
    'model': AdaBoostClassifier(),
    'params': {
        'n_estimators': [40,41,42,43],
        'learning_rate' : [0.8],
        'algorithm' : ['SAMME', 'SAMME.R']
    }
},

'GaussianNB': {
    'model': GaussianNB(),
    'params': {

    }
},

 'LOGISTIC_REGRESSION': {
    'model': LogisticRegression(max_iter=500),
    'params': {
        'C': [1,2,3],
        'solver' : [ 'liblinear', 'lbfgs'],
        'multi_class' : ['auto', 'ovr' ]
    }
},


'SVM': {
    'model': SVC(),
    'params': {
         'C': [5,6,7,8,9],
         'kernel': ['rbf','linear'],
         'gamma': ['auto', 'scale']
    }
},

'RANDOM_FOREST':{
    'model' : RandomForestClassifier(),
    'params': {
        'n_estimators':[12,7,8,9,11,10,13,14,16],
        'criterion': ['entropy'],
        'random_state' : [14,15,13,16],
        'max_depth' : [7,8,6,9]

    }
}
}

In [30]:
for model_name, mp in model_params.items():
    clf = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    print(mp['model'], mp['params'])

    
    clf.fit(X_train, y_train)    
    

    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': str(clf.best_params_)
    })
    best_estimators[model_name] = clf.best_estimator_

KNeighborsClassifier() {'n_neighbors': [6], 'algorithm': ['auto'], 'weights': ['distance'], 'leaf_size': [27]}
DecisionTreeClassifier() {'criterion': ['gini'], 'max_depth': [14]}
AdaBoostClassifier() {'n_estimators': [42], 'learning_rate': [0.8], 'algorithm': ['SAMME']}
GaussianNB() {}
LogisticRegression(max_iter=500) {'C': [1], 'solver': ['lbfgs'], 'multi_class': ['auto']}
SVC() {'C': [9], 'kernel': ['rbf'], 'gamma': ['auto']}
RandomForestClassifier() {'n_estimators': [11], 'criterion': ['entropy'], 'random_state': [13], 'max_depth': [6]}


In [47]:
import pandas as pd    
df2 = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df2 = df2.sort_values(by='best_score',ascending=False)
df2

Unnamed: 0,model,best_score,best_params
0,RANDOM_FOREST,0.935556,"{'criterion': 'entropy', 'max_depth': 6, 'n_es..."
1,SVM,0.935556,"{'C': 9, 'gamma': 'auto', 'kernel': 'rbf'}"
2,KNeighborsClassifier,0.933333,"{'algorithm': 'auto', 'leaf_size': 27, 'n_neig..."
3,GaussianNB,0.927326,{}
4,LOGISTIC_REGRESSION,0.922222,"{'C': 1, 'multi_class': 'auto', 'solver': 'lbf..."
5,AdaBoostClassifier,0.902222,"{'algorithm': 'SAMME', 'learning_rate': 0.8, '..."
6,DecisionTreeClassifier,0.853333,"{'criterion': 'gini', 'max_depth': 14}"
