In [1]:
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn import tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
import tensorflow as tf
from tensorflow import keras
from skimage.transform import resize
from skimage.io import imread

In [2]:
Categories=['NORMAL','PNEUMONIA']



In [3]:
def load_image(datadir, Categories):

    flat_data_arr=[]
    target_arr=[]

    for i in Categories:

        print(f'loading... category : {i}')
        path=os.path.join(datadir,i)
        for img in os.listdir(path):
            img_array=imread(os.path.join(path,img))
            img_resized=resize(img_array,(150,150,3))
            flat_data_arr.append(img_resized.flatten())
            target_arr.append(Categories.index(i))
        print(f'loaded category:{i} successfully')

    flat_data=np.array(flat_data_arr)
    target=np.array(target_arr)

    df=pd.DataFrame(flat_data)
    df['Target']=target

    x=df.iloc[:,:-1]
    y=df.iloc[:,-1]
    return x, y

In [5]:
datadir = 'D:/github/archive/chest_xray/train'
X, y = load_image(datadir, Categories)


loading... category : NORMAL
loaded category:NORMAL successfully
loading... category : PNEUMONIA
loaded category:PNEUMONIA successfully


In [9]:
datadir2 = 'D:/github/archive/chest_xray/test'
X_test2, y_test2 = load_image(datadir2, Categories)

loading... category : NORMAL
loaded category:NORMAL successfully
loading... category : PNEUMONIA
loaded category:PNEUMONIA successfully


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
rf1 = RandomForestClassifier(criterion='log_loss')
rf2 = RandomForestClassifier(criterion='gini')
rf3 = RandomForestClassifier(criterion='entropy')
decision_tree = tree.DecisionTreeClassifier()
naive_bayes = GaussianNB()
gb = GradientBoostingClassifier()
svc = SVC()
lr = LogisticRegression()

In [8]:
rf1.fit(X_train,y_train)
rf2.fit(X_train,y_train)
rf3.fit(X_train,y_train)
naive_bayes.fit(X_train, y_train)
decision_tree.fit(X_train, y_train)

print("Random Forest Using log_loss: ", rf1.score(X_test, y_test))
print("Random Forest Using gini: ", rf2.score(X_test, y_test))
print("Random Forest Using entropy: ", rf3.score(X_test, y_test))
print("Using Decision Tree: ", decision_tree.score(X_test, y_test))


Random Forest Using log_loss:  0.9571884984025559
Random Forest Using gini:  0.9514376996805112
Random Forest Using entropy:  0.9533546325878595
Using Decision Tree:  0.8773162939297124


In [10]:
print("Random Forest Using log_loss: ", rf1.score(X_test2, y_test2))
print("Random Forest Using gini: ", rf2.score(X_test2, y_test2))
print("Random Forest Using entropy: ", rf3.score(X_test2, y_test2))
print("Using Decision Tree: ", decision_tree.score(X_test2, y_test2))

Random Forest Using log_loss:  0.7724358974358975
Random Forest Using gini:  0.7387820512820513
Random Forest Using entropy:  0.782051282051282
Using Decision Tree:  0.7339743589743589
