In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.cluster import DBSCAN
from sklearn.cluster import AgglomerativeClustering

In [2]:
imgs=[]

p1='TB_Chest_Radiography_Database/Normal'
l1=os.listdir('TB_Chest_Radiography_Database/Normal')

p2='TB_Chest_Radiography_Database/Tuberculosis'
l2=os.listdir('TB_Chest_Radiography_Database/Tuberculosis')

print('Number of Normal Images:',len(l1))
print('Number of Tuberculosis Images:',len(l2))

# for i in l1:
#     imgs.append((plt.imread(p1+'/'+i),0))

i1_=list(range(len(l1)))
random.shuffle(i1_)

no=len(l2)
# no=len(l1)

for i in i1_[:no]:
    m1=plt.imread(p1+'/'+l1[i])
    if len(m1.shape)==3:
        imgs.append((np.mean(m1,axis=2),0))
    else:
        imgs.append((m1,0))
    
for i in l2:
    m2=plt.imread(p2+'/'+i)
    if len(m2.shape)==3:
        imgs.append((np.mean(m2,axis=2),1))
    else:
        imgs.append((m2,1))

Number of Normal Images: 3500
Number of Tuberculosis Images: 700


In [3]:
print('Total Images taken to work with:',len(imgs))

i_=list(range(len(imgs)))
random.shuffle(i_) 

x=[]
y=[]
for i in i_:
    x.append(imgs[i][0].flatten())
    y.append(imgs[i][1])
    
x[0].shape

Total Images taken to work with: 1400


(262144,)

In [4]:
xtr,xte,ytr,yte=train_test_split(x, y, test_size=0.33, random_state=42)

In [5]:
len(xtr)

938

In [7]:
pca=PCA(n_components=len(xtr))
xtr1=pca.fit_transform(xtr)
xte1=pca.transform(xte)

In [13]:
model=KMeans(n_clusters=2, random_state=0)
model.fit(xtr1)
ytrpred=model.predict(xtr1)
ytepred=model.predict(xte1)

print('Training Report:')
print(classification_report(ytr,ytrpred))
print('Testing Report:')
print(classification_report(yte,ytepred))

Training Report:
              precision    recall  f1-score   support

           0       0.45      0.52      0.48       448
           1       0.49      0.42      0.45       490

    accuracy                           0.47       938
   macro avg       0.47      0.47      0.47       938
weighted avg       0.47      0.47      0.47       938

Testing Report:
              precision    recall  f1-score   support

           0       0.53      0.55      0.54       252
           1       0.43      0.41      0.42       210

    accuracy                           0.49       462
   macro avg       0.48      0.48      0.48       462
weighted avg       0.49      0.49      0.49       462



In [14]:
print('Training Report:')
print(classification_report(ytr,1-np.array(ytrpred)))
print('Testing Report:')
print(classification_report(yte,1-np.array(ytepred)))

Training Report:
              precision    recall  f1-score   support

           0       0.51      0.48      0.50       448
           1       0.55      0.58      0.56       490

    accuracy                           0.53       938
   macro avg       0.53      0.53      0.53       938
weighted avg       0.53      0.53      0.53       938

Testing Report:
              precision    recall  f1-score   support

           0       0.56      0.45      0.50       252
           1       0.47      0.59      0.52       210

    accuracy                           0.51       462
   macro avg       0.52      0.52      0.51       462
weighted avg       0.52      0.51      0.51       462



In [8]:
model=GaussianMixture(n_components=2, random_state=0)
model.fit(xtr1)
ytrpred=model.predict(xtr1)
ytepred=model.predict(xte1)

print('Training Report:')
print(classification_report(ytr,ytrpred))
print('Testing Report:')
print(classification_report(yte,ytepred))

Training Report:
              precision    recall  f1-score   support

           0       0.48      0.55      0.51       466
           1       0.48      0.41      0.44       472

    accuracy                           0.48       938
   macro avg       0.48      0.48      0.48       938
weighted avg       0.48      0.48      0.48       938

Testing Report:
              precision    recall  f1-score   support

           0       0.55      0.89      0.68       234
           1       0.69      0.25      0.36       228

    accuracy                           0.57       462
   macro avg       0.62      0.57      0.52       462
weighted avg       0.62      0.57      0.52       462



In [9]:
print('Training Report:')
print(classification_report(ytr,1-np.array(ytrpred)))
print('Testing Report:')
print(classification_report(yte,1-np.array(ytepred)))

Training Report:
              precision    recall  f1-score   support

           0       0.52      0.45      0.48       466
           1       0.52      0.59      0.55       472

    accuracy                           0.52       938
   macro avg       0.52      0.52      0.52       938
weighted avg       0.52      0.52      0.52       938

Testing Report:
              precision    recall  f1-score   support

           0       0.31      0.11      0.16       234
           1       0.45      0.75      0.56       228

    accuracy                           0.43       462
   macro avg       0.38      0.43      0.36       462
weighted avg       0.38      0.43      0.36       462



In [12]:
model=AgglomerativeClustering(n_clusters=2, linkage='complete')
ytepred=model.fit_predict(xte)

print('Testing Report:')
print(classification_report(yte,ytepred))

Testing Report:
              precision    recall  f1-score   support

           0       0.53      0.84      0.65       234
           1       0.57      0.22      0.32       228

    accuracy                           0.53       462
   macro avg       0.55      0.53      0.48       462
weighted avg       0.55      0.53      0.48       462



In [13]:
print('Testing Report:')
print(classification_report(yte,1-np.array(ytepred)))

Testing Report:
              precision    recall  f1-score   support

           0       0.43      0.16      0.23       234
           1       0.47      0.78      0.59       228

    accuracy                           0.47       462
   macro avg       0.45      0.47      0.41       462
weighted avg       0.45      0.47      0.41       462

