In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pydicom as dicom

# Data visualization: DataFrame, samples and classes

In [None]:
# Read train.csv into a DataFrame and preview ten randomly sampled rows.
train_csv_path = '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/train.csv'
train_df = pd.read_csv(train_csv_path)
train_df.sample(n=10)

In [None]:
# Number of rows per (class_name, class_id) pair, ordered by class_id.
# The whole pipeline is one assigned chain: the stored frame carries the same
# clean 0..n-1 index that the cell displays, and nothing is mutated in place,
# so re-running the cell always gives the same result.
class_sample_counts = (
    train_df
    .groupby(['class_name', 'class_id'])
    .size()
    .reset_index(name='count')   # name the size column directly
    .sort_values(by='class_id')
    .reset_index(drop=True)      # discard the pre-sort row labels
)
class_sample_counts

In [None]:
# Pull the class_name column out as a plain Python list and report its size.
class_name = class_sample_counts['class_name'].tolist()
print(f'The total number of classes is {len(class_name)}')
print(class_name)

In [None]:
# Bar chart of the per-class row counts, with vertical x tick labels so the
# class names do not overlap.
plt.figure(figsize=(15, 5))
sns.set(font_scale=1.5)
count_axes = sns.barplot(data=class_sample_counts, x='class_name', y='count')
count_tick_labels = count_axes.get_xticklabels()
# Binding the return value keeps the list of Text objects out of the cell output.
g = count_axes.set_xticklabels(count_tick_labels, rotation=90)

# Visualization of the mean bounding-box area of each disease class

In [None]:
# Keep only the rows whose class_id is not 14 and renumber them from zero.
# NOTE(review): class_id 14 is presumably the "No finding" label — confirm
# against the dataset description.
is_abnormal_row = train_df['class_id'].ne(14)
train_df_abnormal = train_df.loc[is_abnormal_row].reset_index(drop=True)
train_df_abnormal

In [None]:
# Bounding-box area per row: (x_max - x_min) * (y_max - y_min).
box_width = train_df_abnormal['x_max'] - train_df_abnormal['x_min']
box_height = train_df_abnormal['y_max'] - train_df_abnormal['y_min']
train_df_abnormal['area'] = box_width * box_height

In [None]:
# Mean bounding-box area for every (class_name, class_id) pair, returned as a
# flat frame with the grouping keys as ordinary columns.
area_by_class = train_df_abnormal.groupby(['class_name', 'class_id'], as_index=False)
class_wise_area = area_by_class['area'].mean()
class_wise_area

In [None]:
# Bar chart of the mean bounding-box area per class, with vertical x tick
# labels so the class names do not overlap.
plt.figure(figsize=(15, 5))
sns.set(font_scale=1.5)
area_axes = sns.barplot(data=class_wise_area, x='class_name', y='area')
area_tick_labels = area_axes.get_xticklabels()
# Binding the return value keeps the list of Text objects out of the cell output.
g = area_axes.set_xticklabels(area_tick_labels, rotation=90)

In [None]:
# Remove the helper 'area' column from the working frame.
# DataFrame.drop returns a new frame rather than modifying the caller, so the
# result must be assigned back for the column to actually disappear.
# errors='ignore' keeps the cell re-runnable once the column is already gone.
train_df_abnormal = train_df_abnormal.drop(columns='area', errors='ignore')
train_df_abnormal

# Histogram

In [None]:
# Pick one random row of train_df_abnormal, load the matching DICOM file, and
# show the image beside the histogram of its raw pixel intensities.
random_num = np.random.randint(0, len(train_df_abnormal), size=1)
train_df1 = train_df_abnormal.iloc[random_num[0]]
train_folder = '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/train'
dicom_path = os.path.join(train_folder, train_df1['image_id']) + '.dicom'
dicom_image = dicom.dcmread(dicom_path)
pixel_data = dicom_image.pixel_array
pi = dicom_image['PhotometricInterpretation'].value
if pi == 'MONOCHROME1':
    # Flip the intensities around the image maximum for MONOCHROME1 files.
    pixel_data = np.abs(pixel_data.max() - pixel_data)

fig, (image_ax, hist_ax) = plt.subplots(1, 2, figsize=(20, 5))
image_ax.set_title('Xray image')
image_ax.imshow(pixel_data, cmap='gray')
hist_ax.set_title('Histogram of raw image')
hist_ax.set_xlabel('Intensity')
hist_ax.set_ylabel('Frequency')
# Binding the histogram tuple to g keeps it out of the cell output.
g = hist_ax.hist(pixel_data.ravel(), bins=100)

In [None]:
# Min-max rescale the raw pixels to the 0-255 range, then show the rescaled
# image beside its intensity histogram. Both panels are titled and labelled so
# the figure can be read on its own, like the raw-image figure.
pixel_data_norm = pixel_data.copy()  # destination buffer with pixel_data's shape and dtype
cv2.normalize(pixel_data, pixel_data_norm, 0, 255, cv2.NORM_MINMAX)

fig, (image_ax, hist_ax) = plt.subplots(1, 2, figsize=(20, 5))
image_ax.set_title('Normalized image')
image_ax.imshow(pixel_data_norm, cmap='gray')
hist_ax.set_title('Histogram of normalized image')
hist_ax.set_xlabel('Intensity')
hist_ax.set_ylabel('Frequency')
# Binding the histogram tuple to g keeps it out of the cell output.
g = hist_ax.hist(pixel_data_norm.ravel(), bins=50)

In [None]:
# Histogram-equalize the 0-255 rescaled image and show it beside its
# intensity histogram. cv2.equalizeHist is fed an 8-bit array, hence the
# uint8 cast. Both panels are titled so the figure can be read on its own.
pixel_data_norm1 = np.asarray(pixel_data_norm, np.uint8)
pixel_data_norm1 = cv2.equalizeHist(pixel_data_norm1)

fig, (image_ax, hist_ax) = plt.subplots(1, 2, figsize=(20, 5))
image_ax.set_title('Equalized image')
image_ax.imshow(pixel_data_norm1, cmap='gray')
hist_ax.set_title('Histogram of equalized image')
hist_ax.set_xlabel('Intensity')
hist_ax.set_ylabel('Frequency')
# Binding the histogram tuple to g keeps it out of the cell output.
g = hist_ax.hist(pixel_data_norm1.ravel(), bins=30)

### Bounding box histogram

In [None]:
# Crop the bounding box of the selected row out of the raw, normalized and
# equalized images, and plot the intensity histogram of each crop side by side.
x1, x2 = int(train_df1['x_min']), int(train_df1['x_max'])
y1, y2 = int(train_df1['y_min']), int(train_df1['y_max'])
box_rows, box_cols = slice(y1, y2), slice(x1, x2)  # image arrays are indexed [row, column]
roi_org = pixel_data[box_rows, box_cols]
roi_normalized = pixel_data_norm[box_rows, box_cols]
roi_equalized = pixel_data_norm1[box_rows, box_cols]

fig, (raw_ax, norm_ax, eq_ax) = plt.subplots(1, 3, figsize=(25, 5))

raw_ax.set_title('Raw Image')
raw_ax.set_xlabel('Intensity')
raw_ax.set_ylabel('Frequency')
g1 = raw_ax.hist(roi_org.ravel(), bins=20)

g2 = norm_ax.hist(roi_normalized.ravel(), bins=20)
norm_ax.set_title('Normalized Image')
norm_ax.set_xlabel('Intensity')
norm_ax.set_ylabel('Frequency')

# g3 is rebound on each call purely to keep return values out of the cell output.
g3 = eq_ax.hist(roi_equalized.ravel(), bins=20)
g3 = eq_ax.set_title('Equalized Image')
g3 = eq_ax.set_xlabel('Intensity')
g3 = eq_ax.set_ylabel('Frequency')
                          

# Visualization of samples with bounding box

In [None]:
# Show a grid of randomly chosen rows from train_df_abnormal: each panel is the
# equalized X-ray with that row's bounding box drawn in green and the class
# name as the title.
number_of_samples = 4
max_cols = 4  # maximum number of panels per grid row
images_id = np.random.randint(low=0, high=len(train_df_abnormal), size=number_of_samples)
ncols = min(number_of_samples, max_cols)
nrows = int(np.ceil(number_of_samples / max_cols))
# The folder does not change between iterations, so it is set once up front.
train_folder = '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/train'

# squeeze=False always yields a 2-D axes array, so ravel() works for any grid shape.
fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*5, nrows*5), squeeze=False)
axes = axes.ravel()

# Loop-local names (sample_*) are used on purpose: the histogram cells rely on
# train_df1 / pixel_data, and rebinding those here would leave them pointing
# at a different row and image if an earlier cell is re-run afterwards.
for ax, row_idx in zip(axes, images_id):
    sample_row = train_df_abnormal.iloc[row_idx]
    sample_dicom = dicom.dcmread(os.path.join(train_folder, sample_row['image_id']) + '.dicom')
    sample_pixels = sample_dicom.pixel_array
    if sample_dicom['PhotometricInterpretation'].value == 'MONOCHROME1':
        # Flip the intensities around the image maximum for MONOCHROME1 files.
        sample_pixels = np.abs(sample_pixels.max() - sample_pixels)

    # Rescale to 0-255, cast to 8-bit for equalizeHist, then expand to RGB so
    # the box can be drawn in colour.
    cv2.normalize(sample_pixels, sample_pixels, 0, 255, cv2.NORM_MINMAX)
    sample_pixels = cv2.equalizeHist(np.asarray(sample_pixels, np.uint8))
    sample_rgb = cv2.cvtColor(sample_pixels, cv2.COLOR_GRAY2RGB)

    top_left = (int(sample_row['x_min']), int(sample_row['y_min']))
    bottom_right = (int(sample_row['x_max']), int(sample_row['y_max']))
    cv2.rectangle(sample_rgb, top_left, bottom_right, (0, 255, 0), 20)  # green outline, 20 px thick

    ax.imshow(sample_rgb)
    ax.set_title(sample_row['class_name'])

# Hide grid cells left over when number_of_samples does not fill the last row.
for unused_ax in axes[number_of_samples:]:
    unused_ax.set_axis_off()