# Resources:
[ https://www.kaggle.com/dschettler8845/visual-in-depth-eda-vinbigdata-competition-data ]
[ https://www.kaggle.com/awsaf49/vinbigdata-cxr-ad-yolov5-14-class-train ]


# Imports

In [None]:
import numpy as np
import os,math,random
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pydicom as pyd
from pydicom.pixel_data_handlers.util import apply_voi_lut
from skimage import exposure

import warnings
warnings.filterwarnings('ignore')

* **`read_xray` helper, adapted from another notebook (the source link is missing here)**

In [None]:
def read_xray(path, voi_lut = True, fix_monochrome = True,equalize_hist=True):
    """Read a DICOM file and return its pixel data as a displayable image array.

    Parameters
    ----------
    path : str or path-like
        Location of the DICOM file.
    voi_lut : bool, default True
        If True, pass the raw pixels through ``apply_voi_lut`` before scaling.
    fix_monochrome : bool, default True
        If True and the file's PhotometricInterpretation is "MONOCHROME1",
        invert the intensities so the image does not look like a negative.
    equalize_hist : bool, default True
        If True, run ``skimage.exposure.equalize_hist`` on the 8-bit image.

    Returns
    -------
    numpy.ndarray
        With ``equalize_hist=False``: a ``uint8`` array scaled to 0-255.
        With ``equalize_hist=True``: whatever ``exposure.equalize_hist``
        returns for that array (a float image per the scikit-image docs).
    """
    # `dcmread` is the supported reader; `read_file` is a deprecated alias.
    dicom = pyd.dcmread(path)

    # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to "human-friendly" view
    pixels = dicom.pixel_array
    if voi_lut:
        pixels = apply_voi_lut(pixels, dicom)

    # depending on this value, X-ray may look inverted - fix that:
    photometric = getattr(dicom, "PhotometricInterpretation", None)
    if fix_monochrome and photometric == "MONOCHROME1":
        pixels = np.amax(pixels) - pixels

    # Min-max scale to 0-255. A constant image has a zero range after the
    # shift; skip the division there so it stays all zeros instead of 0/0.
    pixels = pixels - np.min(pixels)
    peak = np.max(pixels)
    if peak > 0:
        pixels = pixels / peak
    pixels = (pixels * 255).astype(np.uint8)

    if equalize_hist:
        pixels = exposure.equalize_hist(pixels)

    return pixels


In [None]:
def show_sample_images(df,directory,n,cmap='gray'):
    """Show ``n`` randomly sampled rows of ``df`` as images in a two-column grid.

    Each sampled row is drawn as its X-ray (read with ``read_xray`` from
    ``<directory>/<image_id>.dicom``), titled with the row's ``class_name``
    and overlaid with the row's bounding box.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``image_id``, ``class_name``, ``x_min``,
        ``y_min``, ``x_max`` and ``y_max``.
    directory : str
        Folder containing the ``.dicom`` files.
    n : int
        Number of rows to sample; must be at least 1. ``df.sample`` raises
        if ``n`` exceeds ``len(df)``.
    cmap : str, default 'gray'
        Colormap passed to ``imshow``.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be at least 1')

    n_cols=2
    # ceil, not floor: an odd n needs one extra (half-filled) row.
    n_rows=math.ceil(n/n_cols)
    fig,axes=plt.subplots(n_rows,n_cols,figsize=(16,n_rows*8),squeeze=False)
    axes=axes.ravel()

    dfs=df.sample(n)

    for ax,(_,row) in zip(axes,dfs.iterrows()):
        image=read_xray(os.path.join(directory,f'{row["image_id"]}.dicom'))
        ax.imshow(image,cmap=cmap)
        ax.set_title(f'{row["class_name"]}')

        #bounding boxes:
        x_min,y_min=row['x_min'],row['y_min']
        x_max,y_max=row['x_max'],row['y_max']
        # Rows without box coordinates have nothing to draw.
        if any(pd.isna(v) for v in (x_min,y_min,x_max,y_max)):
            continue

        p=mpl.patches.Rectangle((x_min,y_min),x_max-x_min,y_max-y_min,ec='r',lw=1,fc='none')
        ax.add_patch(p)

    # Hide the axes of every cell, including an unused trailing one.
    for ax in axes:
        ax.axis('off')

    fig.tight_layout()
    plt.show()

# **Load data**

In [None]:
# Root of the competition dataset; every path below is derived from it,
# so pointing the notebook at another copy is a one-line change.
DATA_DIR='../input/vinbigdata-chest-xray-abnormalities-detection'

cwd='./'
train_dir=f'{DATA_DIR}/train'
test_dir=f'{DATA_DIR}/test'

train=pd.read_csv(f'{DATA_DIR}/train.csv')
sample_sub=pd.read_csv(f'{DATA_DIR}/sample_submission.csv')
train.head()

In [None]:
# Preview 40 randomly sampled annotation rows (cmap is left at its 'gray' default).
show_sample_images(train,train_dir,40)

**Checking the class balance**

In [None]:
fig,ax=plt.subplots(figsize=(16,8))
# Pass the column as `x=`: recent seaborn releases treat the first positional
# argument as `data`, not as the variable to count. Draw on our own axes.
sns.countplot(x=train['class_name'],ax=ax)
plt.setp(ax.get_xticklabels(),rotation=90)
ax.set_title('Class Balance')
plt.show()


**Checking the number of pictures in the training set**

In [None]:
# Each row of `train` is one annotation, and an image can appear on several
# rows, so len(train) counts annotations rather than images.
print('Total number of annotations (rows) in trainset are : {} '.format(len(train)))
print('Number of unique pictures in the trainset are : {} '.format(train['image_id'].nunique()))

In [None]:
# Derive the average from the data instead of hard-coding the row and image
# counts, and show the fraction rather than rounding it up.
annotations_per_image=len(train)/train['image_id'].nunique()
print('Average number of annotations per image : {:.2f} '.format(annotations_per_image))

**As there are many images with a lot of annotations, we will plot them with all of their annotations**

**Let's plot images with all their annotations:**

In [None]:
def plot_image(img_id,train_dir,df):
    """Show one X-ray with every bounding box annotated for it in ``df``.

    Parameters
    ----------
    img_id : str
        Image identifier; the file ``<train_dir>/<img_id>.dicom`` is read
        with ``read_xray``.
    train_dir : str
        Folder containing the ``.dicom`` files.
    df : pandas.DataFrame
        Annotation table with the columns ``image_id``, ``class_name``,
        ``x_min``, ``y_min``, ``x_max`` and ``y_max``. All rows whose
        ``image_id`` equals ``img_id`` are drawn.
    """
    fig,ax=plt.subplots(figsize=(10,10))
    img=read_xray(os.path.join(train_dir,f'{img_id}.dicom'))
    ax.imshow(img,cmap='gray')

    #all annotations for the image
    annotations=df[df['image_id']==img_id]

    for _,row in annotations.iterrows():
        #min,max
        x_min,y_min=row['x_min'],row['y_min']
        x_max,y_max=row['x_max'],row['y_max']

        # Rows without box coordinates have nothing to draw.
        if any(pd.isna(v) for v in (x_min,y_min,x_max,y_max)):
            continue

        #bounding box: red outline plus a class label near its lower-left corner
        p=mpl.patches.Rectangle((x_min,y_min),x_max-x_min,y_max-y_min,ec='r',lw=1,fc='none')
        ax.add_patch(p)
        ax.annotate('{}'.format(row['class_name']), xy=(x_min+50,y_max+50),
                    color='blue',horizontalalignment='right')

    ax.axis('off')
    plt.show()
    
# Fixed example image, drawn with all of its annotations.
plot_image(img_id='9a5094b2563a1ef3ff50dc5c7ff71345',train_dir=train_dir,df=train)

**Let's look at some examples**

In [None]:
def random_id(df):
    """Return one image id picked at random from ``df['image_id']``.

    Every row is equally likely, so an id that appears on several rows is
    proportionally more likely to be returned. Raises ``IndexError`` (from
    ``random.choice``) when ``df`` has no rows.
    """
    # random.choice indexes its argument with an integer position. On a
    # pandas Series that integer is resolved as an index label, which fails
    # as soon as the frame has a non-default index. Choose from a plain list.
    return random.choice(df['image_id'].tolist())

# Draw one randomly chosen training image with its annotations.
plot_image(img_id=random_id(df=train),train_dir=train_dir,df=train)

In [None]:
# Six more random examples; one loop replaces six copy-pasted cells.
n_examples=6
for _ in range(n_examples):
    plot_image(random_id(train),train_dir,train)