## Preparing the Metadata
Metadata is extracted from the DICOM images and then submitted to the model to improve the results.

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pydicom

import gc
import warnings

# Install a blanket "ignore" filter so no warning is shown from here on.
warnings.simplefilter('ignore')

# Location of the metadata CSV that the next cell loads.
file_path = 'patients_meta.csv'

In [2]:
# Load the metadata table from the CSV at `file_path` into a DataFrame.
df_meta = pd.read_csv(file_path)


In [3]:
# Number of distinct values per column.
unique_counts = df_meta.nunique()

# A column is flagged for removal when it is constant/empty (at most one
# distinct value) or when it has as many distinct values as one of the two
# identifier-like columns ('patientId' or 'Unnamed: 0').
is_constant = unique_counts <= 1
matches_patient_id = unique_counts == unique_counts['patientId']
matches_row_number = unique_counts == unique_counts['Unnamed: 0']

flagged = unique_counts[is_constant | matches_patient_id | matches_row_number]

# 'patientId' matches its own count, so it is always flagged; take it back
# out of the list so the column stays in the table.
to_drop = flagged.index.drop('patientId')
to_drop

Index(['Unnamed: 0', 'AccessionNumber', 'BitsAllocated', 'BitsStored',
       'BodyPartExamined', 'Columns', 'ConversionType', 'HighBit',
       'InstanceNumber', 'LossyImageCompression',
       'LossyImageCompressionMethod', 'Modality', 'PatientBirthDate',
       'PatientID', 'PatientName', 'PatientOrientation',
       'PhotometricInterpretation', 'PixelRepresentation',
       'ReferringPhysicianName', 'Rows', 'SOPClassUID', 'SOPInstanceUID',
       'SamplesPerPixel', 'SeriesInstanceUID', 'SeriesNumber',
       'SpecificCharacterSet', 'StudyDate', 'StudyID', 'StudyInstanceUID',
       'StudyTime'],
      dtype='object')

In [4]:
# Remove, in place, every column listed in `to_drop`, then preview the result.
df_meta.drop(columns=to_drop, inplace=True)
df_meta.head()

Unnamed: 0,patientId,x,y,width,height,Target,PatientAge,PatientSex,SeriesDescription,ViewPosition
0,0004cfab-14fd-4e49-80ba-63a80b6bddd6,,,,,0,51,F,view: PA,PA
1,00313ee0-9eaa-42f4-b0ab-c148ed3241cd,,,,,0,48,F,view: PA,PA
2,00322d4d-1c29-4943-afc9-b6754be640eb,,,,,0,19,M,view: AP,AP
3,003d8fa0-6bf1-40ed-b54c-ac657f8495c5,,,,,0,28,M,view: PA,PA
4,00436515-870c-4b36-a041-de91049b9ab4,264.0,152.0,213.0,379.0,1,32,F,view: AP,AP


In [5]:
# Drop the bounding-box columns and the series description in place.
# The labels are passed directly as a list: the previous version built a
# sub-DataFrame and relied on DataFrame iteration yielding column names,
# which copies data needlessly and obscures what is being dropped.
df_meta.drop(columns=['x', 'y', 'height', 'width', 'SeriesDescription'], inplace=True)
df_meta.head()

Unnamed: 0,patientId,Target,PatientAge,PatientSex,ViewPosition
0,0004cfab-14fd-4e49-80ba-63a80b6bddd6,0,51,F,PA
1,00313ee0-9eaa-42f4-b0ab-c148ed3241cd,0,48,F,PA
2,00322d4d-1c29-4943-afc9-b6754be640eb,0,19,M,AP
3,003d8fa0-6bf1-40ed-b54c-ac657f8495c5,0,28,M,PA
4,00436515-870c-4b36-a041-de91049b9ab4,1,32,F,AP


### Quantizing the numeric data

In [6]:
def quantize_age(value, year_interval=5):
    """Snap an age to the start of its bucket, offset by one.

    The value is floored to a multiple of ``year_interval`` and 1 is added,
    so with the default interval 32 becomes 31 and 5 becomes 6.

    Args:
        value: Age to quantize.
        year_interval: Width of each bucket; defaults to 5.

    Returns:
        ``(value // year_interval) * year_interval + 1``.
    """
    bucket_index = value // year_interval
    bucket_start = bucket_index * year_interval
    return bucket_start + 1


def quantize_age_10(value):
    """Quantize ``value`` with ``quantize_age`` using 10-wide buckets."""
    return quantize_age(value, year_interval=10)


# Spot-check a few ages, including values on and around bucket edges.
tuple(quantize_age(age) for age in (4, 5, 32, 84, 86, 89))

(1, 6, 31, 81, 86, 86)

In [7]:
# Integer codes for the two categorical columns. Series.map leaves any value
# that is not a key of the mapping as NaN.
sex_codes = {'F': 0, 'M': 1}
view_codes = {'PA': 0, 'AP': 1}

df_meta['PatientSex'] = df_meta['PatientSex'].map(sex_codes)
df_meta['ViewPosition'] = df_meta['ViewPosition'].map(view_codes)

# Bucketed copies of the age at two granularities (5 and 10 wide).
for age_column, quantizer in (('age_5', quantize_age), ('age_10', quantize_age_10)):
    df_meta[age_column] = df_meta['PatientAge'].map(quantizer)

df_meta.head()

Unnamed: 0,patientId,Target,PatientAge,PatientSex,ViewPosition,age_5,age_10
0,0004cfab-14fd-4e49-80ba-63a80b6bddd6,0,51,0,0,51,51
1,00313ee0-9eaa-42f4-b0ab-c148ed3241cd,0,48,0,0,46,41
2,00322d4d-1c29-4943-afc9-b6754be640eb,0,19,1,1,16,11
3,003d8fa0-6bf1-40ed-b54c-ac657f8495c5,0,28,1,0,26,21
4,00436515-870c-4b36-a041-de91049b9ab4,1,32,0,1,31,31


In [8]:
# Divide both bucketed age columns by a fixed constant.
# NOTE(review): 120 is presumably meant as an upper bound on age so the
# features land in [0, 1] — confirm.
age_divisor = 120
for age_column in ('age_5', 'age_10'):
    df_meta[age_column] = df_meta[age_column] / age_divisor

In [9]:
# The raw age is no longer needed once the bucketed columns exist; remove it
# in place and preview the table.
df_meta.drop(columns=['PatientAge'], inplace=True)
df_meta.head()

Unnamed: 0,patientId,Target,PatientSex,ViewPosition,age_5,age_10
0,0004cfab-14fd-4e49-80ba-63a80b6bddd6,0,0,0,0.425,0.425
1,00313ee0-9eaa-42f4-b0ab-c148ed3241cd,0,0,0,0.383333,0.341667
2,00322d4d-1c29-4943-afc9-b6754be640eb,0,1,1,0.133333,0.091667
3,003d8fa0-6bf1-40ed-b54c-ac657f8495c5,0,1,0,0.216667,0.175
4,00436515-870c-4b36-a041-de91049b9ab4,1,0,1,0.258333,0.258333


In [10]:
# Write the prepared table to a CSV file in the working directory.
# No `index` argument is given, so pandas' default applies and the row index
# is written as the first column.
df_meta.to_csv('extractfied_meta.csv')

In [11]:
# Read the file back to verify what was written. `DataFrame.from_csv` is
# deprecated/removed in current pandas and its result was previously
# discarded, so the preview never showed the reloaded data. `index_col=0`
# restores the row index that `to_csv` stored as the first column.
df_meta = pd.read_csv('extractfied_meta.csv', index_col=0)
df_meta.head()

Unnamed: 0,patientId,Target,PatientSex,ViewPosition,age_5,age_10
0,0004cfab-14fd-4e49-80ba-63a80b6bddd6,0,0,0,0.425,0.425
1,00313ee0-9eaa-42f4-b0ab-c148ed3241cd,0,0,0,0.383333,0.341667
2,00322d4d-1c29-4943-afc9-b6754be640eb,0,1,1,0.133333,0.091667
3,003d8fa0-6bf1-40ed-b54c-ac657f8495c5,0,1,0,0.216667,0.175
4,00436515-870c-4b36-a041-de91049b9ab4,1,0,1,0.258333,0.258333
