In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px

import os
import glob
import pydicom
from typing import Dict
import tqdm
#color
from colorama import Fore, Back, Style

import gc


# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn after this point.
plt.style.use('fivethirtyeight')

In [None]:
# All competition inputs share one Kaggle directory; keep that prefix in a single place.
DATA_ROOT = "/kaggle/input/rsna-str-pulmonary-embolism-detection/"

# Train/test directories. The trailing slash matters: later cells build paths by
# plain string concatenation onto these values.
train_dir = DATA_ROOT + "train/"
test_dir = DATA_ROOT + "test/"

# Tabular inputs: training table, test table and the sample submission template.
train = pd.read_csv(DATA_ROOT + "train.csv")
test = pd.read_csv(DATA_ROOT + "test.csv")
submission = pd.read_csv(DATA_ROOT + "sample_submission.csv")

In [None]:
# Build one file path per row: <train_dir><study>/<series>/<sop>.dcm
study_uid = train["StudyInstanceUID"]
series_uid = train["SeriesInstanceUID"]
sop_uid = train["SOPInstanceUID"]
series_dir = train_dir + study_uid + "/" + series_uid
train["path"] = series_dir + "/" + sop_uid + ".dcm"

In [None]:
# Preview the first rows of `train`, which now also carries the 'path' column.
train.head()

In [None]:
# Human-readable display label for each column name (presumably the label columns
# of train.csv -- confirm). Note the two rv_lv_ratio labels omit the trailing "1".
cols = dict(
    pe_present_on_image="Pe Present On Image",
    negative_exam_for_pe="Negative Exam For Pe",
    rv_lv_ratio_gte_1="Rv Lv Ratio Gte",
    rv_lv_ratio_lt_1="Rv Lv Ratio Lt",
    leftsided_pe="Leftsided Pe",
    chronic_pe="Chronic Pe",
    true_filling_defect_not_pe="True Filling Defect Not Pe",
    rightsided_pe="Rightsided Pe",
    acute_and_chronic_pe="Acute And Chronic Pe",
    central_pe="Central Pe",
    indeterminate="Indeterminate",
)


In [None]:
# Number of directory entries inside every <train_dir>/<study>/<series> folder,
# collected in directory-listing order (one entry per series folder).
p_sizes = [
    len(os.listdir(os.path.join(train_dir, study, series)))
    for study in os.listdir(train_dir)
    for series in os.listdir(os.path.join(train_dir, study))
]

In [None]:

# Read the first training slice. The path gets its own name so `dcm` only ever
# refers to the parsed dataset.
dcm_path = train.path.iloc[0]
print('Filename: {}'.format(dcm_path))
# `pydicom.read_file` is a deprecated alias that pydicom 3.0 removed; `dcmread`
# is the supported reader and is available in older releases too.
dcm = pydicom.dcmread(dcm_path)

In [None]:

def dicom_to_image(filename, padding_value=-2000, fill_value=0):
    """Read a DICOM file and return its pixel array with padding pixels replaced.

    Args:
        filename: Path (or file-like object) handed to ``pydicom.dcmread``.
        padding_value: Stored pixel value to overwrite. Defaults to -2000
            (presumably the marker for pixels outside the scanned area --
            confirm against the dataset).
        fill_value: Value written wherever ``padding_value`` is found.
            Defaults to 0.

    Returns:
        The dataset's ``pixel_array`` with every element equal to
        ``padding_value`` set to ``fill_value``. Values are otherwise returned
        as decoded; no rescale slope/intercept is applied here.
    """
    # `pydicom.read_file` is a deprecated alias removed in pydicom 3.0.
    dcm = pydicom.dcmread(filename)
    img = dcm.pixel_array
    # Boolean-mask assignment edits the decoded array in place.
    img[img == padding_value] = fill_value
    return img

In [None]:
# (output key, DICOM attribute keyword) pairs copied verbatim into the result,
# listed in output order.
# Not extracted: ImageType, ImagePositionPatient, ImageOrientationPatient.
_DICOM_META_FIELDS = (
    ('study_instance_uid', 'StudyInstanceUID'),
    ('series_instance_uid', 'SeriesInstanceUID'),
    ('series_number', 'SeriesNumber'),
    ('instance_number', 'InstanceNumber'),
    ('specific_character_set', 'SpecificCharacterSet'),
    ('sop_class_uid', 'SOPClassUID'),
    ('sop_instance_uid', 'SOPInstanceUID'),
    ('modality', 'Modality'),
    ('slice_thickness', 'SliceThickness'),
    ('kvp', 'KVP'),
    ('gantry_detector', 'GantryDetectorTilt'),
    ('table_height', 'TableHeight'),
    ('rotation_direction', 'RotationDirection'),
    ('x_ray_tube_current', 'XRayTubeCurrent'),
    ('exposure', 'Exposure'),
    ('convolution_kernel', 'ConvolutionKernel'),
    ('patient_position', 'PatientPosition'),
    ('frame_of_reference_uid', 'FrameOfReferenceUID'),
    ('samples_per_pixel', 'SamplesPerPixel'),
    ('photometric_interpretation', 'PhotometricInterpretation'),
    ('rows', 'Rows'),
    ('columns', 'Columns'),
    ('pixel_spacing', 'PixelSpacing'),
    ('bits_allocated', 'BitsAllocated'),
    ('bits_stored', 'BitsStored'),
    ('high_bit', 'HighBit'),
    ('pixel_representation', 'PixelRepresentation'),
    ('window_center', 'WindowCenter'),
    ('window_width', 'WindowWidth'),
    ('rescale_intercept', 'RescaleIntercept'),
    ('rescale_slope', 'RescaleSlope'),
    # NOTE(review): this is the raw PixelData value, likely large; consider
    # dropping it when metadata is collected for many files.
    ('pixel_data', 'PixelData'),
)


def extract_dicom_meta_data(filename: str) -> Dict:
    """Read one DICOM file and return selected header fields plus pixel statistics.

    Args:
        filename: Path of the DICOM file, handed to ``pydicom.dcmread``.

    Returns:
        A dict with one entry per pair in ``_DICOM_META_FIELDS`` (the value of
        that dataset attribute, unmodified), followed by ``img_min``,
        ``img_max``, ``img_mean`` and ``img_std`` computed over every element
        of the decoded pixel array.

    Raises:
        AttributeError: Attribute access is strict, so a dataset that lacks one
            of the listed attributes propagates the error from the lookup.
    """
    # `pydicom.read_file` is a deprecated alias removed in pydicom 3.0.
    dcm = pydicom.dcmread(filename)

    # A flat view is enough for the statistics below and avoids the two
    # full copies that `np.array(...).flatten()` would make.
    pixels = np.asarray(dcm.pixel_array).ravel()

    data = {key: getattr(dcm, keyword) for key, keyword in _DICOM_META_FIELDS}
    data['img_min'] = np.min(pixels)
    data['img_max'] = np.max(pixels)
    data['img_mean'] = np.mean(pixels)
    data['img_std'] = np.std(pixels)
    return data

In [None]:
# Extract the metadata of the first training slice and show it as a one-row table.
# The dict is wrapped in a list so that one file always becomes exactly one row:
# `DataFrame.from_dict` on the bare dict treats every value as a column, which
# raises when all values are scalars and repeats the row when a tag holds a
# sequence (PixelSpacing is presumably a two-element sequence here -- confirm).
meta_data = extract_dicom_meta_data(train.path.iloc[0])
meta_data_df = pd.DataFrame([meta_data])
meta_data_df.shape

In [None]:
# Pick the columns to average by position: 3-4, 8-11 and 13-16 of `train`
# (column 12 is skipped). Slices are used so out-of-range bounds clip silently.
column_spans = (slice(3, 5), slice(8, 12), slice(13, 17))
feats = [name for span in column_spans for name in train.columns[span]]

In [None]:
# Column-wise mean of every selected column, as a {column name: mean} dict.
feature_means = train.loc[:, feats].mean(axis=0)
means = feature_means.to_dict()

In [None]:
# Constant baseline: start every row at 0.1, then give each row whose id contains
# a feature name the mean of that feature taken from `means`.
submission['label'] = 0.1
# Matching is by plain substring, and one name can sit inside another (e.g.
# "chronic_pe" inside "acute_and_chronic_pe"). Visiting shorter names first means
# the longest matching name is always written last, so the outcome does not
# depend on the order in which `means` happens to list the features.
for feat in sorted(means, key=len):
    matches = submission['id'].str.contains(feat, regex=False)
    submission.loc[matches, 'label'] = means[feat]

In [None]:
# Write the filled-in submission to 'submission.csv' in the working directory,
# leaving out the DataFrame index column.
submission.to_csv('submission.csv', index = False)