**Source:** https://www.kaggle.com/xhlulu/siim-covid-19-convert-to-jpg-256px

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
from PIL import Image
import warnings
warnings.filterwarnings("ignore")
import ast
from tqdm.auto import tqdm
import cv2
import os
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixel data as an 8-bit array.

    Args:
        path: Location of the DICOM file to read.
        voi_lut: When true, the raw pixels are passed through
            ``apply_voi_lut`` before scaling; otherwise the raw
            ``pixel_array`` is used as-is.
        fix_monochrome: When true, intensities are flipped
            (``max - value``) if the file's ``PhotometricInterpretation``
            is ``"MONOCHROME1"``.

    Returns:
        A ``numpy.uint8`` array, min-max scaled to the 0-255 range.
        An image whose pixels are all equal is returned as all zeros.
    """
    # ``dcmread`` replaces the deprecated ``read_file`` alias, which was
    # removed in pydicom 3.0.
    dicom = pydicom.dcmread(path)
    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    # getattr keeps a file without the tag from raising AttributeError;
    # such a file is simply left un-inverted.
    photometric = getattr(dicom, "PhotometricInterpretation", None)
    if fix_monochrome and photometric == "MONOCHROME1":
        data = np.amax(data) - data

    # Shift so the minimum is 0, then stretch to the full 8-bit range.
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by zero would yield NaNs that turn into
        # arbitrary values when cast to uint8, so return a black frame.
        return np.zeros(data.shape, dtype=np.uint8)
    data = data / peak
    return (data * 255).astype(np.uint8)

In [None]:
# Per-image records gathered during conversion: output file name, the two
# leading dimensions of the image before resizing, and the split it came from.
image_id = []
dim0 = []
dim1 = []
splits = []
count = 0  # not used by the cells visible here; kept in case a later cell reads it
for data in ['test', 'train']:
    save_dir = f'/kaggle/tmp/{data}/'
    os.makedirs(save_dir, exist_ok=True)
    input_root = f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/{data}'
    for dirname, _, filenames in tqdm(os.walk(input_root)):
        if not filenames:
            # Intermediate folders hold no files and have fewer path
            # components than the indexing below expects.
            continue
        # Path components 4 and 5 are the two folder levels below the split
        # directory; together they prefix every output file name.
        parts = dirname.split("/")
        prefix = parts[4] + '-' + parts[5]
        for file in filenames:
            image = dicom2array(os.path.join(dirname, file))
            # Record the size before resizing discards it.
            dim0.append(image.shape[0])
            dim1.append(image.shape[1])
            image = cv2.resize(image, (256, 256))
            im = Image.fromarray(image)
            # Build the name once so the file on disk and the id written to
            # size.csv always agree (the saved name used to lack the '-'
            # separator that the recorded id had). splitext swaps only the
            # extension instead of every 'dcm' substring in the name.
            out_name = prefix + '-' + os.path.splitext(file)[0] + '.jpg'
            im.save(os.path.join(save_dir, out_name))
            image_id.append(out_name)
            splits.append(data)

In [None]:
# Pack each split's output folder into a gzip-compressed tarball written to
# the current working directory. -C changes into the folder first, so "."
# archives its contents with paths relative to that folder.
!tar -zcf train.tar.gz -C "/kaggle/tmp/train/" .
!tar -zcf test.tar.gz -C "/kaggle/tmp/test/" .

In [None]:
# Assemble the per-image records into a table, write it to size.csv without
# the index column, and leave the frame as the cell's displayed value.
size_columns = {
    'id': image_id,
    'dim0': dim0,
    'dim1': dim1,
    'split': splits,
}
df = pd.DataFrame(size_columns)
df.to_csv('size.csv', index=False)
df