In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Visit every file below the input directory. Printing the paths is switched
# off, so the traversal produces no output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        pass  # print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import json
from pathlib import Path
from glob import glob

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import tensorflow as tf
from tensorflow.python.keras.preprocessing.image import load_img, img_to_array

from keras import models, regularizers, layers, optimizers, losses, metrics
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils, to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image

In [None]:
# Root of the competition dataset and its train/test sub-directories.
data_path = Path('/kaggle/input/osic-pulmonary-fibrosis-progression/')
train_path, test_path = data_path / 'train', data_path / 'test'

for label, location in (("training_path", train_path), ("test_path", test_path)):
    print(label, location)

In [None]:
# Shell escape: list the top-level entries of the competition dataset directory.
!ls /kaggle/input/osic-pulmonary-fibrosis-progression/

# Read DICOM

## What is DICOM?

DICOM (Digital Imaging and Communications in Medicine) is a file format that stores metadata alongside the pixel data of an image. 
Some basic information can be extracted from the image metadata: the gender and age of the patient, as well as information about how the image was sampled and generated. 

## Import pydicom
Pydicom is a Python package specifically for parsing .dcm files. 

In [None]:
import pydicom
from pydicom.data import get_testdata_files

def find_dicom_files(root):
    """Return the paths of all files below ``root`` whose name ends in ".dcm".

    The extension check is case-insensitive and anchored to the end of the
    file name, so names that merely contain ".dcm" (for example
    "scan.dcm.txt") are not picked up. Paths come back in the order in which
    ``os.walk`` yields them; a missing ``root`` gives an empty list.
    """
    return [
        os.path.join(dir_name, file_name)
        for dir_name, _sub_dirs, file_names in os.walk(root)
        for file_name in file_names
        if file_name.lower().endswith(".dcm")
    ]


PathDicom = '/kaggle/input/osic-pulmonary-fibrosis-progression/'
# Every DICOM file under the dataset root (all sub-directories included).
lstFilesDCM = find_dicom_files(PathDicom)

In [None]:
# Show the path of the first DICOM file that was collected.
first_dicom_path = lstFilesDCM[0]
print(first_dicom_path)

## Get metadata

In [None]:
# Parse the first collected file; leaving the dataset as the cell's last
# expression displays it.
first_dicom_file = lstFilesDCM[0]
RefDs = pydicom.dcmread(first_dicom_file)
RefDs

In [None]:
# Re-read the first collected file as the reference dataset.
reference_file = lstFilesDCM[0]
RefDs = pydicom.dcmread(reference_file)

# (rows, columns, number of files). NOTE: lstFilesDCM lists every .dcm file
# found under the dataset root, so the third entry is the total file count,
# not the number of slices in a single scan.
n_rows, n_cols = int(RefDs.Rows), int(RefDs.Columns)
ConstPixelDims = (n_rows, n_cols, len(lstFilesDCM))
print(ConstPixelDims)

## Look at the metadata

In [None]:
# Print a few header fields of the reference dataset.
# The optional tags below are read with .get() so that a file lacking one of
# them prints "(missing)" instead of raising AttributeError (same approach as
# the SliceLocation lookup further down).
pat_name = RefDs.PatientName
display_name = pat_name.family_name + ", " + pat_name.given_name
print("Patient's name................:", display_name)
print("Patient id....................:", RefDs.PatientID)
print("Modality......................:", RefDs.Modality)
print("BodyPartExamined..............:", RefDs.get('BodyPartExamined', "(missing)"))
print("Image Position    (Patient)...:", RefDs.get('ImagePositionPatient', "(missing)"))
print("Image Orientation (Patient)...:", RefDs.get('ImageOrientationPatient', "(missing)"))

## Plot an image

In [None]:
# Report the stored image geometry when the file carries pixel data.
has_pixels = 'PixelData' in RefDs
if has_pixels:
    rows, cols = int(RefDs.Rows), int(RefDs.Columns)
    size_line = "Image size.......: {rows:d} x {cols:d}, {size:d} bytes".format(
        rows=rows, cols=cols, size=len(RefDs.PixelData))
    print(size_line)
    if 'PixelSpacing' in RefDs:
        print("Pixel spacing....:", RefDs.PixelSpacing)

# SliceLocation may be absent; .get() supplies a placeholder in that case.
slice_location = RefDs.get('SliceLocation', "(missing)")
print("Slice location...:", slice_location)

# Draw the slice with matplotlib using the 'bone' colormap.
plt.imshow(RefDs.pixel_array, cmap=plt.cm.bone)
plt.show()

### Make it bigger easily

In [None]:
# Same slice on a larger, 10 x 10 inch canvas.
large_figsize = (10, 10)
plt.figure(figsize=large_figsize)
plt.imshow(RefDs.pixel_array, cmap=plt.cm.bone)

## Plot a few images

In [None]:
# Load the tabular test data; the bare name on the last line displays it.
test_csv_file = data_path / 'test.csv'
data = pd.read_csv(test_csv_file)
data

In [None]:
# Row(s) of the test table for one patient (age 73, male).
is_selected_patient = data['Patient'] == 'ID00422637202311677017371'
data.loc[is_selected_patient]

In [None]:
# Build a small metadata table for the first `dim` DICOM files.
#
# Age and Sex are looked up per patient in the competition CSVs instead of
# being hard-coded, because the collected files are not guaranteed to belong
# to a single patient. Each file is parsed once, and only its header is read.
dim = 25
columns = ['ID', 'Modality', 'BPE', 'Slice', 'Age', 'Sex']
size = len(columns)

# One (Age, Sex) row per patient id. lstFilesDCM is gathered from the whole
# dataset directory, so both tables are consulted.
demographics = (
    pd.concat(
        [pd.read_csv(data_path / 'train.csv'), pd.read_csv(data_path / 'test.csv')],
        ignore_index=True,
    )
    .drop_duplicates('Patient')
    .set_index('Patient')[['Age', 'Sex']]
)

data2Dlist = []
# Slicing (rather than range(dim)) keeps this working with fewer than `dim` files.
for i, dicom_path in enumerate(lstFilesDCM[:dim]):
    print(i, dicom_path)
    # Pixel data is not needed for the table, so stop parsing before it.
    ds = pydicom.dcmread(dicom_path, stop_before_pixels=True)
    patient_id = ds.PatientID
    if patient_id in demographics.index:
        age = demographics.at[patient_id, 'Age']
        sex = demographics.at[patient_id, 'Sex']
    else:
        # Patient not listed in either CSV.
        age = sex = '(missing)'
    data2Dlist.append([
        patient_id,
        ds.Modality,
        ds.get('BodyPartExamined', '(missing)'),
        ds.InstanceNumber,
        age,
        sex,
    ])

df = pd.DataFrame(data2Dlist, columns=columns)
df

## Plot in a different way

In [None]:
# 5 x 5 grid of the first 25 files, drawn with the 'bone' colormap and
# captioned from the metadata table.
fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap=plt.cm.bone)
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images.png', dpi=300)

plt.show()

### Explore classes of colormaps
* Sequential
* Sequential (2)
* Diverging
* Cyclic
* Qualitative
* Miscellaneous

In [None]:
## 'Sequential' colormaps: the lightness value increases monotonically across the map.
## 'inferno' belongs to the 'Perceptually Uniform Sequential' group.

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='inferno')
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_inferno.png', dpi=100)

plt.show()

In [None]:
## 'Sequential' colormaps: the lightness value increases monotonically across the map.
## Other members of this class that can be tried:
## 'Greys', 'Purples', 'Blues', 'Greens', 'Oranges', 'Reds', 'YlOrBr', 'YlOrRd', 'OrRd', 'PuRd', 'RdPu', 'BuPu', 'GnBu', 'PuBu', 'YlGnBu', 'PuBuGn', 'BuGn', 'YlGn'.

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='Reds')
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_reds.png', dpi=100)

plt.show()

In [None]:
## 'Sequential (2)' colormaps: some of them have monotonically increasing lightness (e.g. 'binary').

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='binary')
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_binary.png', dpi=100)

plt.show()

In [None]:
## 'Sequential (2)' colormaps: some of them have kinks in their lightness curve (e.g. 'hot').

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='hot')
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_hot.png', dpi=100)

plt.show()

In [None]:
## 'Diverging' colormaps: lightness increases monotonically up to a maximum and then decreases monotonically.

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='Spectral')
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_spectral.png', dpi=100)

plt.show()

In [None]:
## 'Diverging' colormaps: lightness increases monotonically up to a maximum and then decreases monotonically.

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='seismic')
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_seismic.png', dpi=100)

plt.show()

In [None]:
## 'Cyclic' colormaps: lightness changes monotonically from the start to the middle, and inversely from the middle to the end.
## A cyclic map should start and end on the same color and meet a symmetric center point in the middle.

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='twilight')
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_twilight.png', dpi=100)

plt.show()

In [None]:
## 'Qualitative' colormaps: lightness jumps around across the map and is clearly not monotonic.
## They are not designed as perceptual maps, and the lightness values confirm that,
## so they are a poor choice for showing image intensities.

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='Dark2')
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_dark2.png', dpi=100)

plt.show()

In [None]:
## 'Miscellaneous' colormaps: some of them were created for particular uses.

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='gnuplot2')  # alternatives: gnuplot, gnuplot2
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_gnuplot2.png', dpi=100)

plt.show()

In [None]:
## 'Miscellaneous' colormaps: some of them were created for particular uses.
## 'CMRmap' was designed to convert well to grayscale, although its lightness curve appears to have some small kinks.

fig, axs = plt.subplots(5, 5, figsize=(23, 23))

for i, ax in enumerate(axs.flat):
    RefDs = pydicom.dcmread(lstFilesDCM[i])
    ax.imshow(RefDs.pixel_array, cmap='CMRmap')
    caption = 'Modality: {} BPE: {}\n Slice: {} Age: {} Sex: {}'.format(
        df.Modality[i], df.BPE[i], df.Slice[i], df.Age[i], df.Sex[i])
    ax.set_title(caption)

plt.savefig('data_dicom_few_images_CMRmap.png', dpi=100)

plt.show()

# Explore tables

In [None]:
# Load the three competition tables, then show their shapes.
train, test, sub = (
    pd.read_csv(data_path / csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

train.shape, test.shape, sub.shape

In [None]:
# First rows and per-column missing-value counts for each table.
print('Train:\n', train.head(5), '\n')
print(train.isna().sum())
print('\n---------------------------------------------------------------------------\n')
# The "Test" section must show the test table (it previously printed train.head()).
print('Test:\n', test.head(5), '\n')
print(test.isna().sum())

In [None]:
# One row of axes per column; the train table is drawn on the left and the
# test table on the right.
fig, axs = plt.subplots(6, 2, figsize=(14, 28))
frames = (train, test)

# Categorical columns: bar chart of the value counts.
for row, column in ((0, 'Sex'), (2, 'SmokingStatus')):
    for col, frame in enumerate(frames):
        frame[column].value_counts().plot(kind='bar', legend=True, ax=axs[row, col])

# Numeric columns: histogram with the given number of bins, labelled on the x axis.
for row, column, n_bins in ((1, 'Age', 50), (3, 'Percent', 50), (4, 'FVC', 20), (5, 'Weeks', 20)):
    for col, frame in enumerate(frames):
        frame[column].hist(bins=n_bins, ax=axs[row, col])
        axs[row, col].set_xlabel(column)

plt.savefig('data_sex_age_smokingstatus_percent_fvc_weeks.png', dpi=300)

plt.show()

## Submission

In [None]:
# Copy each patient's FVC from the test table into every submission row whose
# 'Patient_Week' value contains that patient id.
# - Iterating over the column values avoids the label lookups test.Patient[i] /
#   test.FVC[i], which only work while `test` has a default 0..n-1 index.
# - regex=False makes the match a literal substring test, so the id is never
#   interpreted as a regular expression.
for patient, fvc in zip(test['Patient'], test['FVC']):
    row_mask = sub['Patient_Week'].str.contains(patient, regex=False)
    sub.loc[row_mask, 'FVC'] = fvc

sub.to_csv('submission.csv', index=False)
print("file is ready!")