In [None]:
# ! pip install sqlalchemy
# ! rm /kaggle/working/sqlite3.db
!pip install scipy==1.4.1

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import copy
from PIL import Image
import pydicom
import scipy.ndimage
import imageio
from os import listdir
from sqlalchemy import create_engine
import sqlite3
from tqdm import tqdm
from torchvision import transforms
import torch
import random
from torch.utils import data
import torchvision.models as models
from torch.utils.data import Dataset
import pydicom as dicom
import time
from sklearn.model_selection import train_test_split

In [None]:
# Location of the training DICOM files and of the per-image label table.
path = "../input/rsna-str-pulmonary-embolism-detection/train/"
train = pd.read_csv("../input/rsna-str-pulmonary-embolism-detection/train.csv")
# Keep the three UID columns that locate a DICOM file plus the per-image label.
# PEDataset addresses these columns by position, so their order matters.
train=train[["StudyInstanceUID","SeriesInstanceUID","SOPInstanceUID","pe_present_on_image"]]

print(train.head())

# Split by study, not by image: neighbouring slices of one CT study are nearly
# identical, so a row-wise split would place almost-duplicate images in both
# partitions and inflate the validation score.
study_ids = train["StudyInstanceUID"].unique()
train_studies, test_studies = train_test_split(study_ids, test_size=0.25, random_state=42)
train_dataset = train[train["StudyInstanceUID"].isin(train_studies)]
test_dataset = train[train["StudyInstanceUID"].isin(test_studies)]

class myNet(torch.nn.Module):
    """Inception-v3 feature extractor followed by a linear classification head.

    ``forward`` returns raw logits of shape ``(batch, num_classes)``. No softmax
    is applied: ``torch.nn.CrossEntropyLoss`` (the criterion used for training in
    this notebook) expects unnormalised logits, and ``torch.max(outputs, 1)``
    yields the same predicted class with or without it.

    Args:
        num_classes: Number of output logits. Defaults to 2 (PE absent/present).
        pretrained: Forwarded to ``models.inception_v3``; ``True`` loads the
            pretrained weights.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()

        backbone = models.inception_v3(pretrained=pretrained)
        # Switch the auxiliary classifier off after construction so the backbone
        # returns a single tensor in train mode as well. Chaining
        # ``backbone.children()`` in a Sequential is not equivalent to the real
        # forward pass: it would feed the auxiliary head's output into the next
        # Inception block and skip the flatten step before ``fc``.
        backbone.aux_logits = False
        backbone.AuxLogits = None
        # Drop the 1000-way ImageNet classifier; the backbone now emits the
        # 2048-dimensional pooled features consumed by ``Linear_layer``.
        backbone.fc = torch.nn.Identity()

        self.inception_layer = backbone
        self.Linear_layer = torch.nn.Linear(2048, num_classes)

    @property
    def model(self):
        """Alias for the backbone, kept for code that accessed ``.model``."""
        return self.inception_layer

    def forward(self, x):
        """Map a batch of images to class logits of shape ``(batch, num_classes)``."""
        features = self.inception_layer(x)
        return self.Linear_layer(features)



# Image pipeline applied to every PIL image before it reaches the network:
# resize the short side to 299, take a 299x299 centre crop, convert to a float
# tensor and normalise each channel (these are the channel statistics that
# torchvision documents for its ImageNet-pretrained models).
_INPUT_SIDE = 299
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

preprocess = transforms.Compose(
    [
        transforms.Resize(_INPUT_SIDE),
        transforms.CenterCrop(_INPUT_SIDE),
        transforms.ToTensor(),
        transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ]
)

def transform_to_hu(image):
    """Apply the DICOM rescale slope/intercept to a slice's pixel data.

    Args:
        image: Object exposing ``pixel_array``, ``RescaleSlope`` and
            ``RescaleIntercept`` (a pydicom dataset in this notebook).

    Returns:
        A new ``int16`` array; ``image.pixel_array`` itself is not modified.
    """
    # ``astype`` already hands back an independent copy of the pixel data.
    hu = image.pixel_array.astype(np.int16)

    # Raw values at or below -1000 are zeroed before rescaling
    # (presumably scanner padding outside the field of view - TODO confirm).
    padding_mask = hu <= -1000
    hu[padding_mask] = 0

    intercept = image.RescaleIntercept
    slope = image.RescaleSlope

    if slope != 1:
        # Scale in floating point, then truncate back to int16.
        hu = (slope * hu.astype(np.float64)).astype(np.int16)

    hu += np.int16(intercept)
    return hu


class PEDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs from DICOM slices.

    Rows of the supplied frame are addressed by position: columns 0-2 are joined
    into ``<path><col0>/<col1>/<col2>.dcm`` (``path`` is the module-level
    training directory) and column 3 is returned as the label.

    Args:
        transform: Callable applied to the RGB PIL image of each slice.
        pd: DataFrame describing the slices. The parameter name is kept for
            backward compatibility; it shadows the pandas alias only inside
            ``__init__``.
    """

    # How many random replacement rows ``__getitem__`` tries before giving up.
    max_retries = 10

    def __init__(self, transform,pd):
        self.transform = transform
        self.df=pd

    def __len__(self):
        return self.df.shape[0]

    def _load_image_from_index(self, index):
        """Read row ``index``, convert the slice to an RGB image and transform it."""
        study, series, sop = (self.df.iloc[index, col] for col in range(3))
        ds = dicom.dcmread(f"{path}{study}/{series}/{sop}.dcm")

        # Convert once; the values are min-max scaled to 0..255 below.
        hu = transform_to_hu(ds).astype(np.float64)
        lowest, highest = hu.min(), hu.max()
        if highest > lowest:
            scaled = 255 * (hu - lowest) / (highest - lowest)
        else:
            # A constant slice has no contrast; avoid dividing by zero.
            scaled = np.zeros_like(hu)

        temp = Image.fromarray(scaled.astype(np.uint8)).convert("RGB")
        img = self.transform(temp)

        return img, self.df.iloc[index, 3]

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``, substituting a random row on failure.

        Unreadable rows are replaced by a randomly chosen row, at most
        ``max_retries`` times. Only ``Exception`` subclasses are retried, so
        ``KeyboardInterrupt`` still stops the run.

        Raises:
            RuntimeError: If every attempt failed; the last error is chained.
        """
        last_error = None
        for _ in range(self.max_retries + 1):
            try:
                return self._load_image_from_index(index)
            except Exception as err:
                last_error = err
                index = random.randrange(len(self))
        raise RuntimeError(
            "could not load a sample after {} retries".format(self.max_retries)
        ) from last_error

def data_sampler(dataset, shuffle):
    """Build the index sampler for ``dataset``.

    Returns a ``RandomSampler`` when ``shuffle`` is truthy and a
    ``SequentialSampler`` otherwise.
    """
    sampler_cls = data.RandomSampler if shuffle else data.SequentialSampler
    return sampler_cls(dataset)
dataloaders={}
# NOTE: these two lines rebind the split DataFrames to Dataset objects, so this
# cell must not be re-run without re-running the split first.
train_dataset=PEDataset(preprocess,train_dataset)
test_dataset=PEDataset(preprocess,test_dataset)
# Training: shuffle every epoch and drop the ragged last batch so each
# optimisation step sees a full batch of 16.
train_loader= data.DataLoader(train_dataset,batch_size=16,sampler=data_sampler(train_dataset, shuffle=True),drop_last=True)
# Validation: visit every sample exactly once, in a fixed order, so the metrics
# cover the complete validation set and are comparable between epochs.
test_loader= data.DataLoader(test_dataset,batch_size=16,sampler=data_sampler(test_dataset, shuffle=False),drop_last=False)

dataloaders["train"]=train_loader
dataloaders["val"]=test_loader
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` and restore the weights of the best validation epoch.

    Args:
        model: Network to train. Batches are moved to the device of its first
            parameter, so the caller decides between CPU and GPU.
        dataloaders: Mapping with ``'train'`` and ``'val'`` entries, each an
            iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs: Number of passes over both phases.
        is_inception: If true, the model is expected to return
            ``(outputs, aux_outputs)`` in train mode; the auxiliary loss is
            added with weight 0.4.

    Returns:
        ``(model, val_acc_history)``: the model loaded with the best validation
        weights, and one 0-d accuracy tensor per epoch.

    Raises:
        ValueError: If a phase's loader yields no batches.
    """
    print("begin trainning")
    since = time.time()

    # Follow the model's device instead of hard-coding CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in tqdm(range(num_epochs), dynamic_ncols=True, smoothing=0.01):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = torch.zeros((), dtype=torch.long, device=device)
            # Count what was actually processed: a loader using drop_last or a
            # custom sampler may yield fewer samples than its dataset holds.
            samples_seen = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels)
                samples_seen += batch_size

            if samples_seen == 0:
                raise ValueError("dataloader for phase '{}' produced no batches".format(phase))

            epoch_loss = running_loss / samples_seen
            epoch_acc = running_corrects.double() / samples_seen

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # Snapshot the weights whenever validation accuracy improves.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history
mynet=myNet()
# Move the network to the GPU before building the optimizer, as the torch.optim
# documentation recommends, so the optimizer is created over the final parameters.
mynet = mynet.cuda()
optimizer = torch.optim.SGD(mynet.parameters(), lr=0.01)
# Labels are class indices and predictions are taken with torch.max(outputs, 1),
# so cross-entropy is the training loss.
criterion = torch.nn.CrossEntropyLoss()
# Bind the result so the cell does not echo the whole (model, history) tuple.
mynet, val_acc_history = train_model(mynet, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False)





Loading scans per folder.
StudyInstanceUID - unique ID for each study (exam) in the data.
SeriesInstanceUID - unique ID for each series within the study.
SOPInstanceUID - unique ID for each image within the study (and across the whole dataset).

In [None]:
def load_scans(dcm_path):
    """Read every file in ``dcm_path`` with pydicom and order the slices along z.

    Args:
        dcm_path: Directory containing the DICOM files of one series.

    Returns:
        List of datasets sorted ascending by the third component of
        ``ImagePositionPatient``.
    """
    def z_position(scan):
        return float(scan.ImagePositionPatient[2])

    scans = []
    for file_name in listdir(dcm_path):
        scans.append(pydicom.dcmread("/".join((dcm_path, file_name))))
    return sorted(scans, key=z_position)

In [None]:
basepath = "../input/rsna-str-pulmonary-embolism-detection/"

# NOTE: this rebinds `train` to the full label table with all columns.
train = pd.read_csv(basepath + "train.csv")
test = pd.read_csv(basepath + "test.csv")

# Folder holding every slice of a series: <basepath>train/<study UID>/<series UID>.
train["dcm_path"] = basepath + "train/" + train.StudyInstanceUID + "/" + train.SeriesInstanceUID 

example = train.dcm_path.values[1]
scans = load_scans(example)
# Print a one-line summary; printing `scans` itself dumps every DICOM header
# of the series into the cell output.
print(f"Loaded {len(scans)} slices from {example}")