In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        path = os.path.join(folder, name)
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the competition's train.csv into a DataFrame and display it.
train = pd.read_csv('/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/train.csv')
train

In [None]:
# Load sample_submission.csv as `test` and display it.
# Its image_id column is what the test dataset further down iterates over.
test = pd.read_csv('/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/sample_submission.csv')
test

In [None]:
# Show every row of `train` that belongs to one specific image id.
train[train['image_id'] =='9a5094b2563a1ef3ff50dc5c7ff71345']

In [None]:
import pydicom

# Read one DICOM file from the test folder and display the parsed dataset.
dicom = pydicom.dcmread('/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/test/74b23792db329cff5843e36efb8aa65a.dicom')
dicom

In [None]:
# The pixel values go above 255.

dicom.pixel_array

In [None]:
# Show the dimensions of the decoded pixel array.
dicom.pixel_array.shape

In [None]:
# Keep the decoded pixels of the sample file under a short name for the cells below.
image = dicom.pixel_array 

In [None]:
import matplotlib.pyplot as plt

# Open a 16x8 inch figure for the sample image.
plt.figure(figsize =(16,8))

# Render the pixel array with a grayscale colour map.
plt.imshow(image, cmap = 'gray')

In [None]:
# Preview the shape obtained by stacking three copies of the image (np.stack adds a new leading axis).
np.stack([image,image,image]).shape

In [None]:
from torch.utils.data import Dataset, DataLoader

class Vinbigdataset(Dataset):
    """Dataset that loads DICOM files as normalised 3-channel images.

    Each item is a pair ``(image, image_id)``. Without ``transforms`` the
    image is a float32 array with the channel axis last, holding three
    identical channels scaled to the range [0, 1].

    Args:
        dataframe: table with an ``image_id`` column; repeated ids are
            collapsed so every image is yielded once.
        image_dir: directory that contains ``<image_id>.dicom`` files.
        transforms: optional callable invoked as ``transforms(image=image)``
            whose result is indexed with ``'image'``.

    NOTE(review): the PhotometricInterpretation tag is not inspected, so
    inverted (MONOCHROME1) files are not flipped here - confirm that this
    matches how the loaded weights were trained.
    """

    def __init__(self,dataframe, image_dir, transforms = None):
        super().__init__()
        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    @staticmethod
    def _to_unit_range_rgb(pixels, slope=1.0, intercept=0.0):
        """Apply the linear rescale, min-max normalise and repeat to 3 channels.

        Args:
            pixels: 2-D array of stored pixel values (any numeric dtype).
            slope: multiplier of the DICOM rescale transform.
            intercept: offset of the DICOM rescale transform.

        Returns:
            float32 array of shape (rows, columns, 3) with values in [0, 1];
            all zeros when every pixel has the same value.
        """
        # Work on a float32 copy: adding a signed intercept in place to the
        # (usually unsigned) decoded array fails NumPy's casting check, and an
        # int16 round-trip wraps values above 32767.
        image = np.asarray(pixels, dtype=np.float32)
        if slope != 1:
            image = image * np.float32(slope)
        if intercept != 0:
            image = image + np.float32(intercept)

        # Shift so the minimum is 0; the stored minimum is not guaranteed to be 0.
        image = image - image.min()
        # Divide by the maximum to land in [0, 1]; skipped for a constant
        # image, where the division would be 0/0 and produce NaN.
        peak = image.max()
        if peak > 0:
            image = image / peak

        # Repeat the single channel three times (channel axis first), then
        # move the channel axis to the end.
        return np.stack([image, image, image]).transpose(1, 2, 0)

    def __getitem__(self, index):
        """Load, normalise and (optionally) transform the image at ``index``.

        Returns:
            tuple ``(image, image_id)``.
        """
        image_id = self.image_ids[index]
        # os.path.join works whether or not image_dir ends with a separator.
        dicom = pydicom.dcmread(os.path.join(self.image_dir, image_id + '.dicom'))

        # Output value = slope * stored value + intercept. Both tags are
        # optional, so fall back to the identity transform when absent.
        intercept = float(dicom.RescaleIntercept) if 'RescaleIntercept' in dicom else 0.0
        slope = float(dicom.RescaleSlope) if 'RescaleSlope' in dicom else 1.0

        image = self._to_unit_range_rgb(dicom.pixel_array, slope, intercept)

        if self.transforms:
            image = self.transforms(image=image)['image']
        return image, image_id

    def __len__(self):
        """Number of distinct image ids."""
        return self.image_ids.shape[0]

In [None]:
from albumentations.pytorch.transforms import ToTensorV2

import albumentations as al

In [None]:
def get_test_transform():
    """Build the inference-time pipeline: only convert the image to a tensor.

    Returns:
        An albumentations ``Compose`` that is called as ``pipeline(image=...)``.
    """
    # Compose needs transform *instances*. Passing the ToTensorV2 class itself
    # makes the pipeline fail as soon as it is applied to a sample.
    return al.Compose([ToTensorV2()])

In [None]:
# Dataset over the image ids listed in `test`, reading the DICOM files from the test folder
# and applying the test-time transform to each image.
test_dataset = Vinbigdataset(test, '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/test/', get_test_transform())

In [None]:
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    For samples of the form ``(image, image_id)`` this yields
    ``(all_images, all_image_ids)``; nothing is stacked into a tensor.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
# Serve the test dataset in batches of 8 using 4 worker processes.
# collate_fn keeps each batch as tuples of samples instead of stacking them into one tensor.
test_data_loader = DataLoader(test_dataset, batch_size = 8, num_workers = 4, collate_fn = collate_fn)

# 모델가져오기

In [None]:
import torchvision

In [None]:
# When training, use pretrained = True; here both flags are False because the weights
# are loaded from a saved checkpoint in a later cell.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = False, pretrained_backbone = False)

In [None]:
# 맨마지막층 자체를 바꿔줘야함. 클래스 갯수로 (91 개 -->15개). 그래서 모델 추가

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [None]:
# Replace the detection head with a predictor built for 1024 input features and 15 classes.
# NOTE(review): 15 is presumably 14 abnormality classes plus background - confirm against the checkpoint.
model.roi_heads.box_predictor = FastRCNNPredictor(1024,15)

In [None]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device

# 다른사람 가중치 가져오기

In [None]:
# Open question from the author: could TensorFlow .h5 weights be brought in here instead?
# NOTE(review): load_state_dict consumes a PyTorch state dict, so presumably not without a conversion step - confirm.

# map_location remaps tensors that were saved from a GPU onto the device selected above,
# so the checkpoint also loads in a CPU-only session.
model.load_state_dict(torch.load('../input/fasterrcnn/fasterrcnn_resnet50_fpn.pth', map_location = device))

In [None]:
# Switch the model to evaluation mode before predicting.
model.eval()

In [None]:
# Move the model to the selected device (the GPU when one is available).
model.to(device)

In [None]:
# Load the sample submission file and display it.
sub = pd.read_csv('/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/sample_submission.csv')
sub

In [None]:
def format_prediction_string(labels, boxes, scores):
    """Serialise detections into one space-separated prediction string.

    Every detection contributes six fields in this order: label, score with
    four decimals, then the four box values. Detections are joined with a
    single space; no detections gives an empty string.

    Args:
        labels: class id per detection.
        boxes: four box values per detection.
        scores: confidence per detection.
    """
    return " ".join(
        f"{label} {score:.4f} {box[0]} {box[1]} {box[2]} {box[3]}"
        for label, score, box in zip(labels, scores, boxes)
    )

# Minimum score a detection needs in order to be kept.
# 0.5 is a provisional value that still has to be tuned.
detection_threshold = 0.5
results = []

# Prediction only, so gradient tracking is switched off.
with torch.no_grad():
    for images, image_ids in test_data_loader:
        # Move every image of the batch onto the model's device.
        images = [image.to(device) for image in images]
        # Forward pass (the counterpart of "predict"); gives one output dict per image.
        outputs = model(images)

        for image_id, output in zip(image_ids, outputs):
            boxes, labels, scores = (
                output[key].data.cpu().numpy() for key in ('boxes', 'labels', 'scores')
            )

            # Competition class ids start at 0 (0-14), so the model's labels are shifted
            # down by one; anything that lands on -1 becomes 14, the id the competition
            # uses for "no finding".
            labels = labels - 1
            labels[labels == -1] = 14

            # Keep only the detections that reach the score threshold.
            selected = scores >= detection_threshold
            boxes = boxes[selected].astype(np.int32)
            scores = scores[selected]
            labels = labels[selected]

            if len(boxes) > 0:
                prediction_string = format_prediction_string(labels, boxes, scores)
            else:
                # Nothing survived: emit the "no finding" placeholder row.
                prediction_string = '14 1.0 0 0 1 1'

            results.append({'image_id': image_id, 'PredictionString': prediction_string})

In [None]:
# Collect the per-image results into a DataFrame with the two prediction columns and display it.
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
test_df

# 과제 : 무엇때문에 오류가 나는지 찾아오기.