In [None]:
#%%writefile test.py
import pandas as pd
import numpy as np
import cv2
import os
import re
import albumentations as A
import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler
from PIL import Image
from albumentations.pytorch.transforms import ToTensorV2
from matplotlib import pyplot as plt
from tqdm import tqdm

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
DIR_INPUT = '../input/rsna-pneumonia-detection-2018/input/images'
df = pd.read_csv("../input/rsna-pneumonia-detection-2018/input/stage_2_train_labels.csv")

# Keep only the rows labelled Target == 1, restricted to the patient id and
# the bounding-box columns.  A single boolean mask replaces a per-row
# `.loc` copy loop, which is very slow on a frame of this size.
df_pos = df.loc[
    df['Target'] == 1,
    ['patientId', 'x', 'y', 'width', 'height'],
].reset_index(drop=True)

# One entry per patient, in order of first appearance in the label file.
image_ids = df_pos['patientId'].unique()

# Images used for inference live in the same directory as DIR_INPUT.
DIR_TEST = DIR_INPUT

# The last 300 positive patients form the validation subset.
valid_ids = image_ids[-300:]
valid_df = df_pos[df_pos['patientId'].isin(valid_ids)]
print(valid_df.head())
print("****************",valid_df.shape)
test_images = os.listdir(DIR_TEST)

# Load a Faster R-CNN (ResNet-50 FPN) model pre-trained on COCO.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True, min_size=1024)
num_classes = 2  # 1 class (pneumonia) + background
# Number of input features expected by the box classifier head.
in_features = model.roi_heads.box_predictor.cls_score.in_features
# Replace the pre-trained head with one sized for our two classes.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

os.makedirs('../validation_predictions', exist_ok=True)
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved from a GPU run can still be loaded on a CPU-only machine.
model.load_state_dict(
    torch.load(
        '../input/rsna-pytorch-hackathon-fasterrcnn-resnet-training/fasterrcnn_resnet50_fpn.pth',
        map_location=device,
    )
)
model.to(device)

def format_prediction_string(boxes, scores):
    """Serialise detections into a single space-separated string.

    Each (score, box) pair contributes five fields: the score with four
    decimal places followed by the first four box values truncated to
    ints.  Pairs are joined with single spaces; pairing stops at the
    shorter of the two inputs.  Returns "" when there are no pairs.
    """
    return " ".join(
        f"{score:.4f} {int(box[0])} {int(box[1])} {int(box[2])} {int(box[3])}"
        for score, box in zip(scores, boxes)
    )

# Minimum confidence a detection needs to count as a predicted box.
detection_threshold = 0.9
# Number of patient ids to score (indices 0..100 inclusive).
max_images = 101
results = []
model.eval()
with torch.no_grad():
    # Slicing up-front (instead of breaking out of the loop) lets tqdm show
    # a real total.
    for image_s in tqdm(image_ids[:max_images]):
        image_path = f"{DIR_TEST}/{image_s}.jpg"
        orig_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if orig_image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f"Could not read image: {image_path}")

        # BGR uint8 HxWxC -> RGB float32 in [0, 1], CxHxW, with a batch axis.
        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        image = np.transpose(image, (2, 0, 1))
        # Use the selected device rather than assuming CUDA is present.
        image = torch.tensor(image, dtype=torch.float, device=device)
        image = torch.unsqueeze(image, 0)

        output = model(image)[0]
        boxes = output['boxes'].cpu().numpy()
        scores = output['scores'].cpu().numpy()

        if len(boxes) == 0:
            # The model proposed nothing at all for this image.
            results.append({
                'patientId': image_s,
                'PredictionString': None,
                'pred_cls': 0,
                'actual_cls': 1,
            })
            continue

        # Filter boxes and scores with the same mask so every kept box is
        # paired with its own score.
        keep = scores >= detection_threshold
        scores = scores[keep]
        boxes = boxes[keep].astype(np.int32)

        # draw_boxes keeps (x1, y1, x2, y2) for drawing; boxes is converted
        # to (x, y, width, height) for the prediction string.
        draw_boxes = boxes.copy()
        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

        for box in draw_boxes:
            cv2.rectangle(orig_image,
                          (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])),
                          (0, 0, 255), 3)

        # Render the annotated image and close the figure straight away so
        # figures do not accumulate; call plt.savefig(...) before plt.close()
        # to persist a preview.
        plt.imshow(cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.close()

        results.append({
            'patientId': image_s,
            'PredictionString': format_prediction_string(boxes, scores),
            # Positive only when at least one box cleared the threshold.
            'pred_cls': 1 if len(draw_boxes) > 0 else 0,
            'actual_cls': 1,
        })

sub_df = pd.DataFrame(results, columns=['patientId', 'PredictionString', 'pred_cls', 'actual_cls'])
print(sub_df)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
# Pin the label order to [0, 1] so the matrix is always 2x2.  Without it,
# confusion_matrix shrinks to 1x1 when only one class value occurs in the
# inputs, and the four-way unpacking below raises ValueError.
df_cm = confusion_matrix(sub_df['actual_cls'], sub_df['pred_cls'], labels=[0, 1])
tn, fp, fn, tp = df_cm.ravel()
# Accuracy is the share of correct predictions: true positives plus true
# negatives over all samples.
print("Accuracy : ",(tp+tn)/(tn+fp+fn+tp))
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16})
plt.show()

In [None]:
#!python test.py