In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
from albumentations import Resize
from albumentations.pytorch import ToTensorV2
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Load the training table shipped with the competition data.
df = pd.read_csv(
    "../input/ranzcr-clip-catheter-line-classification/train.csv"
)

# Names of the target columns, one group per line prefix.
LABELS = [
    # ETT
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    # NGT
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    # CVC
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    # Swan Ganz
    'Swan Ganz Catheter Present',
]

# Preview the first rows as the cell output.
df.head()

In [None]:
# Flip to True for a quick smoke run on roughly 1% of the rows.
DEBUG = False
if DEBUG:
    # A fixed random_state keeps the debug subset identical across runs,
    # so debug results can be compared from one execution to the next.
    df = df.sample(frac=0.01, random_state=42).reset_index(drop=True)
    print(df.shape)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows for validation. random_state is fixed so the
# split -- and therefore the validation loss used to pick the checkpoint --
# is reproducible across kernel restarts.
# NOTE(review): this is a plain row-wise split; if the table contains
# several rows per patient, a grouped split may be needed -- confirm.
train, valid = train_test_split(df, test_size=0.1, random_state=42)
print(train.shape, valid.shape)

In [None]:
from torch.utils.data import Dataset

class TrainDataset(Dataset):
    """Dataset yielding ``(image, labels)`` pairs for rows of the training table.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column and one column per
            entry of the module-level ``LABELS`` list.
        image_dir: Directory containing ``<StudyInstanceUID>.jpg`` files.
        image_size: Height and width every image is resized to.
    """

    def __init__(self, df,
                 image_dir="../input/ranzcr-clip-catheter-line-classification/train",
                 image_size=512):
        self.df = df
        self.studyuid = df["StudyInstanceUID"].values
        self.labels = df[LABELS].values
        self.image_dir = image_dir
        # Build the transforms once; they carry no per-sample state, so
        # there is no need to re-instantiate them in every __getitem__ call.
        self.resize = Resize(image_size, image_size)
        self.to_tensor = ToTensorV2()

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Load, resize and tensorize the image at position ``idx``.

        Returns:
            Tuple ``(image, labels)`` where ``image`` is the output of
            ``ToTensorV2`` and ``labels`` is the row of label values.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        path = self.image_dir + "/" + self.studyuid[idx] + ".jpg"
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.resize(image=image)["image"]
        image = self.to_tensor(image=image)["image"]
        labels = self.labels[idx]
        return image, labels

In [None]:
from torch.utils.data import DataLoader
# Training batches: 16 samples per batch, with shuffling enabled.
train_dataset = TrainDataset(df=train)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
)

In [None]:
# Validation batches: same batch size as training, fixed order.
valid_dataset = TrainDataset(df=valid)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=16,
    shuffle=False,
)

In [None]:
# Extend the import path with a local copy of pytorch-image-models
# (presumably attached as an input dataset -- confirm). The append has to
# happen before `import timm` so that the import resolves against that path.
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm
from pprint import pprint
# Print the model names that timm lists as having pretrained weights.
pprint(timm.list_models(pretrained = True))

In [None]:
import torch.nn as nn
class ResNet200D(nn.Module):
    """timm backbone whose pooling and classifier are replaced by a custom head.

    The backbone's own ``global_pool`` and ``fc`` are swapped for identities,
    so the backbone returns its feature map; this module then applies
    adaptive average pooling and a single linear layer.

    Args:
        model_name: Architecture name forwarded to ``timm.create_model``.
        n_classes: Number of output logits. Defaults to 11, the value that
            was previously hard-coded.
        pretrained: Forwarded to ``timm.create_model``. Defaults to False,
            matching the previous behavior.
    """

    def __init__(self, model_name='resnet200d', n_classes=11, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Read the feature width before the original classifier is discarded.
        n_features = self.model.fc.in_features
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        # Attribute names are kept as-is so existing state_dict keys still match.
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, n_classes)

    def forward(self, x):
        """Return raw logits (no sigmoid applied) for the input batch ``x``."""
        features = self.model(x)
        # Collapse the pooled 1x1 spatial dims, leaving (batch, n_features).
        pooled_features = self.pooling(features).flatten(1)
        return self.fc(pooled_features)

In [None]:
import torch
# Use the GPU when torch can see one, otherwise run on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

model = ResNet200D().to(DEVICE)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters())

# Lowest mean validation loss seen so far; a checkpoint is written each
# time an epoch improves on it.
best_loss = np.inf
for epoch in range(6):
    # --- training pass ---
    model.train()
    for images, targets in train_loader:
        images = images.float().to(DEVICE)
        targets = targets.float().to(DEVICE)
        optimizer.zero_grad()
        loss = criterion(model(images), targets)
        loss.backward()
        optimizer.step()

    # --- validation pass (no gradients) ---
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for images, targets in valid_loader:
            images = images.float().to(DEVICE)
            targets = targets.float().to(DEVICE)
            batch_losses.append(criterion(model(images), targets).item())

    # Mean of the per-batch losses over the validation loader.
    valid_loss = sum(batch_losses) / len(valid_loader)
    print(f"EPOCH:{epoch}, Loss:{valid_loss}")

    if not valid_loss < best_loss:
        continue
    best_loss = valid_loss
    torch.save(model.state_dict(), "resnet200d.pth")
    print("saved...")

In [None]:
class TestDataset(Dataset):
    """Dataset yielding images (no labels) for rows of the test table.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column.
        image_dir: Directory containing ``<StudyInstanceUID>.jpg`` files.
        image_size: Height and width every image is resized to. Defaults to
            512 so inference sees the same resolution ``TrainDataset`` uses
            (the previous value here was 300).
    """

    def __init__(self, df,
                 image_dir="../input/ranzcr-clip-catheter-line-classification/test",
                 image_size=512):
        self.df = df
        self.studyuid = df["StudyInstanceUID"].values
        self.image_dir = image_dir
        # Build the transforms once rather than on every __getitem__ call.
        self.resize = Resize(image_size, image_size)
        self.to_tensor = ToTensorV2()

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Load, resize and tensorize the image at position ``idx``.

        Returns:
            The output of ``ToTensorV2`` for the resized RGB image.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        path = self.image_dir + "/" + self.studyuid[idx] + ".jpg"
        image = cv2.imread(path)
        # cv2.imread returns None on failure instead of raising; report the
        # offending path here rather than failing inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.resize(image=image)["image"]
        image = self.to_tensor(image=image)["image"]
        return image

In [None]:
# The sample submission supplies the StudyInstanceUID values to predict for.
test = pd.read_csv(
    "../input/ranzcr-clip-catheter-line-classification/sample_submission.csv"
)
test_dataset = TestDataset(df=test)
# Fixed order so predictions line up with the rows of `test`.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)


In [None]:

# Rebuild the network and restore the checkpoint saved by the training loop.
# map_location remaps the stored tensors onto the current DEVICE, so a
# checkpoint written on a GPU still loads in a CPU-only session.
model = ResNet200D().to(DEVICE)
model.load_state_dict(torch.load("./resnet200d.pth", map_location=DEVICE))

In [None]:
# Run the model over the test loader and gather sigmoid outputs per batch,
# then join the batches along the first axis into a single array.
model.eval()
batch_probs = []
with torch.no_grad():
    for X in test_loader:
        X = X.float().to(DEVICE)
        probs = model(X).sigmoid().to("cpu")
        batch_probs.append(probs.numpy())
submit_preds = np.concatenate(batch_probs, axis=0)

In [None]:
# Wrap the prediction array in a DataFrame with one column per label name.
submit = pd.DataFrame(data=submit_preds, columns=LABELS)
submit.head()

In [None]:
# Put the id column first, followed by the label columns, and write the CSV.
# Selecting submit[LABELS] explicitly (instead of appending the id column and
# then rotating the last column to the front) keeps this cell idempotent:
# re-running it yields the same column order rather than moving a label
# column to the front.
submit = pd.concat([test["StudyInstanceUID"], submit[LABELS]], axis=1)
submit.to_csv("submission.csv", index=False)