In [None]:
!pip install timm

In [None]:
import sys

sys.path.append('/kaggle/input/pytorchimagemodels')
import random
import numpy as np
import torch
import pandas as pd
import warnings
import os
import timm
import cv2
from torch import nn
from torch.utils.data import Dataset
from sklearn.metrics import roc_auc_score
from albumentations.pytorch import ToTensorV2
from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip,
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout,
    IAAAdditiveGaussianNoise, Transpose
)
import torch.nn.functional as F

# Widen pandas' console output so large tables are printed without truncation.
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 100,
    'display.width', 1000,
)

# Root directory of the competition data (train/test images and CSV files).
path = "/kaggle/input/ranzcr-clip-catheter-line-classification/"



class PARAMETER:
    """Run-wide hyper-parameters and dataset constants, read as class attributes."""

    # Upper bound (inclusive) of the random bucket number drawn for every
    # training row; rows whose bucket equals 1 form the validation set.
    split_ratio = 80
    # Batch index at which the training loop breaks out early.
    # -1 never equals a batch index, so the full epoch is run.
    stop_train_batch = -1
    epochs = 1
    batch_size = 32
    # Side length (pixels) of the square images produced by the transforms.
    size = 320
    # Number of output units of the classifier head.
    target_size = 11
    num_workers = 4
    # Label columns, grouped by device type.
    target_cols = (
        ['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal']
        + ['NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal']
        + ['CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal']
        + ['Swan Ganz Catheter Present']
    )


class BasicConv2d(nn.Module):
    """Convolution followed by batch normalisation and a ReLU activation."""

    def __init__(self, in_channels, out_channels, **kwargs):
        """Create the conv/BN pair.

        Args:
            in_channels: Number of channels of the input feature map.
            out_channels: Number of channels produced by the convolution.
            **kwargs: Forwarded unchanged to ``nn.Conv2d`` (kernel_size,
                stride, padding, ...).
        """
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(num_features=out_channels)

    def forward(self, x):
        """Return ``relu(bn(conv(x)))``."""
        normalised = self.bn(self.conv(x))
        return F.relu(normalised)


class InceptionA(nn.Module):
    """Four-branch Inception-A block.

    Each branch emits 96 channels and keeps the spatial size of its input
    (1x1 convolutions, or 3x3 operations with padding 1 and stride 1).  The
    branch outputs are concatenated on the channel axis, giving 384 channels.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted but not used by this block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Branch 1: 3x3 average pool -> 1x1 conv (96).
        self.b1_1 = nn.AvgPool2d(kernel_size=3, padding=1, stride=1)
        self.b1_2 = BasicConv2d(in_channels, 96, kernel_size=1)

        # Branch 2: 1x1 conv (96).
        self.b2 = BasicConv2d(in_channels, 96, kernel_size=1)

        # Branch 3: 1x1 conv (64) -> 3x3 conv (96).
        self.b3_1 = BasicConv2d(in_channels, 64, kernel_size=1)
        self.b3_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)

        # Branch 4: 1x1 conv (64) -> 3x3 conv (96) -> 3x3 conv (96).
        self.b4_1 = BasicConv2d(in_channels, 64, kernel_size=1)
        self.b4_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)
        self.b4_3 = BasicConv2d(96, 96, kernel_size=3, padding=1)

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them channel-wise."""
        pooled = self.b1_1(x)
        branch_pool = self.b1_2(pooled)

        branch_1x1 = self.b2(x)

        branch_3x3 = self.b3_1(x)
        branch_3x3 = self.b3_2(branch_3x3)

        branch_double_3x3 = self.b4_1(x)
        branch_double_3x3 = self.b4_2(branch_double_3x3)
        branch_double_3x3 = self.b4_3(branch_double_3x3)

        return torch.cat(
            (branch_pool, branch_1x1, branch_3x3, branch_double_3x3), dim=1
        )


class InceptionB(nn.Module):
    """Four-branch Inception-B block built from factorised 1x7 / 7x1 convolutions.

    All branches preserve the spatial size of the input.  Their outputs
    (128 + 384 + 256 + 256 channels) are concatenated on the channel axis,
    giving 1024 channels.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted but not used by this block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Branch 1: 3x3 average pool -> 1x1 conv (128).
        self.b1_1 = nn.AvgPool2d(kernel_size=3, padding=1, stride=1)
        self.b1_2 = BasicConv2d(in_channels, 128, kernel_size=1)

        # Branch 2: 1x1 conv (384).
        self.b2 = BasicConv2d(in_channels, 384, kernel_size=1)

        # Branch 3: 1x1 conv (192) -> 1x7 conv (224) -> 1x7 conv (256).
        self.b3_1 = BasicConv2d(in_channels, 192, kernel_size=1)
        self.b3_2 = BasicConv2d(192, 224, kernel_size=(1, 7), padding=(0, 3))
        self.b3_3 = BasicConv2d(224, 256, kernel_size=(1, 7), padding=(0, 3))

        # Branch 4: 1x1 conv (192) -> 1x7 (192) -> 7x1 (224) -> 1x7 (224) -> 7x1 (256).
        self.b4_1 = BasicConv2d(in_channels, 192, kernel_size=1, stride=1)
        self.b4_2 = BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3))
        self.b4_3 = BasicConv2d(192, 224, kernel_size=(7, 1), padding=(3, 0))
        self.b4_4 = BasicConv2d(224, 224, kernel_size=(1, 7), padding=(0, 3))
        self.b4_5 = BasicConv2d(224, 256, kernel_size=(7, 1), padding=(3, 0))

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them channel-wise."""
        branch_pool = self.b1_2(self.b1_1(x))
        branch_1x1 = self.b2(x)

        branch_short = self.b3_1(x)
        for layer in (self.b3_2, self.b3_3):
            branch_short = layer(branch_short)

        branch_long = self.b4_1(x)
        for layer in (self.b4_2, self.b4_3, self.b4_4, self.b4_5):
            branch_long = layer(branch_long)

        return torch.cat((branch_pool, branch_1x1, branch_short, branch_long), dim=1)


class InceptionC(nn.Module):
    """Inception-C block with two branches that fork into 1x3 / 3x1 heads.

    Six feature maps of 256 channels each are concatenated on the channel
    axis, giving 1536 channels.  Every layer preserves the spatial size of
    the input.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted but not used by this block.
    """

    def __init__(self, in_channels, out_channels):
        super(InceptionC, self).__init__()
        # branch1: avgpool --> conv1*1(256)
        self.b1_1 = nn.AvgPool2d(kernel_size=3, padding=1, stride=1)
        self.b1_2 = BasicConv2d(in_channels, 256, kernel_size=1)

        # branch2: conv1*1(256)
        self.b2 = BasicConv2d(in_channels, 256, kernel_size=1)

        # branch3: conv1*1(384) --> conv1*3(256) & conv3*1(256)
        self.b3_1 = BasicConv2d(in_channels, 384, kernel_size=1)
        self.b3_2_1 = BasicConv2d(384, 256, kernel_size=(1, 3), padding=(0, 1))
        self.b3_2_2 = BasicConv2d(384, 256, kernel_size=(3, 1), padding=(1, 0))

        # branch4: conv1*1(384) --> conv1*3(448) --> conv3*1(512) --> conv3*1(256) & conv1*3(256)
        self.b4_1 = BasicConv2d(in_channels, 384, kernel_size=1, stride=1)
        self.b4_2 = BasicConv2d(384, 448, kernel_size=(1, 3), padding=(0, 1))
        self.b4_3 = BasicConv2d(448, 512, kernel_size=(3, 1), padding=(1, 0))
        self.b4_4_1 = BasicConv2d(512, 256, kernel_size=(3, 1), padding=(1, 0))
        self.b4_4_2 = BasicConv2d(512, 256, kernel_size=(1, 3), padding=(0, 1))

    def forward(self, x):
        """Run all branches on ``x`` and concatenate them channel-wise."""
        y1 = self.b1_2(self.b1_1(x))
        y2 = self.b2(x)

        # Each forked branch shares one trunk.  Evaluate the trunk once and
        # feed both heads from it: running it twice doubles the compute and
        # makes the trunk's BatchNorm layers update their running statistics
        # twice per training step.
        trunk3 = self.b3_1(x)
        y3_1 = self.b3_2_1(trunk3)
        y3_2 = self.b3_2_2(trunk3)

        trunk4 = self.b4_3(self.b4_2(self.b4_1(x)))
        y4_1 = self.b4_4_1(trunk4)
        y4_2 = self.b4_4_2(trunk4)

        outputsC = [y1, y2, y3_1, y3_2, y4_1, y4_2]
        return torch.cat(outputsC, 1)


class ReductionA(nn.Module):
    """Reduction-A block: three stride-2, unpadded branches concatenated.

    The output has ``in_channels + n + m`` channels (max-pool keeps the input
    channels) at a reduced spatial size.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted but not used by this block.
        k: Channels of the 1x1 conv opening branch 3.
        l: Channels of the padded 3x3 conv in branch 3.
        m: Channels of the stride-2 3x3 conv closing branch 3.
        n: Channels of the stride-2 3x3 conv forming branch 2.
    """

    def __init__(self, in_channels, out_channels, k, l, m, n):
        super().__init__()
        # Branch 1: 3x3 max pool, stride 2, no padding.
        self.b1 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Branch 2: 3x3 conv (n), stride 2, no padding.
        self.b2 = BasicConv2d(in_channels, n, kernel_size=3, stride=2)

        # Branch 3: 1x1 conv (k) -> 3x3 conv (l) -> 3x3 conv (m), stride 2, no padding.
        self.b3_1 = BasicConv2d(in_channels, k, kernel_size=1)
        self.b3_2 = BasicConv2d(k, l, kernel_size=3, padding=1)
        self.b3_3 = BasicConv2d(l, m, kernel_size=3, stride=2)

    def forward(self, x):
        """Downsample ``x`` through the three branches and concatenate them."""
        pooled = self.b1(x)
        strided = self.b2(x)

        stacked = self.b3_1(x)
        stacked = self.b3_2(stacked)
        stacked = self.b3_3(stacked)

        return torch.cat((pooled, strided, stacked), dim=1)


class ReductionB(nn.Module):
    """Reduction-B block: three stride-2, unpadded branches concatenated.

    The output has ``in_channels + 192 + 320`` channels (max-pool keeps the
    input channels) at a reduced spatial size.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted but not used by this block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Branch 1: 3x3 max pool, stride 2, no padding.
        self.b1 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Branch 2: 1x1 conv (192) -> 3x3 conv (192), stride 2, no padding.
        self.b2_1 = BasicConv2d(in_channels, 192, kernel_size=1)
        self.b2_2 = BasicConv2d(192, 192, kernel_size=3, stride=2)

        # Branch 3: 1x1 conv (256) -> 1x7 (256) -> 7x1 (320) -> 3x3 (320), stride 2, no padding.
        self.b3_1 = BasicConv2d(in_channels, 256, kernel_size=1)
        self.b3_2 = BasicConv2d(256, 256, kernel_size=(1, 7), padding=(0, 3))
        self.b3_3 = BasicConv2d(256, 320, kernel_size=(7, 1), padding=(3, 0))
        self.b3_4 = BasicConv2d(320, 320, kernel_size=3, stride=2)

    def forward(self, x):
        """Downsample ``x`` through the three branches and concatenate them."""
        pooled = self.b1(x)

        short = self.b2_1(x)
        short = self.b2_2(short)

        deep = self.b3_1(x)
        for layer in (self.b3_2, self.b3_3, self.b3_4):
            deep = layer(deep)

        return torch.cat((pooled, short, deep), dim=1)


class Stem(nn.Module):
    """Stem of the network: three plain convolutions followed by three forks.

    Each fork runs two parallel paths on the same input and concatenates them
    on the channel axis (64 + 96 = 160, then 96 + 96 = 192, then
    192 + 192 = 384 channels).

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Accepted but not used by this block.
    """

    def __init__(self, in_channels, out_channels):
        super(Stem, self).__init__()
        # conv3*3(32 stride2 valid)
        self.conv1 = BasicConv2d(in_channels, 32, kernel_size=3, stride=2)
        # conv3*3(32 valid)
        self.conv2 = BasicConv2d(32, 32, kernel_size=3)
        # conv3*3(64)
        self.conv3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
        # maxpool3*3(stride2 valid) & conv3*3(96 stride2 valid)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv4 = BasicConv2d(64, 96, kernel_size=3, stride=2)

        # conv1*1(64) --> conv3*3(96 valid)
        self.conv5_1_1 = BasicConv2d(160, 64, kernel_size=1)
        self.conv5_1_2 = BasicConv2d(64, 96, kernel_size=3)
        # conv1*1(64) --> conv7*1(64) --> conv1*7(64) --> conv3*3(96 valid)
        self.conv5_2_1 = BasicConv2d(160, 64, kernel_size=1)
        self.conv5_2_2 = BasicConv2d(64, 64, kernel_size=(7, 1), padding=(3, 0))
        self.conv5_2_3 = BasicConv2d(64, 64, kernel_size=(1, 7), padding=(0, 3))
        self.conv5_2_4 = BasicConv2d(64, 96, kernel_size=3)

        # conv3*3(192 stride2 valid)
        self.conv6 = BasicConv2d(192, 192, kernel_size=3, stride=2)
        # maxpool3*3(stride2 valid)
        self.maxpool6 = nn.MaxPool2d(kernel_size=3, stride=2)

    def forward(self, x):
        """Return the 384-channel stem feature map for image batch ``x``."""
        # The first fork shares the conv1 -> conv2 -> conv3 trunk.  Evaluate
        # it once: running it twice doubles the compute on the largest
        # feature maps and makes the trunk's BatchNorm layers update their
        # running statistics twice per training step.
        trunk = self.conv3(self.conv2(self.conv1(x)))
        y1_1 = self.maxpool4(trunk)
        y1_2 = self.conv4(trunk)
        y1 = torch.cat([y1_1, y1_2], 1)

        y2_1 = self.conv5_1_2(self.conv5_1_1(y1))
        y2_2 = self.conv5_2_4(self.conv5_2_3(self.conv5_2_2(self.conv5_2_1(y1))))
        y2 = torch.cat([y2_1, y2_2], 1)

        y3_1 = self.conv6(y2)
        y3_2 = self.maxpool6(y2)
        y3 = torch.cat([y3_1, y3_2], 1)

        return y3


class Googlenetv4(nn.Module):
    """Inception-v4 style classifier that returns raw logits.

    The forward pass applies Stem -> 4x InceptionA -> ReductionA ->
    7x InceptionB -> ReductionB -> 3x InceptionC -> global average pool ->
    dropout -> linear.

    The network returns *logits*, not probabilities.  Apply ``sigmoid`` (or
    use ``nn.BCEWithLogitsLoss``) on the output; a softmax inside the model
    would be squashed a second time by those and would also force the labels
    to be mutually exclusive.

    Args:
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, num_classes=11):
        super(Googlenetv4, self).__init__()
        self.stem = Stem(3, 384)
        # One instance per stage is created and applied repeatedly in
        # forward(), so the repetitions of a stage share their weights.
        # NOTE(review): the reference Inception-v4 presumably uses distinct
        # blocks per repetition; kept as-is so parameter names stay stable.
        self.icpA = InceptionA(384, 384)
        self.redA = ReductionA(384, 1024, 192, 224, 256, 384)
        self.icpB = InceptionB(1024, 1024)
        self.redB = ReductionB(1024, 1536)
        self.icpC = InceptionC(1536, 1536)
        # Global pooling: equals the former 8x8 average pool when the final
        # feature map is 8x8, and keeps working for other input resolutions.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # NOTE(review): in PyTorch ``p`` is the probability of *dropping* a
        # unit, so 0.8 zeroes 80% of the features; confirm this is intended
        # rather than a keep-probability of 0.8.
        self.dropout = nn.Dropout(p=0.8)
        self.linear = nn.Linear(1536, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        # Stem Module
        out = self.stem(x)
        # InceptionA Module * 4
        for _ in range(4):
            out = self.icpA(out)
        # ReductionA Module
        out = self.redA(out)
        # InceptionB Module * 7
        for _ in range(7):
            out = self.icpB(out)
        # ReductionB Module
        out = self.redB(out)
        # InceptionC Module * 3
        for _ in range(3):
            out = self.icpC(out)
        # Global average pooling, then flatten to (batch, 1536)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        # Dropout
        out = self.dropout(out)
        # Linear head: raw logits, no softmax
        return self.linear(out)


class MyModel(torch.nn.Module):
    """timm backbone whose ``fc`` head is replaced to emit ``PARAMETER.target_size`` outputs."""

    def __init__(self, model_name='resnext50_32x4d', pretrained=False):
        """Create the backbone and swap its final fully-connected layer.

        Args:
            model_name: Architecture name passed to ``timm.create_model``.
                The created model must expose its classifier as ``fc``.
            pretrained: Passed through to ``timm.create_model``.
        """
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, PARAMETER.target_size)
        self.model = backbone

    def forward(self, input):
        """Return the backbone's output for ``input``."""
        return self.model(input)


class LoadDataset(Dataset):
    """Training/validation dataset yielding ``(image, label)`` pairs.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column (image file stem)
            and the label columns listed in ``PARAMETER.target_cols``.
        transform: Optional callable invoked as ``transform(image=image)``
            that returns a mapping with the result under ``'image'``.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.transform = transform
        self.labels = df[PARAMETER.target_cols].values

    def __len__(self):
        """Return the number of rows in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at row ``idx`` and return it with its float label tensor.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f'{path}/train/{file_name}.jpg'
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        # OpenCV loads BGR; convert to RGB before applying the transform.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        label = torch.tensor(self.labels[idx]).float()
        return image, label


class OutputModel(Dataset):
    """Inference dataset yielding images only (no labels).

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column (image file stem).
        transform: Optional callable invoked as ``transform(image=image)``
            that returns a mapping with the result under ``'image'``.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load and return the test image at row ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_name = self.file_names[idx]
        file_path = f'{path}/test/{file_name}.jpg'
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        # OpenCV loads BGR; convert to RGB before applying the transform.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image


def get_transforms(*, data):
    """Return the albumentations pipeline for the requested split.

    Args:
        data: ``'train'`` for the augmenting pipeline (random resized crop and
            horizontal flip), ``'valid'`` for the deterministic resize-only
            pipeline.

    Returns:
        A ``Compose`` pipeline ending in normalisation and tensor conversion,
        or ``None`` when ``data`` is neither ``'train'`` nor ``'valid'``.
    """
    side = PARAMETER.size
    normalise = Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    if data == 'train':
        spatial_ops = [
            RandomResizedCrop(side, side, scale=(0.85, 1.0)),
            HorizontalFlip(p=0.5),
        ]
    elif data == 'valid':
        spatial_ops = [Resize(side, side)]
    else:
        return None

    return Compose(spatial_ops + [normalise, ToTensorV2()])


def get_score(y_true, y_pred):
    """Compute the ROC AUC of every label column and their mean.

    Args:
        y_true: 2-D array of ground-truth labels, one column per label.
        y_pred: 2-D array of predicted scores with the same layout.

    Returns:
        ``(avg_score, scores)``: the mean AUC over all columns and the list
        of per-column AUC values.
    """
    # AUC measures how well the scores *rank* positives above negatives, so
    # the continuous predictions are passed through unrounded; thresholding
    # them to 0/1 first would discard that ranking.
    scores = [
        roc_auc_score(y_true[:, col], y_pred[:, col])
        for col in range(y_true.shape[1])
    ]
    avg_score = np.mean(scores)
    return avg_score, scores



warnings.filterwarnings('ignore')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("Using device: {}"
      "\n".format(str(device)))

# ---------------------------------------------------------------------------
# Data loading and train/validation split
# ---------------------------------------------------------------------------
data = pd.read_csv(os.path.join(path, "train.csv"))
# Assign every row a random bucket in [0, split_ratio]; bucket 1 is held out
# for validation, all other buckets are used for training.
data['split_num'] = [random.randint(0, PARAMETER.split_ratio) for _ in range(len(data))]

# drop() returns new frames here, which avoids mutating a filtered slice of
# ``data`` in place.
is_valid = data['split_num'] == 1
train = data[~is_valid].drop(columns=['split_num'])
valid = data[is_valid].drop(columns=['split_num'])

train_dataset = LoadDataset(train, transform=get_transforms(data='train'))
valid_dataset = LoadDataset(valid, transform=get_transforms(data='valid'))

valid_labels = valid[PARAMETER.target_cols].values
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=PARAMETER.batch_size,
                                           shuffle=True,
                                           num_workers=PARAMETER.num_workers, pin_memory=True, drop_last=True)

valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                           batch_size=PARAMETER.batch_size,
                                           shuffle=False,
                                           num_workers=PARAMETER.num_workers, pin_memory=True, drop_last=False)

# ---------------------------------------------------------------------------
# Model, loss and optimiser
# ---------------------------------------------------------------------------
# Alternative backbone: net = MyModel().to(device)
net = Googlenetv4().to(device)

criterion = nn.BCEWithLogitsLoss()
optimiser = torch.optim.Adam(net.parameters(), lr=0.003)

# ---------------------------------------------------------------------------
# Training with a validation pass after every epoch
# ---------------------------------------------------------------------------
num_train_batches = len(train_loader)
num_valid_batches = len(valid_loader)

for epoch in range(PARAMETER.epochs):
    print('\nEpoch: %2d' % (epoch + 1))
    runningloss = 0
    net.train()
    for i, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimiser.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimiser.step()
        runningloss += loss.item()
        # Report progress and the mean loss of the last 32 batches.
        if i % 32 == 31:
            print(f'正在训练：{(i + 1) * 100 / num_train_batches:.2f}', '%')
            print("Epoch: %2d, Batch: %4d, Loss: %.3f"
                  % (epoch + 1, i + 1, runningloss / 32))
            runningloss = 0
        # Optional early stop for quick experiments (-1 disables it).
        if i == PARAMETER.stop_train_batch:
            break
    print('Result processing')
    correct = 0

    net.eval()
    with torch.no_grad():
        for i, batch in enumerate(valid_loader):
            if i % 8 == 7:
                print(f'正在评估：{(i + 1) * 100 / num_valid_batches:.2f}', '%')
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = net(inputs)
            prediction = torch.round(torch.sigmoid(outputs))
            # A sample counts as correct only when every label matches.
            # all(dim=1) expresses that without hard-coding the label count.
            correct += int((prediction == labels).all(dim=1).sum().item())
    print(f'total：{len(valid)}  correct: {correct}   acc: {correct*100/len(valid):.2f} %')
    # get_score(valid_labels, predictions) can be used here for per-column
    # AUC once the sigmoid outputs of the validation pass are collected.

# ---------------------------------------------------------------------------
# Inference on the test set and submission file
# ---------------------------------------------------------------------------
test = pd.read_csv(os.path.join(path, "sample_submission.csv"))
test_dataset = OutputModel(test, transform=get_transforms(data='valid'))
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=PARAMETER.batch_size,
                                          shuffle=False,
                                          num_workers=PARAMETER.num_workers, pin_memory=True, drop_last=False)
print('\nPredict processing')
net.eval()
preds = []
with torch.no_grad():
    for inputs in test_loader:
        inputs = inputs.to(device)
        outputs = net(inputs)
        # NOTE(review): predictions are rounded to hard 0/1 labels before
        # saving; if the submission is scored by AUC, unrounded
        # probabilities would preserve the ranking. Confirm before changing.
        preds.append(outputs.sigmoid().round().to('cpu').numpy())
predictions = np.concatenate(preds)
print('Save prediction')
test[PARAMETER.target_cols] = predictions
test[['StudyInstanceUID'] + PARAMETER.target_cols].to_csv('submission.csv', index=False)
print('Finish')




In [None]:
!pip install --upgrade pip
