In [1]:
import torch
import torch.nn as nn
import torch.optim as optim 
import torchvision
from torchvision import models
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as utils
from torchvision import transforms
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from Data_Retriever import ImageData
from Resnet_UNet import ResNetUNet
from Metrics import Metrics
import time
import os
from tqdm import tqdm
from Get_Data_Classification import Classification_Data
from Data_Retriever_Classification import ImageDataset
from sklearn.metrics import precision_score, recall_score, f1_score
import gc

In [2]:
CUDA_LAUNCH_BLOCKING="1"
print(torch.cuda.current_device())
print(torch.cuda.memory_allocated())

0
0


In [3]:
classification_object = Classification_Data("train.csv")
data = classification_object.df
data.head()

Unnamed: 0,ImageId,file_path,0,1,2,3,4
0,0002cc93b.jpg,./train_images/0002cc93b.jpg,0,1,0,0,0
1,00031f466.jpg,./train_images/00031f466.jpg,1,0,0,0,0
2,000418bfc.jpg,./train_images/000418bfc.jpg,1,0,0,0,0
3,000789191.jpg,./train_images/000789191.jpg,1,0,0,0,0
4,0007a71bf.jpg,./train_images/0007a71bf.jpg,0,0,0,1,0


In [4]:
train_df, test_df = train_test_split(data, test_size = 0.33, random_state=123)

In [5]:
len(test_df)

4148

In [6]:
BATCH = 10
train_data_retriever = ImageDataset(
    train_df['file_path'].tolist(),
    np.array(train_df.drop(['ImageId','file_path'], axis=1))
)

test_data_retriever = ImageDataset(
    test_df['file_path'].tolist(),
    np.array(test_df.drop(['ImageId','file_path'], axis=1))
)
torch.manual_seed(42)
train_loader = DataLoader(train_data_retriever, batch_size=BATCH, shuffle=True)
test_loader = DataLoader(test_data_retriever, batch_size=BATCH, shuffle=True)

In [7]:
del classification_object, data, train_df, test_df
gc.collect()

97

In [8]:
class Resnext50(nn.Module):
    def __init__(self, n_classes=None):
        super().__init__()
        resnet = models.resnet50(pretrained=True)
        for param in resnet.parameters():
            param.requires_grad = False
        resnet.fc = nn.Sequential(nn.Dropout(p=0.2),
                                  nn.Linear(in_features=resnet.fc.in_features, 
                                            out_features=n_classes))
        self.base_model = resnet
        self.sigm = nn.Sigmoid()
    
    
    def forward(self, x):
        return self.sigm(self.base_model(x))
    
# Initialize the model
model = Resnext50(n_classes=5)

In [9]:
print(model)

Resnext50(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (downsample): Sequential(
          (0): 

In [10]:
# Use threshold to define predicted labels and invoke sklearn's metrics with different averaging strategies.
def calculate_metrics(pred, target, threshold=0.5):
    pred = np.array(pred > threshold, dtype=float)
    return {'micro/precision': precision_score(y_true=target, y_pred=pred, average='micro'),
            'micro/recall': recall_score(y_true=target, y_pred=pred, average='micro'),
            'micro/f1': f1_score(y_true=target, y_pred=pred, average='micro'),
            'macro/precision': precision_score(y_true=target, y_pred=pred, average='macro'),
            'macro/recall': recall_score(y_true=target, y_pred=pred, average='macro'),
            'macro/f1': f1_score(y_true=target, y_pred=pred, average='macro'),
            'samples/precision': precision_score(y_true=target, y_pred=pred, average='samples'),
            'samples/recall': recall_score(y_true=target, y_pred=pred, average='samples'),
            'samples/f1': f1_score(y_true=target, y_pred=pred, average='samples'),
            }

In [11]:
# training function
def train(model, dataloader, optimizer, criterion, train_data, device):
    print('Training')
    model.train()
    counter = 0
    train_running_loss = 0.0
    model_results = []
    model_targets = []
    for i, data in tqdm(enumerate(dataloader), total=int(len(train_data)/dataloader.batch_size)):
        counter += 1
        #if counter == 2:
            #break
        data, target = data['image'].to(device), data['label'].to(device)
        optimizer.zero_grad()
        outputs = model(data)
        # apply sigmoid activation to get all the outputs between 0 and 1
        outputs = torch.sigmoid(outputs)
        #print(outputs)
        loss = criterion(outputs, target.type(torch.float))
        train_running_loss += loss.item()
        # backpropagation
        loss.backward()
        # update optimizer parameters
        optimizer.step()
        model_results.extend(outputs.cpu().detach().numpy())
        model_targets.extend(target.cpu().detach().numpy())
    result = calculate_metrics(np.array(model_results), np.array(model_targets))     
    train_loss = train_running_loss / counter
    return train_loss, result

In [12]:
# validation function
def validate(model, dataloader, criterion, val_data, device):
    print('Validating')
    model.eval()
    counter = 0
    val_running_loss = 0.0
    model_results = []
    model_targets = []
    with torch.no_grad():
        for i, data in tqdm(enumerate(dataloader), total=int(len(val_data)/dataloader.batch_size)):
            counter += 1
            #if counter == 2:
                #break
            data, target = data['image'].to(device), data['label'].to(device)
            outputs = model(data)
            # apply sigmoid activation to get all the outputs between 0 and 1
            outputs = torch.sigmoid(outputs)
            loss = criterion(outputs, target.type(torch.float))
            val_running_loss += loss.item()
            model_results.extend(outputs.cpu().detach().numpy())
            model_targets.extend(target.cpu().detach().numpy())
        
        result = calculate_metrics(np.array(model_results), np.array(model_targets))

        
        val_loss = val_running_loss / counter
        return val_loss, result

In [13]:
# initialize the computation device
CUDA_VISIBLE_DEVICES=0,1 
device = torch.device("cuda:0,1" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.current_device())
print(torch.cuda.memory_allocated())

cuda:0
0
0


In [14]:
#intialize the model
model= nn.DataParallel(model)
model = model.to(device)
# learning parameters
lr = 0.01
epochs = 1
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCELoss()

In [15]:
gc.collect()
torch.cuda.empty_cache()

In [16]:
# start the training and validation
train_loss = []
valid_loss = []
train_result = {}
test_result = {}

PATH = 'classification_checkpoint.pth'

if os.path.exists(PATH):
    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    epochs = checkpoint['epoch']
    train_loss = checkpoint['train_loss']
    valid_loss = checkpoint['valid_loss']

for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss, train_result = train(
        model, train_loader, optimizer, criterion, train_data_retriever, device
    )
    valid_epoch_loss, test_result = validate(
        model, test_loader, criterion, test_data_retriever, device
    )
    train_loss.append(train_epoch_loss)
    valid_loss.append(valid_epoch_loss)
    
    checkpoint = {'epoch': epoch,
                  'state_dict': model.state_dict(),
                  'optimizer' : optimizer.state_dict(),
                  'train_loss' : train_loss,
                  'valid_loss' : valid_loss}

    torch.save(checkpoint, PATH)
    
    print(f"Train Loss: {train_epoch_loss:.4f}")
    print("micro f1: {:.3f} "
          "macro f1: {:.3f} "
          "samples f1: {:.3f}".format(train_result['micro/f1'],
                                      train_result['macro/f1'],
                                      train_result['samples/f1']))
    print(f'Val Loss: {valid_epoch_loss:.4f}')
    print("micro f1: {:.3f} "
          "macro f1: {:.3f} "
          "samples f1: {:.3f}".format(test_result['micro/f1'],
                                      test_result['macro/f1'],
                                      test_result['samples/f1']))
    

  0%|                                                                                          | 0/842 [00:00<?, ?it/s]

Epoch 1 of 1
Training


  'image': torch.tensor(image, dtype=torch.float32),
100%|██████████████████████████████████████████████████████████████████████████████| 842/842 [5:32:46<00:00, 23.71s/it]
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  0%|                                                                                          | 0/414 [00:00<?, ?it/s]

Validating


415it [50:00,  7.23s/it]                                                                                               

Train Loss: 0.6934
micro f1: 0.007 macro f1: 0.004 samples f1: 0.001
Val Loss: 0.6931
micro f1: 0.000 macro f1: 0.000 samples f1: 0.000



  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [17]:
torch.save(model.state_dict(), 'Classification.pt')