In [1]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

In [2]:
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision.io import read_image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import copy
import numpy as np
import pandas as pd
import os

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
PATH = os.getcwd() + '/gdrive/MyDrive/AIMIA/'

In [5]:
!pip install validators

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting validators
  Downloading validators-0.20.0.tar.gz (30 kB)
Building wheels for collected packages: validators
  Building wheel for validators (setup.py) ... [?25l[?25hdone
  Created wheel for validators: filename=validators-0.20.0-py3-none-any.whl size=19582 sha256=15a9f8236282d998188476a3e3bde08647db7306493d91cc417f564f51fa6721
  Stored in directory: /root/.cache/pip/wheels/5f/55/ab/36a76989f7f88d9ca7b1f68da6d94252bb6a8d6ad4f18e04e9
Successfully built validators
Installing collected packages: validators
Successfully installed validators-0.20.0


In [6]:
utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_convnets_processing_utils')

  "You are about to download and run code from an untrusted repository. In a future release, this won't "
Downloading: "https://github.com/NVIDIA/DeepLearningExamples/zipball/torchhub" to /root/.cache/torch/hub/torchhub.zip
  "pytorch_quantization module not found, quantization will not be available"
  "pytorch_quantization module not found, quantization will not be available"


In [7]:
all_data = pd.read_csv(PATH + 'train_data_m.csv')

image_ids = all_data['file_name'].to_numpy()
labels = all_data['target'].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(image_ids, labels, test_size= 0.20, random_state=42)


train_df = pd.concat([pd.Series(X_train),pd.Series(y_train)],axis = 1)

train_df.to_csv(PATH +'train_data.csv',index = False)

test_df = pd.concat([pd.Series(X_test),pd.Series(y_test)],axis = 1)

test_df.to_csv(PATH + 'test_data.csv',index = False)

In [8]:
class PneumothoraxImgDataset(Dataset):
    def __init__(self, annotations_file, img_dir,dim = 256):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform= transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((dim,dim)),
            transforms.ToTensor()
        ])
        
    def __len__(self):
        return len(self.img_labels)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)

        return image, label

In [9]:
Test_Dataset = PneumothoraxImgDataset(PATH + 'test_data.csv',PATH + 'small_train_data_set')

In [10]:
class TV_Dataset(Dataset):
    def __init__(self, file_names, labels, img_dir,dim = 256):
        self.img_name = file_names
        self.labels = labels
        self.img_dir = img_dir
        self.transform= transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((dim,dim)),
            transforms.ToTensor()
        ])
        
    def __len__(self):
        return len(self.img_name)
    
    def __getitem__(self,idx):
        img_path = os.path.join(self.img_dir, self.img_name[idx])
        image = read_image(img_path)
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        
        return image, label 

In [11]:
def train_val_dataset(train_path,path_dir):
    
    all_data = pd.read_csv(train_path)
    
    image_ids = all_data['0'].to_numpy()
    labels = all_data['1'].to_numpy()
    
    X_train, X_val, y_train, y_val = train_test_split(image_ids, labels, test_size= 0.20)
    
    train_dataset = TV_Dataset(X_train, y_train, path_dir)
    val_dataset = TV_Dataset(X_val, y_val, path_dir)
    
    return train_dataset, val_dataset    

In [12]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [13]:
if device =='cuda':
  print(torch.cuda.memory_summary(device=device, abbreviated=False))

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------

In [14]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.classes = 2
        self.efficientnet = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=False)
        self.efficientnet.stem.conv = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        self.efficientnet.classifier.fc = nn.Linear(1280, self.classes, bias = True)
        
    
    def forward(self,x):
        return self.efficientnet(x)
    
    

In [15]:
model = NeuralNetwork().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Using cache found in /root/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


In [24]:
def train(model, criterion, optimizer, num_of_epochs):
    
#     best_model_wts = copy.deepcopy(model.state_dict())
    
    best_acc = 0.0
    
    for _, epoch in tqdm(enumerate(range(num_of_epochs))):
        print(f'\nEpoch {epoch + 1}/{num_of_epochs}')
        
        model.train()
        
        running_loss = 0.
        running_accuracy = 0.
        
        
        train_dataset, val_dataset = train_val_dataset(PATH + 'train_data.csv', PATH + 'small_train_data_set')
        
        train_loader = DataLoader(train_dataset,batch_size=64)
        val_loader = DataLoader(val_dataset,batch_size=32)
        
        print('-----------Trainning in Progress --------------')
        for idx, data in tqdm(enumerate(train_loader),total = len(train_loader), position=0, leave=True):
            images, labels = data
            images = images.type(torch.float32).to(device)
            optimizer.zero_grad()
            
            outputs = model(images)
            labels = labels.type(torch.LongTensor).to(device)
            
            _ , preds = torch.max(outputs, 1)
            
            loss = criterion(outputs,labels)
            loss.backward()
            
            optimizer.step()
            
            running_loss += loss.item()*images.size(0)
            running_accuracy += torch.sum(preds == labels.data)

        epoch_loss = running_loss/len(train_dataset)
        epoch_accuracy = running_accuracy/len(train_dataset)
        
        print(f'Training Loss: {epoch_loss:.6f} Training Acc.: {epoch_accuracy:.6f}')
        
        model.eval()

        running_loss = 0
        running_accuracy = 0
        
        print('-----------Validation in Progress --------------')

        for idx, data in tqdm(enumerate(val_loader),total = len(val_loader), position=0, leave=True):
            images, labels = data
            images = images.type(torch.float32).to(device)
            
            outputs = model(images)
            labels = labels.type(torch.LongTensor).to(device)
            
            loss = criterion(outputs,labels)
            
            _ , preds = torch.max(outputs, 1)
            
            running_loss += loss.item()*images.size(0)
            running_accuracy += torch.sum(preds == labels.data)
        
        val_loss = running_loss/len(val_dataset)
        val_accuracy = running_accuracy/len(val_dataset)
        print(f'\nVal Loss: {val_loss:.4f} Val Acc.: {val_accuracy:.4f}\n')
    

    return  model

In [25]:
trianed_model = train(model, criterion, optimizer, num_of_epochs=3)

0it [00:00, ?it/s]


Epoch 1/3
-----------Trainning in Progress --------------


100%|██████████| 21/21 [00:25<00:00,  1.20s/it]


Training Loss: 0.429615 Training Acc.: 0.819444
-----------Validation in Progress --------------


100%|██████████| 11/11 [00:05<00:00,  2.18it/s]
1it [00:30, 30.29s/it]


Val Loss: 0.5133 Val Acc.: 0.7908


Epoch 2/3
-----------Trainning in Progress --------------


100%|██████████| 21/21 [00:25<00:00,  1.20s/it]


Training Loss: 0.351661 Training Acc.: 0.846451
-----------Validation in Progress --------------


100%|██████████| 11/11 [00:04<00:00,  2.21it/s]
2it [01:00, 30.19s/it]


Val Loss: 0.4965 Val Acc.: 0.8031


Epoch 3/3
-----------Trainning in Progress --------------


100%|██████████| 21/21 [00:26<00:00,  1.25s/it]


Training Loss: 0.194139 Training Acc.: 0.929012
-----------Validation in Progress --------------


100%|██████████| 11/11 [00:05<00:00,  2.19it/s]
3it [01:31, 30.55s/it]


Val Loss: 0.5334 Val Acc.: 0.8062






In [26]:
def test(model, criterion):
  test_loader = DataLoader(Test_Dataset, batch_size=32)
  model.eval()
  running_loss = 0
  running_accuracy = 0
  print('-------Testing Model------------')
  for idx, data in tqdm(enumerate(test_loader),total = len(test_loader), position=0, leave=True):
    images, labels = data
    images = images.to(device)
    
    outputs = model(images)
    labels = labels.type(torch.LongTensor).to(device)
    
    loss = criterion(outputs,labels)
    
    _ , preds = torch.max(outputs, 1)
    
    running_loss += loss.item()*images.size(0)
    running_accuracy += torch.sum(preds == labels.data)
        
  test_loss = running_loss/len(Test_Dataset)
  test_accuracy = running_accuracy/len(Test_Dataset)


  print(f'\nTest Loss: {test_loss:.5f} Test Acc.: {test_accuracy:.5f}\n')

In [27]:
test(trianed_model, criterion)

-------Testing Model------------


100%|██████████| 13/13 [00:06<00:00,  2.01it/s]


Test Loss: 0.61701 Test Acc.: 0.76847




