In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms, models
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.utils.data as data
import os


In [2]:
TRAIN_DATA_PATH = "data_train_val_split/train/"
VAL_DATA_PATH = "data_train_val_split/val/"
TEST_DATA_PATH =  "data_train_val_split/test/"

In [3]:
# data transform, you can add different transform methods and resize image to any size
img_size = 224
train_transform = transforms.Compose([
                                    transforms.Resize((img_size,img_size)),
                                    transforms.RandomHorizontalFlip(p=0.5),
                                    transforms.RandomResizedCrop(size=200),
    #######
    transforms.RandomAffine(0, shear=0.2),
                                      transforms.RandomAffine(0, translate=(0.2, 0.2)),
                                      transforms.RandomAffine(0, scale=(1, 1.2)),
    transforms.RandomRotation(40),
    #######
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                    ])
val_transform = transforms.Compose([
                                    transforms.Resize((img_size,img_size)),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                    ])


train_dataset = datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=train_transform)
val_dataset = datasets.ImageFolder(root=VAL_DATA_PATH, transform=val_transform)
# dataset = datasets.ImageFolder(root=TRAIN_DATA_PATH,transform=train_transform)

# spilt data into train and validation and the total number of image is 4276. You can decide the number of images
# you want to use to do training and validation.
TOTAL_SIZE = len(os.listdir(TRAIN_DATA_PATH + "/NORMAL")) + len(
    os.listdir(TRAIN_DATA_PATH + "/INFECTED")
)


# spilt your data into train and val
ratio = 0.8
train_len = round(TOTAL_SIZE * ratio)
valid_len = round(TOTAL_SIZE * (1-ratio))

# train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_len, valid_len])

# you can use different batch size
train_data_loader = data.DataLoader(train_dataset, batch_size=128, shuffle=True,  num_workers=4)
val_data_loader = data.DataLoader(val_dataset, batch_size=300, shuffle=True,  num_workers=4)
# print(dataset)
# print(dataset.class_to_idx)

In [4]:
# I have written the function for you this time, but it's strongly recommended that you 
# understand how to do training and validation


def train(model, data_loader, optimizer, scheduler, epoch, verbose=True):
    model.train()
    loss_avg = 0.0
    for batch_idx, (data, target) in enumerate(data_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        
        # loss function, you can use other loss function if you want
        loss   = F.cross_entropy(output, target)
        loss_avg = loss.item()
        
        # do back propagation
        loss.backward()
        optimizer.step()
        scheduler.step()
        verbose_step = len(data_loader) // 10
        if batch_idx % verbose_step == 0 and verbose:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(data_loader.dataset),
                100. * batch_idx / len(data_loader), loss.item()))
        
    return loss_avg / len(data_loader)

def valid(model, data_loader):
    with torch.no_grad():
        model.eval()
        valid_loss = 0
        correct = 0
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            valid_loss += F.cross_entropy(output, target, reduction='sum').item() # sum up batch loss
            pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().sum().item() 

        valid_loss /= len(data_loader.dataset)
        print('\nValid set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            valid_loss, correct, len(data_loader.dataset),
            100. * correct / len(data_loader.dataset)))
    return float(correct) / len(data_loader.dataset)

In [5]:
############## Build the model here ##########
# class ConvNet(nn.Module):
#     def __init__(self,num_classes=2):
    
#         super(ConvNet, self).__init__()
    
        

#     def forward(self, x):
        
#         return out




In [6]:
# using gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
####################  implement your optimizer ###################################
## yo can use any training methods if you want (ex:lr decay, weight decay.....)

model = models.resnet18(pretrained=True)

lt=8
cntr=0

for child in model.children():
    cntr+=1
    if cntr < lt:
        for param in child.parameters():
            param.requires_grad = False
            
model.fc = nn.Linear(model.fc.in_features, 2)

if torch.cuda.is_available():
    model.to(device)
else: print("NO GPU!!!!")

lr=1e-3
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# start training
epochs = 25
acc = 0.0
for epoch in range(epochs):
    model.train()
    train(model, train_data_loader, optimizer, scheduler, epoch)
    accuracy = valid(model, val_data_loader)
    if accuracy > acc:
        acc = accuracy
        print("-------------saving model--------------")
        # save the model
        torch.save(model, "model.pth")


Valid set: Average loss: 0.2879, Accuracy: 510/560 (91%)

-------------saving model--------------

Valid set: Average loss: 0.2659, Accuracy: 519/560 (93%)

-------------saving model--------------

Valid set: Average loss: 0.2654, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2719, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2672, Accuracy: 520/560 (93%)

-------------saving model--------------

Valid set: Average loss: 0.2717, Accuracy: 520/560 (93%)


Valid set: Average loss: 0.2664, Accuracy: 520/560 (93%)


Valid set: Average loss: 0.2794, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2601, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2728, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2684, Accuracy: 520/560 (93%)




Valid set: Average loss: 0.2652, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2682, Accuracy: 520/560 (93%)


Valid set: Average loss: 0.2674, Accuracy: 520/560 (93%)


Valid set: Average loss: 0.2706, Accuracy: 520/560 (93%)


Valid set: Average loss: 0.2648, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2662, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2601, Accuracy: 520/560 (93%)


Valid set: Average loss: 0.2696, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2650, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2688, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2702, Accuracy: 520/560 (93%)


Valid set: Average loss: 0.2584, Accuracy: 520/560 (93%)




Valid set: Average loss: 0.2730, Accuracy: 519/560 (93%)


Valid set: Average loss: 0.2671, Accuracy: 519/560 (93%)



In [8]:
test_transform = transforms.Compose([transforms.Resize((img_size,img_size)),
                                    transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                    ])
test_data = datasets.ImageFolder(root=TEST_DATA_PATH,transform=test_transform)
test_data_loader = data.DataLoader(test_data, batch_size=64, shuffle=False, num_workers=4) 

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load the model so that you don't need to train the model again
test_model = torch.load("model.pth").to(device)

In [10]:
def test(model,data_loader):
    with torch.no_grad():
        model.eval()
        valid_loss = 0
        correct = 0
        bs = test_data_loader.batch_size
        result = []
        for i, (data, target) in enumerate(test_data_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
            arr = pred.data.cpu().numpy()
            for j in range(pred.size()[0]):
                file_name = test_data.samples[i*bs+j][0].split('/')[-1]
                result.append((file_name,pred[j].cpu().numpy()[0]))
    return result

In [11]:
result = test(test_model,test_data_loader)

# Write results to csv

In [12]:
with open ('ID_result.csv','w') as f:
    f.write('ID,label\n')
    for data in result:
        f.write(data[0]+','+str(data[1])+'\n')