In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils

from torch.autograd import Variable
from torchvision import datasets, models, transforms

In [2]:
import numpy as np
import os

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [53]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(30),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

In [5]:
data_dir = 'data'

In [23]:
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}

In [7]:
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=32, shuffle=True) for x in ['train', 'val']}

In [8]:
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
dataset_sizes

{'train': 4279, 'val': 471}

In [9]:
class_names = image_datasets['train'].classes
class_names

['Black-grass',
 'Charlock',
 'Cleavers',
 'Common Chickweed',
 'Common wheat',
 'Fat Hen',
 'Loose Silky-bent',
 'Maize',
 'Scentless Mayweed',
 'Shepherds Purse',
 'Small-flowered Cranesbill',
 'Sugar beet']

In [10]:
model = models.resnet34(pretrained=True)

In [11]:
criterion = nn.CrossEntropyLoss()

In [12]:
optimizer = optim.Adam(model.parameters(), lr=0.005)

In [13]:
def train(model, dataloader, optimizer, criterion, epoch):
    model.train()
    
    for batch_idx, (x, y) in enumerate(dataloader):
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        prediction = model(x)

        loss = criterion(prediction, y)
        loss.backward()

        optimizer.step() 
        
        if batch_idx % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(x), len(dataloader.dataset),
                100. * batch_idx / len(dataloader), loss.data.item()))

In [14]:
def val(model, dataloader, criterion):
    model.eval()
    
    val_loss = 0
    correct = 0
    
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            
            prediction = model(x)
            val_loss += criterion(prediction, y).item()
            
            y_hat = prediction.max(1, keepdim=True)[1]
            correct += y_hat.eq(y.view_as(y_hat)).sum().item()
    
    val_loss /= len(dataloader.dataset)
    
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, correct, len(dataloader.dataset),
        100. * correct / len(dataloader.dataset)))

In [15]:
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_names))

model.to(device);

In [16]:
for epoch in range(1, 50+1):
    train(model, dataloaders['train'], optimizer, criterion, epoch)
    val(model, dataloaders['val'], criterion)
    for param_group in optimizer.param_groups:
        param_group['lr'] *= 0.9


Test set: Average loss: 0.0626, Accuracy: 177/471 (38%)


Test set: Average loss: 0.0543, Accuracy: 198/471 (42%)


Test set: Average loss: 0.0378, Accuracy: 271/471 (58%)


Test set: Average loss: 0.0298, Accuracy: 323/471 (69%)


Test set: Average loss: 0.0379, Accuracy: 314/471 (67%)


Test set: Average loss: 0.0285, Accuracy: 330/471 (70%)


Test set: Average loss: 0.0160, Accuracy: 384/471 (82%)


Test set: Average loss: 0.0232, Accuracy: 361/471 (77%)


Test set: Average loss: 0.0131, Accuracy: 405/471 (86%)


Test set: Average loss: 0.0152, Accuracy: 400/471 (85%)


Test set: Average loss: 0.0116, Accuracy: 414/471 (88%)


Test set: Average loss: 0.0102, Accuracy: 416/471 (88%)


Test set: Average loss: 0.0111, Accuracy: 421/471 (89%)


Test set: Average loss: 0.0086, Accuracy: 433/471 (92%)


Test set: Average loss: 0.0114, Accuracy: 418/471 (89%)


Test set: Average loss: 0.0098, Accuracy: 429/471 (91%)


Test set: Average loss: 0.0073, Accuracy: 436/471 (93%)


Test set: Ave


Test set: Average loss: 0.0076, Accuracy: 438/471 (93%)


Test set: Average loss: 0.0087, Accuracy: 430/471 (91%)


Test set: Average loss: 0.0062, Accuracy: 447/471 (95%)


Test set: Average loss: 0.0055, Accuracy: 451/471 (96%)


Test set: Average loss: 0.0060, Accuracy: 446/471 (95%)


Test set: Average loss: 0.0059, Accuracy: 452/471 (96%)


Test set: Average loss: 0.0065, Accuracy: 445/471 (94%)


Test set: Average loss: 0.0053, Accuracy: 448/471 (95%)


Test set: Average loss: 0.0057, Accuracy: 454/471 (96%)


Test set: Average loss: 0.0053, Accuracy: 450/471 (96%)


Test set: Average loss: 0.0048, Accuracy: 456/471 (97%)


Test set: Average loss: 0.0053, Accuracy: 451/471 (96%)


Test set: Average loss: 0.0062, Accuracy: 452/471 (96%)


Test set: Average loss: 0.0056, Accuracy: 454/471 (96%)


Test set: Average loss: 0.0052, Accuracy: 455/471 (97%)


Test set: Average loss: 0.0051, Accuracy: 455/471 (97%)


Test set: Average loss: 0.0050, Accuracy: 457/471 (97%)


Test set: Ave


Test set: Average loss: 0.0050, Accuracy: 459/471 (97%)


Test set: Average loss: 0.0048, Accuracy: 456/471 (97%)


Test set: Average loss: 0.0051, Accuracy: 456/471 (97%)


Test set: Average loss: 0.0051, Accuracy: 458/471 (97%)


Test set: Average loss: 0.0048, Accuracy: 457/471 (97%)


Test set: Average loss: 0.0051, Accuracy: 457/471 (97%)


Test set: Average loss: 0.0049, Accuracy: 457/471 (97%)


Test set: Average loss: 0.0046, Accuracy: 458/471 (97%)


Test set: Average loss: 0.0050, Accuracy: 457/471 (97%)



In [17]:
torch.save(model, 'models/resnet34-50.pt')

In [27]:
test_folder = !ls data/test

In [30]:
from PIL import Image

In [54]:
result = []
for test_image_file in test_folder:
    test_image = Image.open(f'data/test/{test_image_file}')
    test_image_tensor = data_transforms['test'](test_image)
    test_image_tensor.unsqueeze_(0)
    predict = model(test_image_tensor.to(device))
    result.append({'file': test_image_file, 'species': class_names[np.array(predict.max(1)[1][0])]})

In [55]:
import pandas as pd

In [56]:
submit = pd.DataFrame(data=result)

In [57]:
submit.head()

Unnamed: 0,file,species
0,0021e90e4.png,Small-flowered Cranesbill
1,003d61042.png,Fat Hen
2,007b3da8b.png,Sugar beet
3,0086a6340.png,Common Chickweed
4,00c47e980.png,Sugar beet


In [58]:
len(submit)

794

In [59]:
submit.to_csv('submit-resnet34-50.csv', index=False)

In [62]:
#97.1% on test