In [1]:
import torch
import torchvision
import torchvision.transforms as T
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

In [35]:
import os
import pandas as pd
from torchvision.io import read_image

class CustomImageDataset(torch.utils.data.Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None, valsplit=0.2, train=True):
        self.img_labels = pd.read_csv(annotations_file)
        length = len(self.img_labels)
        if train:
            self.img_labels = self.img_labels[:int(length*(1-valsplit))]
        else:
            self.img_labels = self.img_labels[int(length*(1-valsplit)):]
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0] + '.jpg')
        image = read_image(img_path)
        label = torch.tensor([
            self.img_labels.iloc[idx, 1].astype(float),
            self.img_labels.iloc[idx, 2].astype(float),
            self.img_labels.iloc[idx, 3].astype(float),
            self.img_labels.iloc[idx, 4].astype(float)
        ])
        #label = self.img_labels.iloc[idx, 1].astype(float)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [36]:
transformTr = T.Compose([
    T.Resize(size=(300, 300)),
    T.AutoAugment(),
    T.ConvertImageDtype(torch.float),
    T.Normalize((0,0,0), (255, 255, 255))])

transformVal = T.Compose([
    T.Resize(size=(300, 300)),
    T.ConvertImageDtype(torch.float),
    T.Normalize((0,0,0), (255, 255, 255))])

batch_size = 30

trainset = CustomImageDataset(
    annotations_file='./plant/train.csv', 
    img_dir='./plant/train', 
    transform=transformTr,
    train=True)

trainloader = torch.utils.data.DataLoader(
    trainset, 
    batch_size=batch_size, 
    shuffle=True,
    num_workers=0
)

valset = CustomImageDataset(
    annotations_file='./plant/train.csv', 
    img_dir='./plant/train', 
    transform=transformVal,
    train=False)

valloader = torch.utils.data.DataLoader(
    valset, 
    batch_size=batch_size, 
    shuffle=True,
    num_workers=0
)

In [37]:
i, l = trainset.__getitem__(122)
print(l)

tensor([0., 1., 0., 0.], dtype=torch.float64)


In [38]:
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 64, 5)
        self.pool2 = nn.MaxPool2d(2,2)
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.pool3 = nn.MaxPool2d(2,2)
        self.conv4 = nn.Conv2d(128, 128, 5)
        self.pool4 = nn.MaxPool2d(2,2)
        self.dropout = nn.Dropout(0.5)
        self.dense1 = nn.Linear(15*15*128, 512)
        self.dense2 = nn.Linear(512, 4)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = F.relu(self.conv3(x))
        x = self.pool3(x)
        x = F.relu(self.conv4(x))
        x = self.pool4(x)
        x = torch.flatten(x,1)
        x = self.dropout(x)
        x = F.relu(self.dense1(x))
        x = self.dense2(x)
        #x = self.dense2(x)
        #print(x.shape)
        return x

net = ConvNet()

In [40]:
criterion = nn.CrossEntropyLoss()
#optimizer = optim.RMSprop(net.parameters(), lr=1e-4, momentum=0.9)
optimizer = optim.Adam(net.parameters())

In [42]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)

ConvNet(
  (conv1): Conv2d(3, 64, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(128, 128, kernel_size=(5, 5), stride=(1, 1))
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (dense1): Linear(in_features=28800, out_features=512, bias=True)
  (dense2): Linear(in_features=512, out_features=4, bias=True)
)

In [46]:
epoch_num = 10
for epoch in range(epoch_num):
    tr_loss = 0.0
    tr_correct = 0.0
    i = 0
    val_loss = 0.0
    val_correct = 0.0
    j = 0
    with tqdm(trainloader, unit="batch", ascii=True) as tepoch:
        for data in tepoch:
            i = i + 1
            tepoch.set_description(f"Epock {epoch + 1}")
            inputs, labels = data
            labels = labels.to(device)
            inputs = inputs.to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            predicted = torch.max(outputs.data, 1)
            tr_correct += (predicted[1] == torch.max(labels.data, 1)[1]).sum().item()
            #tr_correct += (outputs == labels).sum().item()

            tr_loss += loss.item()
            tepoch.set_postfix(tr_loss=tr_loss/i, tr_acc=tr_correct/i/batch_size)
        
        
        with torch.no_grad():
            for val_data in valloader:
                j = j + 1
                val_input, val_label = val_data
                val_input = val_input.to(device)
                val_label = val_label.to(device)
                val_output = net(val_input)
                vloss = criterion(val_output, val_label)
                val_loss += vloss
                predicted = torch.max(val_output.data, 1)
                val_correct += (predicted[1] == torch.max(val_label.data, 1)[1]).sum().item()
                #val_correct += (val_output == val_label).sum().item()
        print(f"val_loss={val_loss/j}, val_acc={val_correct/j/batch_size}")

Epock 1: 100%|##########| 49/49 [01:14<00:00,  1.52s/batch, tr_acc=0.344, tr_loss=1.25]


val_loss=1.2911095926394829, val_acc=0.28205128205128205


Epock 2: 100%|##########| 49/49 [01:14<00:00,  1.52s/batch, tr_acc=0.305, tr_loss=1.24]


val_loss=1.2524010951702411, val_acc=0.28205128205128205


Epock 3: 100%|##########| 49/49 [01:14<00:00,  1.52s/batch, tr_acc=0.331, tr_loss=1.24]


KeyboardInterrupt: 

In [None]:
itr1 = iter(valloader)
image, label = itr1.next()
print(torch.max(label, 1))

torch.return_types.max(
values=tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=torch.float64),
indices=tensor([0, 2, 0, 0, 2, 1, 2, 0, 2, 2, 0, 3, 0, 2, 2, 2, 2, 3, 0, 2, 3, 0, 3, 2,
        3, 0, 0, 0, 3, 2]))
