In [79]:
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset
import os
from tqdm.notebook import tqdm
import torch.optim as optim
from torch import nn
from torchvision import transforms, datasets, utils
import wandb

In [115]:
# Dataset root. Defaults to the original local folder; set the LEAVES_DATA_DIR
# environment variable to run the notebook on another machine without editing it.
DATA_DIR = os.environ.get("LEAVES_DATA_DIR", "D:/xxd/classify-leaves")

# train.csv has one row per image: a relative image path and its string label.
leaves_data = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))

In [116]:
# Distinct label values, in order of first appearance in the CSV.
leave_classes = pd.unique(leaves_data["label"])

In [117]:
# Turn the array of label names into a {label name: integer class id} lookup.
leave_classes = {name: class_id for class_id, name in enumerate(leave_classes)}

In [118]:
# Sanity check: number of distinct classes in the label -> id mapping.
len(leave_classes)

176

In [119]:
# Encode the string labels as integer class ids.
# The dtype guard keeps this cell idempotent: mapping the already-encoded ids
# through the name -> id dict a second time would turn every label into NaN.
if not pd.api.types.is_integer_dtype(leaves_data["label"]):
    leaves_data["label"] = leaves_data["label"].map(leave_classes)

In [120]:
# Shuffle every row with a fixed seed so the positional train/validation split
# taken afterwards is random but reproducible.
# NOTE(review): the frame is reassigned from itself, so running this cell twice
# without reloading the CSV reshuffles the shuffled rows and changes the split.
leaves_data = leaves_data.sample(frac=1.0, random_state=1)

In [121]:
# Sanity check: total number of labelled rows available for train + validation.
len(leaves_data)

18353

In [122]:
# Show column dtypes and non-null counts after the label encoding and shuffle.
leaves_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18353 entries, 12785 to 235
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   image   18353 non-null  object
 1   label   18353 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 430.1+ KB


In [123]:
# Number of rows that go to the training split: 90% of the data, rounded down.
train_num = int(0.9 * leaves_data.shape[0])

In [124]:
# Display the number of rows assigned to the training split.
train_num

16517

In [125]:
# The first `train_num` rows of the shuffled frame form the training split.
train_data = leaves_data.head(train_num)

In [126]:
# Discard the shuffled original index so rows are labelled 0..n-1, i.e. index
# labels and integer positions coincide.
train_data = train_data.reset_index(drop=True)

In [127]:
# Every row after the first `train_num` (roughly the last 10% of the shuffled
# frame) forms the validation split.
val_data = leaves_data.iloc[train_num:]

In [128]:
# Discard the shuffled original index so rows are labelled 0..n-1, i.e. index
# labels and integer positions coincide.
val_data = val_data.reset_index(drop=True)

In [129]:
class Mydataset(Dataset):
    """Image-classification dataset backed by a DataFrame of paths and labels.

    Args:
        leaves_data: DataFrame with an ``image`` column (file paths relative to
            ``root``) and a ``label`` column.
        transform: Optional callable applied to the loaded PIL image.
        root: Directory the ``image`` paths are relative to. When ``None``
            (the default) the ``LEAVES_DATA_DIR`` environment variable is used
            if set, otherwise the original hard-coded folder.

    Each item is an ``(image, label)`` pair.
    """

    # Original hard-coded data folder, kept as the fallback for existing callers.
    DEFAULT_ROOT = "D:/xxd/classify-leaves"

    def __init__(self, leaves_data, transform=None, root=None):
        self.data = leaves_data
        self.transform = transform
        if root is None:
            root = os.environ.get("LEAVES_DATA_DIR", self.DEFAULT_ROOT)
        self.root = root

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        # Positional (.iloc) lookup: the DataLoader hands out positions in
        # range(len(self)), which only match index *labels* when the frame has
        # a fresh 0..n-1 index. Label lookup raised KeyError otherwise.
        rel_path = self.data["image"].iloc[item]
        label = self.data["label"].iloc[item]

        # Force three channels so grayscale/RGBA/palette files still fit a
        # 3-channel Normalize, and close the file handle deterministically.
        with Image.open(os.path.join(self.root, rel_path)) as handle:
            img = handle.convert("RGB")

        if self.transform is not None:
            img = self.transform(img)

        return img, label

In [130]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutional layers and three linear layers.

    Args:
        num_classes: Size of the final linear layer (number of output logits).
        init_weights: When True, re-initialise conv and linear layers with
            ``_initialize_weights`` instead of PyTorch's defaults.

    ``forward`` maps a ``(N, 3, H, W)`` batch to ``(N, num_classes)`` logits.
    A 224x224 input gives a 6x6 feature map directly; other sizes (large enough
    to survive the conv/pool stack) are pooled to 6x6 before the classifier.
    """

    def __init__(self, num_classes=1000, init_weights=False):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(48, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(128, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        # Pins the feature map to the 6x6 the classifier expects. It has no
        # parameters (state_dict keys are unchanged) and is an exact identity
        # when the map is already 6x6, i.e. for 224x224 inputs.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128*6*6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels x height x width.
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Kaiming-normal init for conv weights, N(0, 0.01) for linear weights, zero biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

In [131]:
# Train on the first CUDA GPU when one is visible, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cpu


In [132]:
# Per-split preprocessing. Both pipelines end with the same tensor conversion
# and per-channel normalisation; they differ only in how the image reaches
# 224x224 (random crop + flip for training, plain resize for validation).
data_transform = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
    val=transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
)

In [107]:
batch_size = 32

# Wrap each split in a dataset with its matching preprocessing pipeline.
train_dataset = Mydataset(train_data, transform=data_transform["train"])
validate_dataset = Mydataset(val_data, transform=data_transform["val"])
val_num = len(validate_dataset)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
validate_loader = torch.utils.data.DataLoader(
    validate_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

In [108]:
# Size the output layer from the label mapping rather than repeating the
# literal 176, so the model follows the data if the class set changes.
net = AlexNet(num_classes=len(leave_classes), init_weights=False)
net.to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0002)

epochs = 10
save_path = './AlexNet_leaves.pth'  # best-validation-accuracy checkpoint
best_acc = 0.0

# Batches per epoch; used to average the per-batch losses.
train_steps = len(train_loader)
val_steps = len(validate_loader)

In [109]:
# Track the run in Weights & Biases. The logged hyper-parameters describe what
# is actually trained: the learning rate is read from the optimizer and the
# dropout value matches the nn.Dropout(p=0.5) layers in AlexNet.classifier.
wandb.init(project="Kaggle-leaves", config={
    "learning_rate": optimizer.param_groups[0]["lr"],
    "dropout": 0.5,
    "architecture": "AlexNet",
    "dataset": "Leaves",
})

wandb.watch(net, log_freq=10)

# try/finally: close the wandb run even when training is interrupted or fails.
try:
    for epoch in range(epochs):
        # ---- training pass -------------------------------------------------
        net.train()
        running_loss = 0.0
        acc = 0.0
        train_bar = tqdm(train_loader)
        for images, labels in train_bar:
            # Move the batch to the device once and reuse it.
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)
            predict_y = torch.max(outputs, dim=1)[1]
            acc += torch.eq(predict_y, labels).sum().item()
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_bar.set_description(
                "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss.item()))

        train_accuracy = acc / len(train_dataset)

        # ---- validation pass -----------------------------------------------
        net.eval()
        acc = 0.0
        val_running_loss = 0.0
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            # Unpack straight into batch names: the original loop variable was
            # called `val_data` and overwrote the validation DataFrame.
            for val_images, val_labels in val_bar:
                val_images, val_labels = val_images.to(device), val_labels.to(device)
                outputs = net(val_images)
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels).sum().item()
                val_loss = loss_function(outputs, val_labels)

                val_running_loss += val_loss.item()

        val_accurate = acc / val_num

        # ---- per-epoch reporting -------------------------------------------
        epoch_train_loss = running_loss / train_steps
        epoch_val_loss = val_running_loss / val_steps
        print('[epoch %d] train_loss: %.3f  train_accuracy: %.3f  val_loss: %.3f  val_accuracy: %.3f' %
                (epoch + 1, epoch_train_loss, train_accuracy, epoch_val_loss, val_accurate))
        wandb.log(
            {
                "epoch": epoch + 1,
                "train_loss": epoch_train_loss,
                "train_accuracy": train_accuracy,  # key was misspelled "train_acuracy"
                "val_loss": epoch_val_loss,
                "val_accuracy": val_accurate,
            },
            step=epoch * train_steps,
        )

        # Keep only the weights with the best validation accuracy so far.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
finally:
    wandb.finish()

  0%|          | 0/517 [00:00<?, ?it/s]

KeyboardInterrupt: 