In [1]:
import os
import json
from google.colab import drive

# Mount Google Drive so the project checkout stored there is reachable from the Colab VM.
drive.mount('/content/drive')
# Use the absolute form of the original "./drive/MyDrive/..." path. The relative
# form only resolves while the working directory is the parent of the mount
# point (/content), so re-running this cell after the chdir would fail.
os.chdir("/content/drive/MyDrive/git_project/ImageClassification/src")

# Read the Kaggle API credentials kept two levels above src/ and export them as
# environment variables. The context manager guarantees the file handle is
# closed (the original left it open for the lifetime of the kernel).
with open("../../kaggle.json", 'r') as f:
    json_data = json.load(f)
os.environ['KAGGLE_USERNAME'] = json_data['username']
os.environ['KAGGLE_KEY'] = json_data['key']

# !mkdir ../data
# !kaggle competitions download -c llm-detect-ai-generated-text -p ../data
# !unzip ../data/llm-detect-ai-generated-text -d ../data

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import module

In [2]:
!pip install adamp

Collecting adamp
  Downloading adamp-0.3.0.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: adamp
  Building wheel for adamp (setup.py) ... [?25l[?25hdone
  Created wheel for adamp: filename=adamp-0.3.0-py3-none-any.whl size=5983 sha256=8ce90d8e9b22d6d15d537d66c57fb2770f8d099f3433d07d3a5e1aad1c012b18
  Stored in directory: /root/.cache/pip/wheels/c7/ad/0f/b41b1c45b18c66e5eef5d2254415af8055c7e2b0934145157d
Successfully built adamp
Installing collected packages: adamp
Successfully installed adamp-0.3.0


In [3]:
import torch
import pickle
import torchvision.transforms as transforms
import torch.nn as nn
from torch.utils.data import DataLoader
from utils import seed_everything, result2df, show_result
from dataset import MnistDataset
from optimizer import get_sgd_optimizer, get_adam_optimizer
from models import LinearModel, SimpleConvModel, ResnetModel, ResnetImageNetModel, UNetResnet50


# Set hyperparameters

In [4]:
# ---- Run configuration ----

# Problem geometry: input resolution and number of target classes.
n_classes = 10
image_size = (28, 28)

# Optimisation schedule.
batch_size = 10
training_epochs = 50
patience = 3  # presumably the early-stopping tolerance in epochs; verify against the training cell

# Filesystem locations (relative to the current working directory).
data_dir = "./sample_data/MNIST_data/"
result_dir = "../result"

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Fix seed values

In [5]:
# Fix the random seeds through the project helper imported from utils.
# NOTE(review): no seed value is passed here, so whatever default the helper
# defines applies; which libraries it seeds is not visible from this notebook.
seed_everything()

# Load dataset

In [6]:
# Build the two MNIST splits from the project's dataset wrapper.
# Both read from `data_dir` and each gets its own ToTensor transform; only the
# `step` argument differs. The train split is constructed first, then test.
mnist_train, mnist_test = (
    MnistDataset(root_dir=data_dir, step=split_name, transform=transforms.ToTensor())
    for split_name in ("train", "test")
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./sample_data/MNIST_data/MnistDataset/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 75351898.21it/s]


Extracting ./sample_data/MNIST_data/MnistDataset/raw/train-images-idx3-ubyte.gz to ./sample_data/MNIST_data/MnistDataset/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./sample_data/MNIST_data/MnistDataset/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 37101284.48it/s]

Extracting ./sample_data/MNIST_data/MnistDataset/raw/train-labels-idx1-ubyte.gz to ./sample_data/MNIST_data/MnistDataset/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./sample_data/MNIST_data/MnistDataset/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 22029481.61it/s]


Extracting ./sample_data/MNIST_data/MnistDataset/raw/t10k-images-idx3-ubyte.gz to ./sample_data/MNIST_data/MnistDataset/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./sample_data/MNIST_data/MnistDataset/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4162230.45it/s]

Extracting ./sample_data/MNIST_data/MnistDataset/raw/t10k-labels-idx1-ubyte.gz to ./sample_data/MNIST_data/MnistDataset/raw






# Define dataloader

In [7]:
# Training batches: reshuffled every epoch; an incomplete final batch is dropped
# so every optimisation step sees exactly `batch_size` samples.
train_dataloader = DataLoader(
    dataset=mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

# Evaluation batches: every test sample must be scored exactly once, so nothing
# is dropped (drop_last=True would silently discard the tail whenever
# batch_size does not divide the dataset size). Shuffling is pointless for
# metrics and is turned off to keep evaluation order deterministic.
test_dataloader = DataLoader(
    dataset=mnist_test,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
)

# Models for training

In [8]:
# Candidate architectures, keyed by the name that appears in the training log
# and in the saved weight filenames. Every class is instantiated with the same
# (device, image_size, n_classes) arguments.
# Currently disabled candidates -- add them to the tuple below to train them:
#     ("resnet_model", ResnetModel),
#     ("resnet_imagenet_model", ResnetImageNetModel),
#     ("unet_resnet", UNetResnet50),
model_dicts = {
    scheme_key: model_cls(device, image_size, n_classes)
    for scheme_key, model_cls in (
        ("linear_model", LinearModel),
        ("simple_conv_model", SimpleConvModel),
    )
}

# Optimizers for training

In [9]:
# Optimizer factories keyed by a short label. The values are the factory
# functions themselves (not optimizer instances); the training cell calls each
# one with a model to obtain an optimizer.
optimizer_dicts = dict(
    adam=get_adam_optimizer,
    sgd=get_sgd_optimizer,
)

# Loss function for training

In [10]:
# Loss functions keyed by a short label; only cross-entropy is registered.
loss_dicts = dict(ce=nn.CrossEntropyLoss())

# Train every model / optimizer / loss combination

In [11]:
def _run_epoch(model, dataloader, loss_fn, device, optimizer=None):
    """Run one full pass over `dataloader` and return `(accuracy, mean_loss)`.

    When `optimizer` is given the model is put in training mode and updated
    after every batch. When it is `None` the model is put in eval mode and the
    pass runs with autograd disabled, so no graph is built during validation.

    Both return values are Python floats averaged per sample. The per-batch
    loss is weighted by the batch length, which assumes `loss_fn` returns the
    mean over the batch (true for the default `nn.CrossEntropyLoss()`).

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    n_correct = 0
    n_samples = 0
    with torch.set_grad_enabled(is_training):
        for inputs, labels in dataloader:
            inputs = inputs.to(device).to(torch.float)
            labels = labels.to(device).to(torch.long)

            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_len = labels.size(0)
            # detach() so the running totals never keep an autograd graph alive.
            loss_sum = loss_sum + loss.detach() * batch_len
            n_correct = n_correct + (outputs.argmax(dim=1) == labels).sum()
            n_samples += batch_len

    if n_samples == 0:
        raise ValueError("dataloader yielded no samples")
    return float(n_correct / n_samples), float(loss_sum / n_samples)


scheme_results = []
weight_path = os.path.join(result_dir, "weights")
os.makedirs(weight_path, exist_ok=True)

for model_name, model in model_dicts.items():
    # Snapshot the untrained weights once. Every scheme below restarts from this
    # snapshot; otherwise the second optimizer would continue from the weights
    # the first optimizer already trained, making the comparison meaningless.
    initial_state = {
        key: tensor.detach().clone() for key, tensor in model.state_dict().items()
    }
    for optimizer_name, optimizer_get_fn in optimizer_dicts.items():
        for loss_name, loss_fn in loss_dicts.items():
            print(f"model :{model_name}, optimizer:{optimizer_name}, loss_fn:{loss_name}")

            # Fresh weights and a fresh optimizer (no stale momentum/Adam state)
            # for every (model, optimizer, loss) combination.
            model.load_state_dict(initial_state)
            optimizer = optimizer_get_fn(model)

            epoch_train_accuracies = []
            epoch_train_losses = []
            epoch_valid_accuracies = []
            epoch_valid_losses = []
            best_valid_loss = float("inf")
            patience_stack = 0
            torch.cuda.empty_cache()

            for epoch in range(training_epochs):
                train_acc, train_loss = _run_epoch(
                    model, train_dataloader, loss_fn, device, optimizer
                )
                print(f"train_accuracy:{train_acc:.4f}, train_loss:{train_loss:.4f}", end=" ")
                epoch_train_accuracies.append(train_acc)
                epoch_train_losses.append(train_loss)

                # Validation uses the test split (no separate validation set here).
                valid_acc, valid_loss = _run_epoch(model, test_dataloader, loss_fn, device)
                print(f"valid_accuracy:{valid_acc:.4f}, valid_loss:{valid_loss:.4f}")
                epoch_valid_accuracies.append(valid_acc)
                epoch_valid_losses.append(valid_loss)

                if valid_loss <= best_valid_loss:
                    # New best (ties count as best, as before): checkpoint it and
                    # reset the counter so patience measures consecutive
                    # non-improving epochs.
                    best_valid_loss = valid_loss
                    patience_stack = 0
                    torch.save(model.state_dict(), f"{weight_path}/{model_name}_{optimizer_name}_{loss_name}.pth")
                else:
                    patience_stack += 1
                    # Honour the configured `patience` instead of a hard-coded 3.
                    if patience_stack >= patience:
                        break

            scheme_results.append(
                {
                    "model": model_name,
                    "optimizer": optimizer_name,
                    "loss": loss_name,
                    "epoch_train_accuracies": epoch_train_accuracies,
                    "epoch_train_losses": epoch_train_losses,
                    "epoch_valid_accuracies": epoch_valid_accuracies,
                    "epoch_valid_losses": epoch_valid_losses,
                }
            )

model :linear_model, optimizer:adam, loss_fn:ce
train_accuracy:0.8965, train_loss:0.3877 valid_accuracy:0.9189, valid_loss:0.2886
train_accuracy:0.9192, train_loss:0.2877 valid_accuracy:0.9222, valid_loss:0.2737


KeyboardInterrupt: ignored

In [None]:
# Persist the per-scheme training histories so they can be reloaded without
# retraining. Create the output directory first: open() does not create missing
# parent directories, so this cell would otherwise raise FileNotFoundError when
# `result_dir` does not exist yet.
os.makedirs(result_dir, exist_ok=True)
with open(f"{result_dir}/result.pkl", "wb") as f:
    pickle.dump(scheme_results, f)

In [None]:
# # load results
# import pickle
# with open(f"{result_dir}/result.pkl", "rb") as f:
#     scheme_results = pickle.load(f)

In [None]:
# Convert the list of per-scheme result dicts with the project helper from utils.
# NOTE(review): the name suggests it returns a pandas DataFrame -- confirm in utils.result2df.
df = result2df(scheme_results)

# Plot results - validation accuracy for each scheme

In [None]:
# Hand the converted results to the project's display helper from utils.
# NOTE(review): what exactly is drawn is defined in utils.show_result, not here.
show_result(df)