# Notebook Setting

In [1]:
%load_ext autoreload
%autoreload 2

# login
import wandb

%env WANDB_API_KEY=$cccc7c8a243b0a2734f74496bc3270f5ebf11d7c

wandb.login()

env: WANDB_API_KEY=<redacted>


True

# Load Library

In [6]:
import sys

import sklearn.datasets
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

import matplotlib.pyplot as plt

import os
from modules import *
from utils import *

from easydict import EasyDict as edict
import yaml


# for mixed precision
from torch.cuda.amp import GradScaler
from torch.cuda.amp import autocast


# Working directory at the time this cell runs; the config file path, the log
# file path and the model save path used further down are all built from it.
BASE_DIR = os.getcwd()

# Dataset

In [3]:
# Number of leading rows used for training; everything after it is held out
# for validation.
N_TRAIN = 60000
BATCH_SIZE = 2048

# Download (or read from the local "mnist_784" cache directory) the OpenML
# MNIST table. The code below relies on `.data` / `.target` being pandas
# objects (it uses `.values`).
mnist = sklearn.datasets.fetch_openml('mnist_784', data_home="mnist_784")

# Divide by 255 so the features are scaled down (assumes 8-bit pixel
# intensities - TODO confirm).
x_train = torch.tensor(mnist.data[:N_TRAIN].values, dtype=torch.float) / 255
x_test = torch.tensor(mnist.data[N_TRAIN:].values, dtype=torch.float) / 255

# Targets are not delivered as integers, so convert them explicitly. int64 is
# requested by name because nn.CrossEntropyLoss expects Long class indices and
# a bare `int` can map to a 32-bit type on some platforms.
y_train = torch.tensor(mnist.target[:N_TRAIN].astype("int64").values)
y_test = torch.tensor(mnist.target[N_TRAIN:].astype("int64").values)


train_dataset = TensorDataset(x_train, y_train)
# Shuffle the training set every epoch so mini-batches are not always made of
# the same samples in the same order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

valid_dataset = TensorDataset(x_test, y_test)
# Validation order does not influence the averaged loss, so no shuffling.
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE)

# Model

In [11]:
class MyModel(nn.Module):
    """Linear 784 -> 10 classifier that carries its own training state.

    Besides the network, each instance owns the Adam optimizer, the
    cross-entropy loss, the LR scheduler, the AMP gradient scaler and the
    loss meters. `optimize` / `validate` process one batch each and record
    the batch loss in `loss_meters`.

    Args:
        hparams: Object exposing `SchedulerClass` (called with the optimizer
            as first argument) and `scheduler_params` (keyword arguments
            forwarded to it).
    """

    # ------------------- WRITE CODE ------------------ # 
    def __init__(self, hparams):
        super().__init__()

        # -----------------------------MODEL--------------------------- #    

        self.layers = nn.Sequential(nn.Linear(784, 10))
        
        # -----------------------------Optimizer---------------------------- #
        self.optimizer = torch.optim.Adam(self.parameters(), lr = 0.001)
        self.loss_fn = nn.CrossEntropyLoss()
        # Number of batches whose gradients are accumulated before one
        # optimizer step; fixed to 1 here instead of being read from hparams.
        self.accumulative_step = 1 # hparams.accumulative_step

        # -----------------------------Scheduler---------------------------- #
        self.scheduler = hparams.SchedulerClass(self.optimizer, **hparams.scheduler_params) # scheduler


        # -----------------------------Metric---------------------------- #

        # AverageMeter comes from the project helpers; this class only uses
        # its `update(value, n)`, `avg` and `reset()` members.
        self.loss_meters = {'loss' : AverageMeter()}


        # mixed precision

        self.scaler = GradScaler()

    def forward(self, x):
        """Return the output of `self.layers` for input `x`."""
        # ------- Forward Pass ------- # 
        

        return self.layers(x)        

    def _to_model_device(self, batch):
        """Unpack `(inputs, targets)` and move both to the parameters' device."""
        inputs, targets = batch
        # Follow the model instead of hard-coding `.cuda()`: identical for a
        # model living on the current CUDA device, and also correct when the
        # model sits on another device.
        device = next(self.parameters()).device
        return inputs.to(device), targets.to(device)

    def optimize(self, batch, iter):
        """Run one mixed-precision training step and log the batch loss.

        Args:
            batch: `(inputs, targets)` pair of tensors.
            iter: Index of the batch; the `iter + 1` test below suggests it
                is zero-based - confirm against the caller.
        """
        # -------------- Train ----------------- # 

        inputs, targets = self._to_model_device(batch)


        with autocast():
            out = self(inputs) 
            # Scale the loss down so gradients summed over
            # `accumulative_step` batches average instead of adding up.
            loss = self.loss_fn(out, targets ) / self.accumulative_step


        # ----------- DO NOT EDIT ------------ # 
        self.scaler.scale(loss).backward() # mixed precision


        # gradient accumulation 
        if not (iter + 1) % self.accumulative_step: 
            #self.optimizer.step() 
            self.scaler.step(self.optimizer) # mixed precision       
            self.scaler.update()   
            self.optimizer.zero_grad()

        self.loss_log(loss, batch[0].size(0) )



    def validate(self, batch):
        """Compute the loss of one validation batch and log it.

        Args:
            batch: `(inputs, targets)` pair of tensors.
        """
        # -------------- validate ----------------- # 
        inputs, targets = self._to_model_device(batch)

        # Nothing is back-propagated here, so do not build an autograd graph.
        with torch.no_grad():
            out = self(inputs) 
            loss = self.loss_fn(out, targets)   

        # loss_log() multiplies by `accumulative_step` to undo the scaling
        # done in optimize(). The validation loss was never scaled, so divide
        # first; otherwise the logged value is inflated whenever
        # `accumulative_step` > 1.
        self.loss_log(loss / self.accumulative_step, batch[0].size(0) )

    
    # --------------- DO NOT EDIT ----------------- #
    def loss_log(self, loss, batch_size, key = 'loss'):
        """Feed `loss.item() * accumulative_step` and `batch_size` to the meter stored under `key`."""
        self.loss_meters[key].update(loss.item() * self.accumulative_step, batch_size )

    def loss_dict(self, set_name):
        """Return `{'<SET_NAME>_<key>': meter.avg}` for every loss meter."""
        return { f'{set_name.upper()}_{k}' : v.avg for k, v in self.loss_meters.items() }
    
    def meter_initialize(self):
        """Call `reset()` on every loss meter."""
        for k, v in self.loss_meters.items():
            v.reset()


# Fit

In [16]:
with open(f"{BASE_DIR}/config.yaml") as f:
    # safe_load only builds plain Python containers/scalars. A bare
    # yaml.load(f) has no Loader argument, which raises TypeError on
    # PyYAML >= 6 and can construct arbitrary objects on older versions.
    hparams = edict(yaml.safe_load(f))

# No trailing comma here: `BASE_DIR,` would store the one-element tuple
# (BASE_DIR,) instead of the path string.
hparams.save_path = BASE_DIR  # model save path

hparams.project ='wavenet_test'

hparams.log_path = f'{BASE_DIR}/log.txt'


# The model builds its scheduler as SchedulerClass(optimizer, **scheduler_params).
hparams.SchedulerClass = torch.optim.lr_scheduler.ReduceLROnPlateau # scheduler
hparams.scheduler_params = dict(      # scheduler params
    mode='min',        
    factor=0.2,
    patience= 4 ,
    verbose=True, 
    threshold=0.01,
    threshold_mode='abs',
    cooldown=0, 
    min_lr=1e-12,
    eps=1e-08
    )



# MyModel moves every batch to CUDA, so the model itself has to live there too.
m = MyModel(hparams).cuda()

# Fitter comes from the project helpers.
# NOTE(review): the meaning of the third positional argument (False) is not
# visible in this notebook - confirm against Fitter.fit.
fitter = Fitter(m, hparams)
fitter.fit(train_loader, valid_loader, False)

[Epoch 0]TRAIN loss : 1.76004 VALID loss : 1.27225 Time : 0.76362 s
[Epoch 1]TRAIN loss : 1.06685 VALID loss : 0.86234 Time : 0.77167 s
[Epoch 2]TRAIN loss : 0.79311 VALID loss : 0.68711 Time : 0.71899 s
[Epoch 3]TRAIN loss : 0.66137 VALID loss : 0.59164 Time : 0.77499 s
[Epoch 4]TRAIN loss : 0.58331 VALID loss : 0.53090 Time : 0.85610 s
[Epoch 5]TRAIN loss : 0.53104 VALID loss : 0.48858 Time : 0.71997 s
[Epoch 6]TRAIN loss : 0.49333 VALID loss : 0.45728 Time : 0.76300 s
[Epoch 7]TRAIN loss : 0.46469 VALID loss : 0.43315 Time : 0.76297 s
[Epoch 8]TRAIN loss : 0.44212 VALID loss : 0.41394 Time : 0.84607 s
[Epoch 9]TRAIN loss : 0.42382 VALID loss : 0.39826 Time : 0.88597 s
[Epoch 10]TRAIN loss : 0.40865 VALID loss : 0.38522 Time : 0.95927 s
[Epoch 11]TRAIN loss : 0.39583 VALID loss : 0.37420 Time : 0.73465 s
[Epoch 12]TRAIN loss : 0.38485 VALID loss : 0.36475 Time : 0.74810 s
[Epoch 13]TRAIN loss : 0.37531 VALID loss : 0.35656 Time : 0.73594 s
[Epoch 14]TRAIN loss : 0.36695 VALID loss : 