## Installation

In [None]:
pip install wandb numpy pandas matplotlib torch torchvision

## Dataset

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#!unzip /content/drive/MyDrive/DL_Assignment2/Dataset/nature_12K.zip -d /content/drive/MyDrive/DL_Assignment2/Dataset

In [20]:
import os
os.chdir("/content/drive/MyDrive/DL_Assignment2/partB")

## Libraries

In [34]:
%%writefile libraries.py
import torch
import os
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from torch.utils.data import Subset, DataLoader
import numpy as np
import torch.nn as nn
import torch.optim as optim
import wandb
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights
import gc
from PIL import Image

Writing libraries.py


## Dataset loader

In [35]:
%%writefile data_loader.py
import torch
from torch.utils.data import Subset, DataLoader
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
import os
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights
from PIL import Image
import numpy as np

def validationDataSplit(train_dataset):
  classLabels = [label for _,label in train_dataset.samples]
  num_classes = len(np.unique(classLabels))

  sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
  train_indices, val_indices = next(sss.split(train_dataset.samples, classLabels))

  train_subset = Subset(train_dataset, train_indices)
  val_subset = Subset(train_dataset, val_indices)
  return train_subset, val_subset, num_classes


def load_data(base_dir, isDataAug, batch_size):
  train_dir = os.path.join(base_dir, 'train')
  test_dir = os.path.join(base_dir, 'val')

  train_transform, test_transform = None, None

  # model_transforms = EfficientNet_V2_S_Weights.IMAGENET1K_V1.transforms()
  # print(model_transforms)

  if isDataAug == False:
    train_transform = transforms.Compose([
      transforms.Resize(384, interpolation=Image.BILINEAR),
      transforms.CenterCrop(384),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
  else:
    train_transform = transforms.Compose([
      transforms.Resize(384, interpolation=Image.BILINEAR),
      transforms.CenterCrop(384),
      transforms.RandomHorizontalFlip(),
      transforms.RandomRotation(10),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

  test_transform = transforms.Compose([
      transforms.Resize(384, interpolation=Image.BILINEAR),
      transforms.CenterCrop(384),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

  train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
  test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)
  train_dataset, val_dataset, num_classes = validationDataSplit(train_dataset)

  # print(f"inp: {train_dataset[0][0].shape} {train_dataset[0][1]}")

  train_loader = DataLoader(train_dataset,shuffle=True,num_workers=2,batch_size=batch_size,pin_memory=True)
  test_loader = DataLoader(test_dataset,shuffle=True,num_workers=2,batch_size=64,pin_memory=True)
  val_loader = DataLoader(val_dataset,shuffle=True,num_workers=2,batch_size=64,pin_memory=True)

  return train_loader, test_loader, val_loader, num_classes

Overwriting data_loader.py


## Training CNN

In [36]:
%%writefile neural_network.py
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights

class ConvolutionalNeuralNetwork(nn.Module):
  activationFunctionsMap = {"ReLU": nn.ReLU, "GELU": nn.GELU, "SiLU": nn.SiLU}
  # optimizersMap = {"sgd": optim.SGD, "rmsprop": optim.RMSprop, "adam": optim.Adam}

  def __init__(self, num_classes,
               num_filters, filter_sizes,
               activationFun, optimizer,
               n_neurons_denseLayer,
               isBatchNormalization, dropout,
               learning_rate=0.001,
               momentum=0.5, beta = 0.9,
               beta1=0.9, beta2=0.99,
               epsilon=1e-8, weight_decay=0.0001):
    super(ConvolutionalNeuralNetwork, self).__init__()
    self.num_classes = num_classes
    self.num_filters = num_filters
    self.filter_sizes = filter_sizes
    self.activationFun = ConvolutionalNeuralNetwork.activationFunctionsMap[activationFun]
    # self.optimizer = ConvolutionalNeuralNetwork.optimizersMap[optimizer]

    self.n_neurons_denseLayer = n_neurons_denseLayer
    self.isBatchNormalization = isBatchNormalization
    self.dropout = dropout

    self.lr = learning_rate
    self.momentum = momentum
    self.betas = (beta1, beta2)
    self.eps = epsilon
    self.alpha = beta
    self.weight_decay = weight_decay
    # self.count = 0
    # self.cached_inputs = None    # dataloader shuffling within batches to cache (else needs to make shuffle = False)

    self.defineModel()

    trainable_parameters = [p for p in self.parameters() if p.requires_grad == True]

    if(optimizer == "sgd"):
      self.optimizer = optim.SGD(trainable_parameters, lr=self.lr, momentum=self.momentum, weight_decay=self.weight_decay)
    elif(optimizer == "rmsprop"):
      self.optimizer = optim.RMSprop(trainable_parameters, lr=self.lr, alpha=self.alpha, eps=self.eps, weight_decay=self.weight_decay)
    elif(optimizer == "adam"):
      self.optimizer = optim.Adam(trainable_parameters, lr=self.lr, betas=self.betas, eps=self.eps, weight_decay=self.weight_decay)
    # print(trainable_parameters)

  def defineModel(self):
    self.model = efficientnet_v2_s(weights="IMAGENET1K_V1")

    # Freezing all layers except the last layer
    for param in self.model.parameters():
        param.requires_grad = False

    # Replacing last layer (its a classifier at the end containing dropout followed by linear layer)
    num_in_features_last_layer = self.model.classifier[1].in_features
    self.model.classifier[1] = nn.Linear(num_in_features_last_layer, self.num_classes)

    # last layer is trainable
    for name, param in self.model.classifier[1].named_parameters():
        param.requires_grad = True

  def forward(self, inputs):
    return self.model(inputs)

  def backward(self, outputs, labels):
    loss = nn.CrossEntropyLoss()(outputs, labels)
    loss.backward()

  def updateWeights(self):
    self.optimizer.step()

Overwriting neural_network.py


## Accuracy Calculation

In [37]:
%%writefile accuracy_calculation.py
import torch
import torch.nn as nn
from torch.utils.data import Subset, DataLoader


def findOutputs(device, cnn, inputDataLoader, isTestData=False):
  cnn.eval()  # setting the model to evaluation model
  outputs = []
  total_loss = 0.0
  n_correct = 0
  n_correct_top5 = 0
  n_correct_top2 = 0
  n_samples = 0

  with torch.no_grad():
    for batch_idx, (x_batch, y_batch) in enumerate(inputDataLoader):
      x_batch, y_batch = x_batch.to(device), y_batch.to(device)
      batch_outputs = cnn(x_batch)

      loss = nn.CrossEntropyLoss()(batch_outputs, y_batch)
      total_loss += loss.item() * x_batch.size(0)

      y_pred_batch = torch.argmax(batch_outputs, dim=1)
      n_correct += (y_pred_batch == y_batch).sum().item()
      n_samples += x_batch.size(0)

      if isTestData == True:
          y_pred_batch_top5 = torch.topk(batch_outputs, 5, dim=1).indices
          n_correct_top5 += y_pred_batch_top5.eq(y_batch.view(-1, 1)).sum().item()

          y_pred_batch_top2 = torch.topk(batch_outputs, 2, dim=1).indices
          n_correct_top2 += y_pred_batch_top2.eq(y_batch.view(-1, 1)).sum().item()
      outputs.append(batch_outputs)

  outputs = torch.cat(outputs)
  accuracy = (n_correct * 100.0) / n_samples
  avg_loss = total_loss / n_samples

  top5_accuracy = None
  top2_accuracy = None
  if isTestData == True:
      top5_accuracy = (n_correct_top5 * 100.0) / n_samples
      top2_accuracy = (n_correct_top2 * 100.0) / n_samples
  return outputs, accuracy, avg_loss, top5_accuracy, top2_accuracy

Overwriting accuracy_calculation.py


## Training (ArgParser included)

In [40]:
%%writefile train_local.py
import os
import gc
import wandb
import torch
from neural_network import *
from data_loader import *
from accuracy_calculation import *
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

def trainNeuralNetwork_local(args):
  wandb.login()
  wandb.init(mode="online")
  wandb.init(project=args.wandb_project, entity=args.wandb_entity)
  if args.isDataAug == "True":
    isDataAug = True
  else:
    isDataAug = False

  train_loader, test_loader, val_loader, num_classes = load_data(args.base_dir, isDataAug, args.batch_size)
  activationFun = "SiLU"    # have no effect since using pre-trained model
  optimizer = args.optimizer
  learning_rate = args.learning_rate
  momentum = args.momentum
  beta = args.beta
  beta1 = args.beta1
  beta2 = args.beta2
  epsilon = args.epsilon
  weight_decay = args.weight_decay
  dropout = 0   # have no effect since using pre-trained model
  num_filters = [32, 32,32,32,32]   # have no effect since using pre-trained model
  filter_sizes = [3,3,3,3,3]   # have no effect since using pre-trained model
  n_neurons_denseLayer = 128  # have no effect since using pre-trained model
  isBatchNormalization = False  # have no effect since using pre-trained model

  wandb.run.name = f"{activationFun}_{optimizer}_{dropout}_{n_neurons_denseLayer}_DataAug-{isDataAug}_BatchNorm-{isBatchNormalization}"
  best_val_accuracy = 0.0
  best_accuracy_epoch = -1

  cnn = ConvolutionalNeuralNetwork(num_classes,
                                   num_filters, filter_sizes,
                                   activationFun, optimizer,
                                   n_neurons_denseLayer,
                                   isBatchNormalization, dropout,
                                   learning_rate,
                                   momentum, beta,
                                   beta1, beta2,
                                   epsilon, weight_decay)
  cnn.to(device)

  epochs = args.epochs
  for epochNum in range(epochs):
    print(f"Epoch {epochNum}:")
    for batch_idx, (x_batch, y_batch) in enumerate(train_loader):
      if(batch_idx % 40 == 0):
        print(f"Batch idx {batch_idx} running")
        # break
      x_batch, y_batch = x_batch.to(device), y_batch.to(device)
      cnn.optimizer.zero_grad()
      outputs = cnn(x_batch)
      cnn.backward(outputs, y_batch)
      cnn.updateWeights()
      del x_batch, y_batch, outputs

    # Validation accuracy
    val_outputs, val_accuracy, val_loss, _, _ = findOutputs(device, cnn, val_loader)
    print(f"validation: loss={val_loss}, accuracy={val_accuracy}")

    # Train accuracy
    train_outputs, train_accuracy, train_loss, _, _ = findOutputs(device, cnn, train_loader)
    print(f"training: loss={train_loss}, accuracy={train_accuracy}")

    if val_accuracy > best_val_accuracy:
      best_val_accuracy = val_accuracy
      best_accuracy_epoch = epochNum

    wandb.log({
        "epoch": epochNum + 1,
        "val_loss": val_loss,
        "val_accuracy": val_accuracy,
        "train_loss": train_loss,
        "train_accuracy": train_accuracy
        },commit=True)
    del val_outputs, train_outputs
    gc.collect()
    torch.cuda.empty_cache()

  wandb.log({
      "best_acc_epoch": best_accuracy_epoch,
      "best_val_accuracy": best_val_accuracy
  })

  test_outputs, test_accuracy, test_loss, test_top5_accuracy, test_top2_accuracy = findOutputs(device, cnn, test_loader, True)
  print(f"testing: loss={test_loss}, top1_accuracy={test_accuracy}, top5_accuracy = {test_top5_accuracy}, top2_accuracy = {test_top2_accuracy}")

  wandb.log({
      "test_loss": test_loss,
      "test_top1_accuracy": test_accuracy,
      "test_top5_accuracy": test_top5_accuracy,
      "test_top2_accuracy": test_top2_accuracy
  })
  del cnn,train_loader, test_loader, val_loader
  gc.collect()
  torch.cuda.empty_cache()

  wandb.finish()

Overwriting train_local.py


### ArgParser

In [41]:
%%writefile argument_parser.py
import argparse

def parse_arguments():
    parser = argparse.ArgumentParser()

    parser.add_argument("-wp", "--wandb_project", type=str, default="DA6401_Assignment2",
                        help="Project name used to track experiments in Weights & Biases dashboard")
    parser.add_argument("-we", "--wandb_entity", type=str, default="nikhithaa-iit-madras",
                        help="Wandb Entity used to track experiments in the Weights & Biases dashboard.")
    parser.add_argument("-bd", "--base_dir", type=str, default="inaturalist_12K",
                        help="Base directory where dataset (train/val folders) are present")
    parser.add_argument("-e", "--epochs", type=int, default=10,
                        help="Number of epochs to train neural network")
    parser.add_argument("-b", "--batch_size", type=int, default=32,
                        help="Batch size used to train neural network")
    parser.add_argument("-o", "--optimizer", type=str, choices=["sgd", "rmsprop", "adam"], default="sgd",
                        help="Choose one among these optimizers: ['sgd', 'rmsprop', 'adam']")
    parser.add_argument("-lr", "--learning_rate", type=float, default=0.001,
                        help="Learning rate used to optimize model parameters")
    parser.add_argument("-m", "--momentum", type=float, default=0.9,
                        help="Momentum used by momentum and nag optimizers")
    parser.add_argument("-beta", "--beta", type=float, default=0.9,
                        help="Beta used by rmsprop optimizer")
    parser.add_argument("-beta1", "--beta1", type=float, default=0.9,
                        help="Beta1 used by adam and nadam optimizers")
    parser.add_argument("-beta2", "--beta2", type=float, default=0.999,
                        help="Beta2 used by adam and nadam optimizers")
    parser.add_argument("-eps", "--epsilon", type=float, default=0.00000001,
                        help="Epsilon used by optimizers")
    parser.add_argument("-w_d", "--weight_decay", type=float, default=0.0001,
                        help="Weight decay used by optimizers")
    parser.add_argument("-da", "--isDataAug", type=str, default="False",
                        help="Whether to use data augmentation or not")

    return parser.parse_args()

Overwriting argument_parser.py


### Main File

In [42]:
%%writefile main.py
from train_local import *
from argument_parser import *
import libraries

if __name__=="__main__":
  args = parse_arguments()
  trainNeuralNetwork_local(args)

Overwriting main.py


### Running

In [None]:
# change epochs
!python3 main.py -wp DA6401_Assignment2 -we nikhithaa-iit-madras -b 32 -beta1 0.9 -beta2 0.999 -lr 0.001 -e 1 --base_dir ../Dataset/inaturalist_12K -o sgd -w_d 0  -da True

## Training sweep

In [43]:
%%writefile train_sweep.py
# import libraries
from neural_network import *
from data_loader import *
from accuracy_calculation import *
import numpy as np
import torch.nn as nn
import wandb
import gc
import os
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

def trainNeuralNetwork_sweep():
  wandb.init(mode="online")
  args = wandb.config
  train_loader, test_loader, val_loader, num_classes = load_data(args["base_dir"], args["isDataAug"], args["batch_size"])
  activationFun = args["activation"]
  optimizer = args["optimizer"]
  learning_rate = args["learning_rate"]
  momentum = args["momentum"]
  beta = args["beta"]
  beta1 = args["beta1"]
  beta2 = args["beta2"]
  epsilon = args["epsilon"]
  weight_decay = args["weight_decay"]
  dropout = args["dropout"]
  num_filters = args["num_filters"]
  filter_sizes = args["filter_sizes"]
  n_neurons_denseLayer = args["n_neurons_denseLayer"]
  isBatchNormalization = args["isBatchNormalization"]
  isDataAug = args["isDataAug"]

  wandb.run.name = f"{activationFun}_{optimizer}_{dropout}_{n_neurons_denseLayer}_DataAug-{isDataAug}_BatchNorm-{isBatchNormalization}"
  best_val_accuracy = 0.0
  best_accuracy_epoch = -1

  cnn = ConvolutionalNeuralNetwork(num_classes,
                                   num_filters, filter_sizes,
                                   activationFun, optimizer,
                                   n_neurons_denseLayer,
                                   isBatchNormalization, dropout,
                                   learning_rate,
                                   momentum, beta,
                                   beta1, beta2,
                                   epsilon, weight_decay)
  cnn.to(device)

  epochs = args["epochs"]
  for epochNum in range(epochs):
    print(f"Epoch {epochNum}:")
    for batch_idx, (x_batch, y_batch) in enumerate(train_loader):
      if(batch_idx % 40 == 0):
        print(f"Batch idx {batch_idx} running")
      x_batch, y_batch = x_batch.to(device), y_batch.to(device)
      cnn.optimizer.zero_grad()
      outputs = cnn(x_batch)
      cnn.backward(outputs, y_batch)
      cnn.updateWeights()
      del x_batch, y_batch, outputs

    # Validation accuracy
    val_outputs, val_accuracy, val_loss, _, _ = findOutputs(device, cnn, val_loader)
    # wandb.run.summary["metric_name"] = val_accuracy
    print(f"validation: loss={val_loss}, accuracy={val_accuracy}")

    # Train accuracy
    train_outputs, train_accuracy, train_loss, _, _ = findOutputs(device, cnn, train_loader)
    print(f"training: loss={train_loss}, accuracy={train_accuracy}")

    if val_accuracy > best_val_accuracy:
      best_val_accuracy = val_accuracy
      best_accuracy_epoch = epochNum

    wandb.log({
        "epoch": epochNum + 1,
        "val_loss": val_loss,
        "val_accuracy": val_accuracy,
        "train_loss": train_loss,
        "train_accuracy": train_accuracy
        },commit=True)
    del val_outputs, train_outputs
    gc.collect()
    torch.cuda.empty_cache()

  wandb.log({
      "best_acc_epoch": best_accuracy_epoch,
      "best_val_accuracy": best_val_accuracy
  })

  test_outputs, test_accuracy, test_loss, test_top5_accuracy, test_top2_accuracy = findOutputs(device, cnn, test_loader, True)
  print(f"testing: loss={test_loss}, top1_accuracy={test_accuracy}, top5_accuracy = {test_top5_accuracy}, top2_accuracy = {test_top2_accuracy}")

  wandb.log({
      "test_loss": test_loss,
      "test_top1_accuracy": test_accuracy,
      "test_top5_accuracy": test_top5_accuracy,
      "test_top2_accuracy": test_top2_accuracy
  })
  del cnn,train_loader, test_loader, val_loader
  gc.collect()
  torch.cuda.empty_cache()

  wandb.finish()

Overwriting train_sweep.py


###  Main Sweep

In [44]:
%%writefile main_sweep.py
from train_sweep import *
# import libraries
import wandb

sweep_configuration = {
    "method": "random",
    "name" : "finetune_final_sweep",
    "parameters": {
        "num_filters": {'values': [[32, 32, 32, 32, 32]]},  # have no effect since using pre-trained model
        "filter_sizes": {'values': [[3, 3, 3, 3, 3]]},        # have no effect since using pre-trained model
        "activation": {"values": ["ReLU"]},   # have no effect since using pre-trained model
        "optimizer": {"values": ["adam", "rmsprop", "sgd"]},
        "learning_rate": {"values": [1e-3, 1e-4]},
        "weight_decay": {"values": [0.0001, 0.0005]},
        "momentum": {"values": [0.9]},
        "beta": {"values": [0.9]},
        "beta1": {"values":[0.9]},
        "beta2": {"values": [0.999]},
        "epsilon": {"values": [1e-8]},
        # "base_dir": {"values":["/content/drive/MyDrive/DL_Assignment2/Dataset/inaturalist_12K/"]},
        "base_dir": {"values": ["../Dataset/inaturalist_12K"]},
        "isDataAug": {"values": ["False", "True"]},
        "isBatchNormalization": {"values": ["False"]},  # have no effect since using pre-trained model
        "dropout": {"values": [0]},            # have no effect since using pre-trained model
        "n_neurons_denseLayer": {"values": [128]},   # have no effect since using pre-trained model
        "batch_size": {"values": [32,64]},
        "epochs": {"values": [5,10]}
    }
}

if __name__=="__main__":
  wandb.login()
  wandb_id = wandb.sweep(sweep_configuration, project="DA6401_Assignment2")
  wandb.agent(wandb_id, function=trainNeuralNetwork_sweep)

Overwriting main_sweep.py


### Running

In [None]:
!python3 main_sweep.py

In [None]:
# wandb.login(key="x")
# wandb_id = wandb.sweep(sweep_configuration, project="DA6401_Assignment2")
# wandb.agent(wandb_id, function=trainNeuralNetwork_sweep)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mnikhithaa[0m ([33mnikhithaa-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: 0kqsutes
Sweep URL: https://wandb.ai/nikhithaa-iit-madras/DA6401_Assignment2/sweeps/0kqsutes


[34m[1mwandb[0m: Agent Starting Run: 3ypawy7j with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth
100%|██████████| 82.7M/82.7M [00:00<00:00, 178MB/s] 


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=1.6043938837051392, accuracy=74.85
training: loss=1.6057432772353495, accuracy=74.34679334916865
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=1.1927816009521484, accuracy=79.25
training: loss=1.2003781593714764, accuracy=78.08476059507439
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.9676698985099792, accuracy=81.05
training: loss=0.9807530491631364, accuracy=79.73496687085886
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.834

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▆▆▇▇▇███

0,1
best_acc_epoch,8.0
best_val_accuracy,84.15
epoch,10.0
test_loss,0.5996
test_top1_accuracy,83.3
test_top2_accuracy,92.85
test_top5_accuracy,98.45
train_accuracy,83.56045
train_loss,0.58364
val_accuracy,83.9


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vzleiau3 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.785241639137268, accuracy=69.7
training: loss=1.7862850385302975, accuracy=69.63370421302663
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.390060715675354, accuracy=77.25
training: loss=1.3956595001138439, accuracy=75.97199649956245
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.118051746368408, accuracy=79.2
training: loss=1.1313009003517494, accuracy=78.07225903237905
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.9451105127334595, accuracy=81.3
training: loss=0.952979837049319, accuracy=79.87248406050756
Epoch 4:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.8197017173767089, accuracy=82.4
training: loss=0.8409446288845154, 

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▆▄▃▂▂▂▁▁▁
val_accuracy,▁▅▆▇▇█████

0,1
best_acc_epoch,8.0
best_val_accuracy,83.55
epoch,10.0
test_loss,0.61901
test_top1_accuracy,82.7
test_top2_accuracy,92.65
test_top5_accuracy,98.4
train_accuracy,83.11039
train_loss,0.61289
val_accuracy,83.4


[34m[1mwandb[0m: Agent Starting Run: fkfpbwwy with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: True
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.608590871334076, accuracy=83.5
training: loss=0.6096150889920061, accuracy=82.98537317164646
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.5072252478599548, accuracy=84.65
training: loss=0.48909813187646634, accuracy=84.69808726090761
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.469897602558136, accuracy=84.85
training: loss=0.4412837703927098, accuracy=86.08576072009001
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.46009

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▃▅▆█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▅█▇

0,1
best_acc_epoch,3.0
best_val_accuracy,86.0
epoch,5.0
test_loss,0.46809
test_top1_accuracy,84.9
test_top2_accuracy,93.25
test_top5_accuracy,98.65
train_accuracy,87.63595
train_loss,0.39071
val_accuracy,85.5


[34m[1mwandb[0m: Agent Starting Run: guv0ufvg with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: True
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=1.5743275833129884, accuracy=76.45
training: loss=1.5851237391036574, accuracy=74.95936992124015
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=1.104970362663269, accuracy=80.2
training: loss=1.1175462271455139, accuracy=78.67233404175522
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.8715408978462219, accuracy=81.65
training: loss=0.8871059924800003, accuracy=79.97249656207026
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.73912

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▃▃▂▂▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇███

0,1
best_acc_epoch,9.0
best_val_accuracy,85.45
epoch,10.0
test_loss,0.54861
test_top1_accuracy,84.05
test_top2_accuracy,93.45
test_top5_accuracy,98.55
train_accuracy,84.36055
train_loss,0.53538
val_accuracy,85.45


[34m[1mwandb[0m: Agent Starting Run: svox8ltp with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.6847695045471192, accuracy=82.75
training: loss=0.6989457887967746, accuracy=81.76022002750344
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.5538216733932495, accuracy=84.75
training: loss=0.5541230207280019, accuracy=84.1855231903988
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.5159271960258484, accuracy=85.25
training: loss=0.49595530647265196, accuracy=85.34816852106513
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.48647997617721556, accuracy=86.25
training: loss=0.46086648019019744, accuracy=86.2482810351294
Epoch 4:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.4677537024021149, accuracy=86.25
training: loss=0.427920304

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▃▅▆█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▆██

0,1
best_acc_epoch,3.0
best_val_accuracy,86.25
epoch,5.0
test_loss,0.49276
test_top1_accuracy,84.7
test_top2_accuracy,93.15
test_top5_accuracy,98.3
train_accuracy,86.96087
train_loss,0.42792
val_accuracy,86.25


[34m[1mwandb[0m: Agent Starting Run: 1ixruf8p with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.7918304290771485, accuracy=70.8
training: loss=1.7955336810082312, accuracy=70.2712839104888
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.3972978687286377, accuracy=77.75
training: loss=1.401219938111046, accuracy=76.75959494936868
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.127318899154663, accuracy=79.75
training: loss=1.1333838402442535, accuracy=78.50981372671583
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.9456427307128906, accuracy=81.8
training: loss=0.9577252175647298, accuracy=79.48493561695211
Epoch 4:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.8322111139297486, accuracy=82.25
training: loss=0.846189933630030

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▅▆▆▇▇▇███
train_loss,█▆▄▃▂▂▂▁▁▁
val_accuracy,▁▅▆▇▇▇▇▇██

0,1
best_acc_epoch,8.0
best_val_accuracy,84.15
epoch,10.0
test_loss,0.62112
test_top1_accuracy,83.35
test_top2_accuracy,92.95
test_top5_accuracy,98.35
train_accuracy,83.08539
train_loss,0.62147
val_accuracy,83.6


[34m[1mwandb[0m: Agent Starting Run: 1nzahybw with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=1.1628892812728882, accuracy=79.35
training: loss=1.1754694458305397, accuracy=77.59719964995624
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.858222978591919, accuracy=81.05
training: loss=0.8646191138076043, accuracy=80.19752469058632
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.7341872272491455, accuracy=82.9
training: loss=0.7457505651288725, accuracy=81.11013876734592
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.66949

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▃▅▆▆▆▇▆██

0,1
best_acc_epoch,8.0
best_val_accuracy,85.1
epoch,10.0
test_loss,0.55314
test_top1_accuracy,84.15
test_top2_accuracy,93.0
test_top5_accuracy,98.4
train_accuracy,84.47306
train_loss,0.53668
val_accuracy,84.75


[34m[1mwandb[0m: Agent Starting Run: 43k2kgau with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=2.1867139625549314, accuracy=30.7
training: loss=2.1935670194603203, accuracy=29.303662957869733
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=2.082211229324341, accuracy=47.05
training: loss=2.087817752163087, accuracy=45.918239779972495
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.98499351978302, accuracy=58.45
training: loss=1.9897647989080882, accuracy=56.669583697962246
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.8936956272125245, accuracy=66.85
training: loss=1.8984963759316074, accuracy=64.2705338167271
Epoch 4:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.8065580005645752, accuracy=71.1
training: loss=1.81461936219779

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▃▅▆▇▇████
train_loss,█▇▆▅▄▃▃▂▂▁
val_accuracy,▁▃▅▆▇▇████

0,1
best_acc_epoch,9.0
best_val_accuracy,77.65
epoch,10.0
test_loss,1.48886
test_top1_accuracy,77.2
test_top2_accuracy,89.4
test_top5_accuracy,97.1
train_accuracy,76.29704
train_loss,1.48515
val_accuracy,77.65


[34m[1mwandb[0m: Agent Starting Run: w4t1eww7 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=1.5575687618255616, accuracy=76.65
training: loss=1.570316854216543, accuracy=74.65933241655208
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=1.0963958377838134, accuracy=80.4
training: loss=1.1103041277839059, accuracy=78.68483560445056
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.8596054072380066, accuracy=82.55
training: loss=0.8796773183895716, accuracy=80.41005125640704
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.73708

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▄▆▅▅▇█▇▇█

0,1
best_acc_epoch,9.0
best_val_accuracy,85.3
epoch,10.0
test_loss,0.55013
test_top1_accuracy,84.3
test_top2_accuracy,93.0
test_top5_accuracy,98.4
train_accuracy,83.973
train_loss,0.53809
val_accuracy,85.3


[34m[1mwandb[0m: Agent Starting Run: olzzviw9 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.6063073015213013, accuracy=83.9
training: loss=0.5972729188931825, accuracy=82.96037004625578
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.5047566039562226, accuracy=85.0
training: loss=0.4794433739039582, accuracy=85.41067633454182
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.4626409571170807, accuracy=86.2
training: loss=0.435751321353321, accuracy=86.4233029128641
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.47236743

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▅▄▆▆▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▇▄▆▇█▇▇▇

0,1
best_acc_epoch,6.0
best_val_accuracy,86.8
epoch,10.0
test_loss,0.46964
test_top1_accuracy,85.35
test_top2_accuracy,93.05
test_top5_accuracy,98.5
train_accuracy,89.79872
train_loss,0.32411
val_accuracy,86.4


[34m[1mwandb[0m: Agent Starting Run: 9sjpntza with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: True
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.7805431356430055, accuracy=72.05
training: loss=1.786133164107047, accuracy=70.45880735091886
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.418567190170288, accuracy=78.5
training: loss=1.4326292221241257, accuracy=76.17202150268784
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.1863887128829955, accuracy=78.75
training: loss=1.1930637884056559, accuracy=78.23477934741842
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.0271607236862184, accuracy=80.15
training: loss=1.0278674842626068, accuracy=79.34741842730341
Epoch 4:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.903118100643158, accuracy=81.9
training: loss=0.920123505851658

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▆▄▃▃▂▂▁▁▁
val_accuracy,▁▅▅▆▇▇▇███

0,1
best_acc_epoch,7.0
best_val_accuracy,83.45
epoch,10.0
test_loss,0.66859
test_top1_accuracy,83.05
test_top2_accuracy,92.75
test_top5_accuracy,98.35
train_accuracy,82.78535
train_loss,0.67083
val_accuracy,82.75


[34m[1mwandb[0m: Agent Starting Run: maxlhjkb with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: True
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=2.23658447265625, accuracy=22.8
training: loss=2.237559325159423, accuracy=22.17777222152769
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=2.126359230041504, accuracy=43.15
training: loss=2.13009736260439, accuracy=41.20515064383048
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=2.027772048950195, accuracy=54.7
training: loss=2.030857823076569, accuracy=53.844230528816105
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.934976143836975, accuracy=63.15
training: loss=1.938133339506937, accuracy=62.17027128391049
Epoch 4:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.8507474308013916, accuracy=67.8
training: loss=1.8515397835439527, accu

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▃▅▆▇▇▇███
train_loss,█▇▆▅▄▄▃▂▂▁
val_accuracy,▁▄▅▆▇▇████

0,1
best_acc_epoch,9.0
best_val_accuracy,76.2
epoch,10.0
test_loss,1.51574
test_top1_accuracy,76.65
test_top2_accuracy,88.9
test_top5_accuracy,97.15
train_accuracy,75.95949
train_loss,1.51149
val_accuracy,76.2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w2nyr42k with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.649111221075058, accuracy=83.1
training: loss=0.6688293928816283, accuracy=81.87273409176147
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.5347934131622314, accuracy=84.15
training: loss=0.5190105975262894, accuracy=84.88561070133767
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.4914177017211914, accuracy=85.6
training: loss=0.4666926846427908, accuracy=86.01075134391799
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.47491951966285706, accuracy=85.0
training: loss=0.4393780206997613, accuracy=86.74834354294286
Epoch 4:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.45956112098693847, accuracy=86.05
training: loss=0.419185359037

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▆▅▇██▇█▇

0,1
best_acc_epoch,5.0
best_val_accuracy,86.4
epoch,10.0
test_loss,0.4661
test_top1_accuracy,84.9
test_top2_accuracy,93.25
test_top5_accuracy,98.6
train_accuracy,89.04863
train_loss,0.34936
val_accuracy,86.0


[34m[1mwandb[0m: Agent Starting Run: 4zcz4mnf with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: True
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=1.5699910345077515, accuracy=76.7
training: loss=1.5749780674519607, accuracy=75.02187773471684
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=1.1065853071212768, accuracy=81.8
training: loss=1.1222796984830041, accuracy=78.37229653706713
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.8775346856117249, accuracy=81.3
training: loss=0.8903732970887861, accuracy=80.16002000250032
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
Batch idx 160 running
Batch idx 200 running
Batch idx 240 running
validation: loss=0.765166

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▂▃▃▄▅▆▆▇█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▅▅▅▅▇▇▇█▇

0,1
best_acc_epoch,8.0
best_val_accuracy,85.2
epoch,10.0
test_loss,0.56346
test_top1_accuracy,83.25
test_top2_accuracy,93.3
test_top5_accuracy,98.45
train_accuracy,83.72297
train_loss,0.55029
val_accuracy,84.5


[34m[1mwandb[0m: Agent Starting Run: itv9yabf with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.6649145965576172, accuracy=82.7
training: loss=0.6685900366415097, accuracy=81.72271533941743
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.5296001238822937, accuracy=84.75
training: loss=0.5252533480292157, accuracy=84.33554194274284
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.4754569523334503, accuracy=85.55
training: loss=0.4628331530785647, accuracy=86.12326540817602
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.4882413737773895, accuracy=84.8
training: loss=0.44004768121241034, accuracy=86.59832479059882
Epoch 4:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=0.45395001327991485, accuracy=85.6
training: loss=0.41287158179

0,1
best_acc_epoch,▁
best_val_accuracy,▁
epoch,▁▃▅▆█
test_loss,▁
test_top1_accuracy,▁
test_top2_accuracy,▁
test_top5_accuracy,▁
train_accuracy,▁▄▆▇█
train_loss,█▄▂▂▁
val_accuracy,▁▆█▆█

0,1
best_acc_epoch,4.0
best_val_accuracy,85.6
epoch,5.0
test_loss,0.47427
test_top1_accuracy,85.25
test_top2_accuracy,93.15
test_top5_accuracy,98.4
train_accuracy,87.47343
train_loss,0.41287
val_accuracy,85.6


[34m[1mwandb[0m: Agent Starting Run: qzj6pcia with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	base_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta: 0.9
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	filter_sizes: [3, 3, 3, 3, 3]
[34m[1mwandb[0m: 	isBatchNormalization: False
[34m[1mwandb[0m: 	isDataAug: False
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	n_neurons_denseLayer: 128
[34m[1mwandb[0m: 	num_filters: [32, 32, 32, 32, 32]
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 0:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=2.216280937194824, accuracy=24.3
training: loss=2.2157239029893043, accuracy=23.577947243405426
Epoch 1:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=2.1112672119140625, accuracy=41.6
training: loss=2.1050654601299668, accuracy=42.367795974496815
Epoch 2:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=2.003027765274048, accuracy=55.8
training: loss=2.002930264008583, accuracy=55.081885235654454
Epoch 3:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.9110452480316162, accuracy=63.85
training: loss=1.9093199649413297, accuracy=62.40780097512189
Epoch 4:
Batch idx 0 running
Batch idx 40 running
Batch idx 80 running
Batch idx 120 running
validation: loss=1.8234469785690308, accuracy=68.0
training: loss=1.82534673969840

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


0,1
epoch,▁▂▄▅▇█
train_accuracy,▁▄▆▇██
train_loss,█▆▅▃▂▁
val_accuracy,▁▄▆▇▇█
val_loss,█▆▅▃▂▁

0,1
epoch,6.0
train_accuracy,69.87123
train_loss,1.74669
val_accuracy,71.45
val_loss,1.74328
