<a href="https://colab.research.google.com/github/m-santh/AicoreKaggle/blob/main/Class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
import torch
import torchvision

!pip install pytorch_transformers
#!pip install sgmllib
from pytorch_transformers import *
#import utils
import numpy


class Dataset:
    def __init__(self, args):
        self.args = args
        self.data_X = numpy.load('/content/drive/MyDrive/AiCore/X_train.npy')
        self.data_Y = numpy.load('/content/drive/MyDrive/AiCore/y_train.npy')
        self.data_Y = self.data_Y.astype(int) 
        self.test_data = numpy.load('/content/drive/MyDrive/AiCore/X_test.npy')
        self.validation_data, self.train_data = random_split(
            (self.data_X, self.data_Y), args.validation_percent
        )

    def validation(self):
        return torch.utils.data.DataLoader(
            self.validation_data,
            batch_size=self.args.batch_size,
            pin_memory=self.args.device == "cuda",
        )

    def train(self):
        return torch.utils.data.DataLoader(
            self.train_data,
            batch_size=self.args.batch_size,
            shuffle=True,
            pin_memory=self.args.device == "cuda",
        )




In [8]:

def random_split(data, validation_percent):
    validation_points = int(validation_percent * len(data))
    return torch.utils.data.random_split(
        data, [validation_points, len(data) - validation_points]
    )


In [9]:
import collections
#import metrics
import torch

# Train
# Validate
# On given arguments, data


def run(model, criterion, optimizer, dataset, is_training: bool, metrics):
    model.train(is_training)

    dictionary = collections.defaultdict(int)

    counter = 0
    with torch.set_grad_enabled(is_training):
        for X, y in dataset:
            counter += 1
            y_pred = model(X.squeeze().reshape(X.shape[0], -1))

            loss = criterion(y_pred, y)
            for name, metric in metrics.items():
                dictionary[name] += metric(y_pred, y)

            if is_training:
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

    return {name: value / counter for name, value in dictionary.items()}


In [11]:
import argparse


def get():
    parser = argparse.ArgumentParser(description="Some classification task.")
    parser.add_argument(
        "--lr", type=float, default=3e-4, help="Learning rate for neural network"
    )
    parser.add_argument("--epochs", type=int, default=50, help="Epochs for training")
    parser.add_argument("--batch_size", type=int, default=64, help="Batch size")
    # Optimizer use choices argument
    parser.add_argument("--optimizer", default="SGD", help="Optimizer")
    parser.add_argument(
        "--model",
        choices=["baseline", "serious"],
        default="baseline",
        help="Model to choose from",
    )

    # Check between [0,1]
    parser.add_argument(
        "--validation_percent", type=float, default=0.2, help="Optimizer"
    )

    parser.add_argument(
        "--num_classes", type=int, default=10, help="Classes for classification"
    )

    parser.add_argument(
        "--size", type=int, default=5000, help="Classes for classification"
    )

    parser.add_argument(
        "--device",
        choices=["cuda", "cpu"],
        default="cpu",
        help="Device to use for training",
    )

    return parser.parse_args()


In [16]:
import torch


def getParser(args, parameters):
    return getattr(torch.optim, args.optimizer)(parameters, lr=args.lr)


In [13]:

def getModel(args):
    if args.model == "baseline":
        return Baseline(args)
    else:
        return Serious(args)


# 25%
class Baseline(torch.nn.Module):
    def __init__(self, args):
        super().__init__()
        self.layer = torch.nn.Linear(32 * 32, args.num_classes)

    def forward(self, inputs):
        return self.layer(inputs)


# 95% train, 70% on validation
class Serious(torch.nn.Module):
    def __init__(self, args):
        # float, half
        super().__init__()
        self.model = torch.nn.Sequential(
            torch.nn.Linear(32 * 32, 512),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(512),
            torch.nn.Linear(512, 256),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(256),
            torch.nn.Linear(256, args.num_classes),
        )

    def forward(self, inputs):
        return self.model(inputs)


In [14]:
def accuracy(logits, y):
    return torch.mean((torch.argmax(logits, dim=-1) == y).float())


def loss(logits, y):
    return torch.nn.functional.cross_entropy(logits, y)


def print_metrics(metrics):
    for metric, value in metrics.items():
        print(f"{metric}: {value}")


In [21]:
from collections import namedtuple
Args = namedtuple("Args", "lr epochs batch_size optimizer model validation_percent num_classes size device")

In [34]:
args = Args(lr=3e-4, epochs = 50, batch_size = 64, optimizer = "SGD", model = "serious", validation_percent = 0.2, num_classes = 10, size = 5000, device = "cuda")
    
data_class = Dataset(args)
train, validation = data_class.train(), data_class.validation()

In [37]:
print(numpy.shape(train))

()


In [40]:
import parser

import torch


def main():
    # Add seed
    #args = getParser()
    args = Args(lr=3e-4, epochs = 50, batch_size = 64, optimizer = "SGD", model = "baseline", validation_percent = 0.2, num_classes = 10, size = 5000, device = "cuda")
    
    data_class = Dataset(args)
    train, validation = data_class.train(), data_class.validation()

    model = getModel(args)
    optimizer = getParser(args, model.parameters())
    criterion = torch.nn.CrossEntropyLoss()

    for epoch in range(args.epochs):
        train_metrics = run(
            model,
            criterion,
            optimizer,
            train,
            True,
            {"loss": loss, "accuracy": accuracy},
        )
        print_metrics(train_metrics)
        validation_metrics = run(
            model,
            criterion,
            optimizer,
            validation,
            False,
            {"loss": loss, "accuracy": accuracy},
        )
        print_metrics(validation_metrics)


if __name__ == "__main__":
    main()


RuntimeError: ignored