In [3]:
!pip install idx2numpy



In [4]:
# For reading data
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# For visualizing
import plotly.express as px

# For model building
import torch
import torch.nn as nn
import torch.nn.functional as F

import idx2numpy
import requests
import gzip
import numpy as np
from io import BytesIO

In [5]:
class FashionMNIST(Dataset):
    def __init__(self, images_url, labels_url):
        # Download and read in our raw data from the IDX files
        self.images = self.download_and_load_idx(images_url)
        self.labels = self.download_and_load_idx(labels_url)

    def download_and_load_idx(self, url):
        response = requests.get(url)
        response.raise_for_status()
        with gzip.GzipFile(fileobj=BytesIO(response.content)) as f:
            return idx2numpy.convert_from_file(f)

    # return the length of the complete data set
    def __len__(self):
        return len(self.images)

    # retrieve a single record based on index position `idx`
    def __getitem__(self, idx):
        # extract the image and reshape it
        image = self.images[idx].reshape(1, 28, 28)
        # Specify dtype to align with default dtype used by weight matrices
        image = torch.tensor(image, dtype=torch.float32)
        # extract the label
        label = self.labels[idx]

        # return the image and its corresponding label
        return image, label

In [6]:
def train_loop(dataloader, model, loss_fn, optimizer, device):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 10 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn, device):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

def train_net(model, train_dataloader, test_dataloader, epochs=5, learning_rate=1e-3, batch_size=64):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    lr = learning_rate
    bs = batch_size
    ep = epochs

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    for t in range(ep):
        try:
            print(f"Epoch {model.EPOCH+t+1}\n-------------------------------")
        except AttributeError:
            print(f"Epoch {t+1}\n-------------------------------")
        train_loop(train_dataloader, model, loss_fn, optimizer, device)
        test_loop(test_dataloader, model, loss_fn, device)
    print("Done!")

    try:
        model.EPOCH += ep
    except AttributeError:
        model.EPOCH = ep

    return model

In [7]:
# URLs for the datasets
train_images_url = "https://github.com/oliviermizero/ForecastingFashionMNIST/raw/main/data/train-images-idx3-ubyte.gz"
train_labels_url = "https://github.com/oliviermizero/ForecastingFashionMNIST/raw/main/data/train-labels-idx1-ubyte.gz"
test_images_url = "https://github.com/oliviermizero/ForecastingFashionMNIST/raw/main/data/t10k-images-idx3-ubyte.gz"
test_labels_url = "https://github.com/oliviermizero/ForecastingFashionMNIST/raw/main/data/t10k-labels-idx1-ubyte.gz"

# Load our data into memory
train_data = FashionMNIST(train_images_url, train_labels_url)
test_data = FashionMNIST(test_images_url, test_labels_url)

# Create data feed pipelines for modeling
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

In [8]:
class Residual(nn.Module):
    """The Residual block of ResNet models."""
    def __init__(self, num_channels, use_1x1conv=False, strides=1):
        super(Residual, self).__init__()
        self.conv1 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1,
                                   stride=strides)
        self.conv2 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.LazyConv2d(num_channels, kernel_size=1,
                                       stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.LazyBatchNorm2d()
        self.bn2 = nn.LazyBatchNorm2d()

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)

In [9]:
class ResNet(nn.Module):
    def __init__(self, arch, lr=0.1, num_classes=10):
        super(ResNet, self).__init__()
        self.net = nn.Sequential(self.b1())
        for i, b in enumerate(arch):
            self.net.add_module(f'b{i+2}',
                self.block(*b, first_block=(i==0)))
        self.net.add_module('last', nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(),
            nn.LazyLinear(num_classes)))

    def b1(self):
        return nn.Sequential(
            nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
            nn.LazyBatchNorm2d(), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    def block(self, num_residuals, num_channels, first_block=False):
        blk = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                blk.append(Residual(num_channels,
                 use_1x1conv=True, strides=2))
            else:
                blk.append(Residual(num_channels))
        return nn.Sequential(*blk)

    def forward(self, x):
        x = self.net(x)
        return x

In [10]:
class ResNet18(ResNet):
    def __init__(self, lr=0.1, num_classes=10):
        super(ResNet18, self).__init__(((2, 64), (2, 128),
         (2, 256), (2, 512)),
                       lr, num_classes)

model = ResNet18()#.to('cuda')

In [17]:
model = train_net(model, train_dataloader,
        test_dataloader, epochs = 1, learning_rate = 1e-3,
        batch_size=64
        )

Epoch 1
-------------------------------
loss: 2.551995  [   64/60000]
loss: 1.703794  [  704/60000]
loss: 1.206934  [ 1344/60000]
loss: 1.068375  [ 1984/60000]
loss: 0.862926  [ 2624/60000]
loss: 0.807481  [ 3264/60000]
loss: 0.757776  [ 3904/60000]
loss: 0.841575  [ 4544/60000]
loss: 0.845504  [ 5184/60000]
loss: 0.671600  [ 5824/60000]
loss: 0.768402  [ 6464/60000]
loss: 0.683704  [ 7104/60000]
loss: 0.584607  [ 7744/60000]
loss: 0.685592  [ 8384/60000]
loss: 0.590827  [ 9024/60000]
loss: 0.775680  [ 9664/60000]
loss: 0.521001  [10304/60000]
loss: 0.621127  [10944/60000]
loss: 0.801205  [11584/60000]
loss: 0.481559  [12224/60000]
loss: 0.398155  [12864/60000]
loss: 0.640368  [13504/60000]
loss: 0.363139  [14144/60000]
loss: 0.443561  [14784/60000]
loss: 0.488652  [15424/60000]
loss: 0.558743  [16064/60000]
loss: 0.511943  [16704/60000]
loss: 0.498881  [17344/60000]
loss: 0.601498  [17984/60000]
loss: 0.575994  [18624/60000]
loss: 0.600253  [19264/60000]
loss: 0.684256  [19904/60000]


In [18]:
# Save our model for later, so we can train more or make predictions

EPOCH = model.epochs
# We use the .pt file extension by convention for saving
#    pytorch models
PATH = "/content/sample_data/model.pt"

# The save function creates a binary storing all our data for us
torch.save({
            'epoch': EPOCH,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            }, PATH)

AttributeError: 'ResNet18' object has no attribute 'epochs'

In [None]:
PATH

In [None]:
# Specify our path
PATH = "/content/sample_data/model.pt"

# Create a new "blank" model to load our information into
blank_model = ResNet18()

# Recreate our optimizer
optimizer = torch.optim.SGD(blank_model.parameters(), lr=0.001, momentum=0.9)

# Load back all of our data from the file
checkpoint = torch.load(PATH)
blank_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
EPOCH = checkpoint['epoch']