In [1]:
from taranis.core.server.server import Server
from taranis.core.server.client import Client

client = Client("http://127.0.0.1:8000")

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import hashlib
from collections import defaultdict
from functools import cache
import pandas as pd
import altair as alt
from altair import datum
import torch
import torch.nn as nn
import torch.nn.functional as F



session_group = client.new_group('PythonPrimer')
epoch_tracker = defaultdict(int)


def add_metric(run, metric, value):
    real_epoch = epoch_tracker[run]
    run.new_metric(metric, value, time=real_epoch)
            
@cache
def register_model(name, model):
    representation = repr(model)

    sha = hashlib.sha256()
    sha.update(representation.encode('utf-8'))
    digest = sha.hexdigest()
    
    group = client.get_group(name, meta=dict(digest=digest))
    run = client.new_run(name)
    group.add_run(run)
    session_group.add_run(run)
    return run
    

In [4]:
from torchvision import datasets
from torchvision import transforms
import torch.optim as optim
import torch
import taranis.core.dataset.split as split
from taranis.core.trainer.train import mnist_dataset
from tqdm import tqdm

dataset = datasets.MNIST(
    '../data', 
    train=True, 
    download=True
)
print(len(dataset))

def newdataloader(batch_size=512):
    # Takes the original dataset and apply transform on the fly
    # Convert image to a tensor
    # normalize the tensor
    dataset_to_use = split.TransformedDatasetClassification(
        dataset, 
        transform=transforms.Compose([                    
            transforms.ToTensor(),                         # Transform the image to tensor
            transforms.Normalize((0.1307,), (0.3081,)),    # Normalize the image
        ])
    )

    # Takes care of spliting the dataset into bite size for our model
    dataloader = torch.utils.data.DataLoader(
        dataset_to_use,
        batch_size = batch_size,
        num_workers = 1,
    )
    
    return dataloader

NOTE: Redirects are currently not supported in Windows or MacOs.


60000


In [5]:
def test_model(model):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    dataset = datasets.MNIST('../data', train=False,  transform=transform)
    
    loader = torch.utils.data.DataLoader(dataset, batch_size=4096, num_workers=1) 
    
    total = len(dataset)
    model = model.cpu()
    
    with torch.no_grad():
        test_loss = 0
        correct = 0
        count = 0
        
        for batch, labels in loader:
            output = model(batch)
            test_loss += F.nll_loss(output, labels, reduction='sum').item()  
            count += batch.shape[0]
            
            pred = output.argmax(dim=1, keepdim=True) 
            correct += pred.eq(labels.view_as(pred)).sum().item()

        return test_loss / count, correct / total

In [6]:
def train(name, model, epoch=2, lr=0.4):
    batch_size = 512
    dataloader = newdataloader(batch_size)
    
    model = model.cpu()
    
    run = register_model(name, model)
    
    # Gradient optimizer 
    optimizer = optim.Adadelta(model.parameters(), lr=lr)
    
    with tqdm(total=len(dataset) * epoch, ncols=100) as progress:

        # Repeat a few times 
        for i in range(epoch):
            total_loss = 0
            count = 0

            # Go through the dataset
            for batch, labels in dataloader:
    
                optimizer.zero_grad()                     # Clear previous derivative

                probabilities = model(batch)              # Make prediction

                loss = F.nll_loss(probabilities, labels)  # Grade the prediction 

                loss.backward()                           # Compute derivative

                optimizer.step()                          # Update parameters using derivative

                total_loss += loss.item()                 # Keep track of loss to make sure it goes down

                count += 1
                
                progress.update(batch_size)

            loss = total_loss / count
            test_loss, acc = test_model(model)
            
            epoch_tracker[run] += 1
            add_metric(run, 'train_loss', loss)
            add_metric(run, 'test_loss', test_loss)
            add_metric(run, 'test_acc', acc)
            
            progress.set_description(f"loss: {loss:8.4f}")
    

In [7]:
from functools import cache

@cache
def newdataloader_cuda(batch_size=512, train=True):
    dataset_to_use = mnist_dataset(train)
    dataset_to_use.tensors = list(dataset_to_use.tensors)
    for i, t in enumerate(dataset_to_use.tensors):
        dataset_to_use.tensors[i] = t.cuda()

    # Takes care of spliting the dataset into bite size for our model
    dataloader = torch.utils.data.DataLoader(
        dataset_to_use,
        batch_size = batch_size,
        num_workers = 0,
    )
    
    return dataloader


def gpu_train(name, original_model, epoch=2, lr=0.4, batch_size=4096):
    dataloader = newdataloader_cuda(batch_size)        # HERE optimized loader
    device = torch.cuda.current_device()               # HERE Get GPU device
    model = original_model.to(device)                  # HERE Convert model to GPU
    
    run = register_model(name, model)
    optimizer = optim.Adadelta(model.parameters(), lr=1)

    with tqdm(total=len(dataset) * epoch, ncols=100) as progress:
        for i in range(epoch):
            partial_losses = []
            count = 0
            for batch, labels in dataloader:
                batch, labels = batch.to(device), labels.to(device) # HERE: Convert input to GPU

                optimizer.zero_grad()

                probabilities = model(batch)

                loss = F.nll_loss(probabilities, labels)

                loss.backward()

                optimizer.step()

                partial_losses.append(loss.detach())
                count += 1
                
                progress.update(batch_size)

            total_loss = (sum(partial_losses) / count).item()
            test_loss, acc = gpu_test_model(model)
            
            epoch_tracker[run] += 1
            add_metric(run, 'train_loss', total_loss)
            add_metric(run, 'test_loss', test_loss)
            add_metric(run, 'test_acc', acc)
            
            progress.set_description(f"loss: {total_loss:8.4f}")
    
def gpu_test_model(model, batch_size=4096*2):
    dataloader = newdataloader_cuda(batch_size, train=False)     # HERE optimized loader
    total = len(dataloader.dataset)
    
    device = torch.cuda.current_device()  # HERE
    model = model.to(device=device)       # HERE
    
    with torch.no_grad():
        test_loss = 0
        correct = 0
        total_count = 0
        
        for batch, labels in dataloader:
            batch, labels = batch.to(device), labels.to(device) # HERE
            
            output = model(batch)
            test_loss += F.nll_loss(output, labels, reduction='sum').item() 
            
            pred = output.argmax(dim=1, keepdim=True) 
            correct += pred.eq(labels.view_as(pred)).sum().item()
            total_count += batch.shape[0]

        assert total_count == total, f"{total_count} != {total}"
        return test_loss / total, correct / total

In [8]:
def trainfast(*args, **kwargs):
    if torch.cuda.is_available():
        return gpu_train(*args, **kwargs)
    return train(*args, **kwargs)

def testfast(*args, **kwargs):
    if torch.cuda.is_available():
        return gpu_test_model(*args, **kwargs)
    return test_model(*args, **kwargs)

In [9]:
def show_train_test_curve():
    data = pd.DataFrame(session_group.fetch_metrics())
    
    return (
        alt.Chart(data)
        .mark_line()
        .encode(
            x=alt.X('time'), 
            y=alt.Y('value', scale=alt.Scale(domain=[-1, -0.7])),
            color=alt.Color(
                'metric', 
                legend=alt.Legend(
                orient='none',
                legendX=130, legendY=-40,
                direction='horizontal',
                titleAnchor='middle')
            )
        ).transform_filter(
            (datum.metric == 'test_loss') | (datum.metric == 'train_loss')
        )
    )

In [10]:
def show_loss_curve(x, y, c):
    data = pd.DataFrame(session_group.fetch_metrics())
    
    series = data[data['metric'] == y]
    mn = series["value"].min()
    mx = series["value"].max()
    
    return (
        alt.Chart(series)
        .mark_line()
        .encode(
            x=alt.X(x), 
            y=alt.Y('value', scale=alt.Scale(domain=[mn, mx])), 
            color=alt.Color(
                c, 
                legend=alt.Legend(
                orient='none',
                legendX=130, legendY=-40,
                direction='horizontal',
                titleAnchor='middle')
            )
        )
    )


In [11]:
class STN(nn.Module):
    def __init__(self):
        super(STN, self).__init__()
        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
        )
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 3 * 3, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )
        
    def forward(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 10 * 3 * 3)
        
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)
        
        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)
        return x


In [None]:
for i in range(100):
    n_class = 10

    linear = nn.Sequential(
        nn.Flatten(),                  # Flatten images from a 2D matrix to 1D => (28x28) => (784,)
        nn.LazyLinear(n_class),        # Simple (a x + b) layer that will learn `a` and `b`
        nn.Softmax(dim=1)              # Normalization so result wil be between [0, 1]
    )

    deepmodel = nn.Sequential(
        nn.Flatten(),
        nn.LazyLinear(128),    # r1 = batch * w1 + b1
        nn.ReLU(),             # r2 = max(r1, 0)
        nn.LazyLinear(64),     # r3 = r3 * w3 + b3
        nn.ReLU(),             # r4 = max(r3, 0)
        nn.LazyLinear(n_class),# r5 = r4 * w4 + b4
        nn.Softmax(dim=1)  
    )

    conv_model = nn.Sequential(
        # ext_nn.MaskLayer((28, 28)),
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1),
        nn.ReLU(),
        nn.Conv2d(32, 64, 3, 1),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(576 * 64, 128),
        nn.ReLU(),
        nn.Linear(128, n_class),
        nn.Softmax(dim=1),
    )

    linear_stn = nn.Sequential(
        STN(),
        nn.Flatten(),                  # Flatten images from a 2D matrix to 1D => (28x28) => (784,)
        nn.Linear(784, n_class),        # Simple (a x + b) layer that will learn `a` and `b`
        nn.Softmax(dim=1)              # Normalization so result wil be between [0, 1]
    )
    
    models = [
        ('linear', linear),
        ('deepmodel', deepmodel),
        ('conv_model', conv_model),
        ('linear_stn', linear_stn),
    ]
    
    for k, v in models:
        trainfast(k, v, epoch=100, lr=0.1)

loss:  -0.9252: : 6144000it [01:09, 88919.07it/s]                                                   
loss:  -0.9818: : 6144000it [01:05, 93365.37it/s]                                                   
loss:  -0.9928: : 6144000it [17:16, 5925.71it/s]                                                    
loss:  -0.9892: : 6144000it [01:08, 90349.64it/s]                                                   
loss:  -0.9253: : 6144000it [01:04, 94575.20it/s]                                                   
loss:  -0.9828: : 6144000it [01:05, 94387.73it/s]                                                   
loss:  -0.9930: : 6144000it [17:33, 5832.20it/s]                                                    
loss:  -0.9873: : 6144000it [01:09, 88841.01it/s]                                                   
loss:  -0.9252: : 6144000it [01:06, 91894.11it/s]                                                   
loss:  -0.9824: : 6144000it [01:08, 89499.66it/s]                                          

loss:  -0.9812: : 6144000it [01:06, 92673.06it/s]                                                   
loss:  -0.8971: : 6144000it [22:26, 4563.76it/s]                                                    
loss:  -0.9861: : 6144000it [01:08, 89806.61it/s]                                                   
loss:  -0.9252: : 6144000it [01:05, 93603.47it/s]                                                   
loss:  -0.9822: : 6144000it [01:05, 93207.15it/s]                                                   
loss:  -0.9944: : 6144000it [22:26, 4562.84it/s]                                                    
loss:  -0.9850: : 6144000it [01:08, 89696.07it/s]                                                   
loss:  -0.9253: : 6144000it [01:05, 93768.20it/s]                                                   
loss:  -0.9778: : 6144000it [01:06, 92772.87it/s]                                                   
loss:  -0.9950: : 6144000it [22:26, 4563.81it/s]                                           

In [None]:
show_loss_curve('time', 'train_loss', 'name')

In [12]:
show_train_test_curve()

ValueError: metric encoding field is specified without a type; the type cannot be inferred because it does not match any column in the data.

alt.Chart(...)

In [13]:
def newmodel(depth):
    base = nn.Sequential(
        nn.Flatten(),
        nn.Linear(784, 128)
    )
    
    mid = []
    for i in range(depth * 4):
        mid.extend([nn.ReLU(), nn.Linear(128, 128)])
    mid = nn.Sequential(*mid)
    
    end = nn.Sequential(
        nn.ReLU(),    
        nn.Linear(128, 10),     
        nn.Softmax(dim=1)
    )

    return nn.Sequential(
        base,
        mid,
        end,
    )

def move_weights(destination_model, source_model):
    dest = destination_model.state_dict()
    src = source_model.state_dict()
    
    for k, v in src.items():
        if k in dest:
            dest[k] = v
            
    destination_model.load_state_dict(dest)
    

In [14]:
previous_model = newmodel(1)

for i in range(5):
    print(i + 1)
    
    next_model = newmodel(i + 1)
    move_weights(next_model, previous_model)

    loss_tracker[f'model_{i + 1}'] = []
    trainfast(f'model_{i + 1}', next_model, lr=0.1, epoch=50)
    show_loss_curve()
    
    previous_model = next_model
    
show_loss_curve()

NameError: name 'nn' is not defined