# Weights and Biases

## Installation and Libraries

In [None]:
%pip install wandb -q

In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim 
from  torchvision import datasets
from torchvision.transforms import ToTensor

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

from tqdm import tqdm

Device:  cuda


In [2]:
import wandb, os 
os.environ['WANDB_API_KEY'] = "8725ebdffefbbe249e137ef04781ca644aa5ee0e"
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mshushanksingh310[0m ([33mshushanksingh310-birla-institute-of-technology-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Helper functions and Model

In [3]:
data_train = datasets.CIFAR10(
    root = 'data',
    train = True, 
    transform = ToTensor(),
    download = True
)

data_test = datasets.CIFAR10(
    root = 'data',
    train = False, 
    download = True, 
    transform = ToTensor()
)

In [11]:
def build_dataset(batch_size, data_train, data_test):
    train_loader = torch.utils.data.DataLoader(data_train, batch_size = batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size = batch_size, shuffle=False)
    return train_loader, test_loader

In [12]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.CNN = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.AvgPool2d(kernel_size=9),
            nn.Flatten()
        )

        self.classification = nn.Linear(576, 10)
    
    def forward(self, x):
        x_cnn = self.CNN(x)
        res = self.classification(x_cnn)
        return res

model = Network().to(device)
print(model)

Network(
  (CNN): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): AvgPool2d(kernel_size=9, stride=9, padding=0)
    (4): Flatten(start_dim=1, end_dim=-1)
  )
  (classification): Linear(in_features=576, out_features=10, bias=True)
)


In [13]:
train_loader , test_loader = build_dataset(64, data_train, data_test)
for x, y in train_loader:
    break
model(x.to(device)).shape

torch.Size([64, 10])

In [14]:
def get_optim(optimizer, learning_rate, model):
    if optimizer=='sgd':
        return optim.SGD(model.parameters(), lr=learning_rate)
    else:
        return optim.Adam(model.parameters(), lr=learning_rate)

In [15]:
def train_epoch(model, loader, optimizer, criterion, scaler):
    num_correct = 0
    total_loss = 0

    for i, (x, y) in enumerate(loader):
        optimizer.zero_grad()

        x = x.cuda()
        y = y.cuda()

        with torch.cuda.amp.autocast():
            outputs - model(x)
            loss = criterion(outputs, y)
        
        total_loss += fload(loss)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
    ep_loss = float(total_loss / len(loader))

    return model, ep_loss 

In [40]:
def train(model, finish = True):
    best_acc = 0

    for epoch in range(run_config['epochs']):
        batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, 
        desc='Train')

        num_correct = 0
        total_loss = 0

        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()

            x = x.cuda()
            y = y.cuda()

            with torch.cuda.amp.autocast():
                outputs = model(x)
                loss = criterion(outputs, y)

            num_correct += int((torch.argmax(outputs, axis=1) ==y).sum())

            total_loss += float(loss)

            batch_bar.set_postfix(
                acc="{:.04f}%".format(100 * num_correct / ((i +1) * run_config['batch_size'])),
                loss="{:.04f}".format(float(total_loss / (i + 1))),
                num_correct = num_correct, 
                lr = "{:.04f}".format(float(optimizer.param_groups[0]['lr']))
            )

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            batch_bar.update()
        batch_bar.close()

        train_loss = float(total_loss/ len(train_loader))
        train_acc = 100 * num_correct / (len(train_loader) * run_config['batch_size'])
        lr = float(optimizer.param_groups[0]['lr'])

        print("Epoch {}/{}: Train Acc {:.04f}%, Train Loss {:.04f}, Learning Rate {:.04f}".format(
            epoch + 1, 
            run_config['epochs'],
            train_acc,
            train_loss, 
            lr
        ))

        # what to log 

        metrics = {
            "train_loss": train_loss, 
            "train_acc": train_acc, 
            "lr": lr 
        }

        wandb.log(metrics)

        #updating the model version 

        if train_acc > best_acc: 
            best_acc = train_acc

            # saving the model and optimizer states

            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, "Model.pth")

            # ALTERNATIVE 1: Saving files as artifacts  
            # Creating Artifact
            model_artifact = wandb.Artifact(run_config['model'], type='model')

            model_artifact.add_file("Model.pth")

            run.log_artifact(model_artifact)

            wandb.save("Model.pth")

    if finish:
        wandb.finish()

In [41]:
run_config = {
    'model': '1-2dcnn',
    'optimizer':'sgd',
    'lr': 2e-3,
    'batch_size':64,
    'epochs': 5
}

train_loader, test_loader = build_dataset(run_config['batch_size'], data_train, data_test)

optimizer = get_optim(run_config['optimizer'], run_config['lr'], model)

criterion = nn.CrossEntropyLoss()

scaler = torch.cuda.amp.GradScaler()

  scaler = torch.cuda.amp.GradScaler()


In [None]:
run = wandb.init(
    entity="shushanksingh310-ai",
    project="test",
    job_type="model-training",
    name=run_config['model'],
    config=run_config
    )

0,1
lr,▁
train_acc,▁
train_loss,▁

0,1
lr,0.002
train_acc,31.99528
train_loss,1.94376


In [43]:
train(model)

  with torch.cuda.amp.autocast():
                                                                                                                 

Epoch 1/5: Train Acc 34.4949%, Train Loss 1.8823, Learning Rate 0.0020


                                                                                                                 

Epoch 2/5: Train Acc 36.0354%, Train Loss 1.8425, Learning Rate 0.0020


                                                                                                                 

Epoch 3/5: Train Acc 37.3561%, Train Loss 1.8125, Learning Rate 0.0020


                                                                                                                 

Epoch 4/5: Train Acc 38.1474%, Train Loss 1.7865, Learning Rate 0.0020


                                                                                                                 

Epoch 5/5: Train Acc 39.0825%, Train Loss 1.7673, Learning Rate 0.0020


0,1
lr,▁▁▁▁▁
train_acc,▁▃▅▇█
train_loss,█▆▄▂▁

0,1
lr,0.002
train_acc,39.08248
train_loss,1.76729


## Resume a previous run

In [27]:
RESUME_LOGGING = True

In [None]:
if RESUME_LOGGING:
    run_id = ""
    run = wandb.init(
        id = run_id,
        resume = "must"
        project = 'test'
    )


SyntaxError: invalid syntax (3635203901.py, line 1)