In [2]:
# only for testing
# use main.py for training

#To load a saved version of the model:
#saved_model = UNET()
#saved_model.load_state_dict(torch.load(PATH))

#TODO:
# run on GPU

# things to improve performance (speed)
# use torch.utils.data.random_split https://www.programcreek.com/python/example/125046/torch.utils.data.random_split
# https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html?highlight=checkpoint
# implement "channels Last Memory Format" (beta) https://pytorch.org/tutorials/intermediate/memory_format_tutorial.html

In [None]:
import munch
import yaml

def get_config(file='config.yaml'):
    """Load a default YAML configuration and overlay files named on the command line.

    The default configuration is read from ``file``. Every path given as a
    positional command-line argument is then parsed and merged into it, in the
    order given, via ``merge_nested_dict``.

    Args:
        file: Path of the default YAML configuration file.

    Returns:
        The merged configuration passed through ``munch.munchify``.

    Raises:
        FileNotFoundError: If a path given on the command line is not an
            existing file.

    NOTE(review): ``merge_nested_dict`` is not defined in the visible part of
    this notebook; it must be provided elsewhere before this function is called.
    """
    # Imported locally because this cell only imports munch and yaml; without
    # these the function fails with a NameError on its first line.
    import argparse
    import os

    p = argparse.ArgumentParser(description='Learned Step Size Quantization')
    p.add_argument('config_file', metavar='PATH', nargs='+',
                   help='path to a configuration file')
    arg = p.parse_args()

    with open(file) as yaml_file:
        cfg = yaml.safe_load(yaml_file)

    for f in arg.config_file:
        if not os.path.isfile(f):
            # Build a single message string: passing the path as a second
            # positional argument makes OSError treat the pair as
            # (errno, strerror) and garbles the rendered message.
            raise FileNotFoundError(
                'Cannot find a configuration file at {}'.format(f))
        with open(f) as yaml_file:
            c = yaml.safe_load(yaml_file)
        cfg = merge_nested_dict(cfg, c)

    return munch.munchify(cfg)

In [5]:
# Sanity check: confirm that TensorBoard support can be imported in this environment.
# To inspect the installed package:  !conda list | grep tensorboard
import torch
# To install it if the import below fails:  !conda install tensorboard -y
from torch.utils.tensorboard import SummaryWriter
print(torch.__version__)

# Constructing a writer must succeed; as the last expression of the cell its repr is displayed.
SummaryWriter()

1.12.0.dev20220425


<torch.utils.tensorboard.writer.SummaryWriter at 0x7f3538b75f70>

In [25]:
import yaml
import munch

def get_conf(path='config.yaml'):
    """Read a YAML configuration file and return it as a munch object.

    Args:
        path: Location of the YAML file to read.

    Returns:
        The parsed YAML passed through ``munch.munchify``. If parsing raises
        ``yaml.YAMLError`` the error is printed and ``None`` is returned
        implicitly.
    """
    with open(path) as stream:
        try:
            parsed = yaml.safe_load(stream)
            return munch.munchify(parsed)
        except yaml.YAMLError as err:
            # Parse errors are reported, not propagated.
            print(err)
        
# Load the configuration with get_conf(), the helper defined earlier in this
# cell. The previous call to open_yaml() referenced a name that is not defined
# anywhere in this notebook and only worked with leftover kernel state.
cfg = get_conf()

# Print every keyword argument configured for the test dataset.
for kwarg in cfg.dataset.test.kwargs:
    print(kwarg,cfg.dataset.test.kwargs[kwarg])

batch_size 16


In [1]:
# Root directories of the training and validation data; both are passed as
# root_dir to the sentinel dataset further down in this cell.
root = 'processed-data/TCI_256_split/Tidsperiod-1/train'
rootv = 'processed-data/TCI_256_split/Tidsperiod-1/val'

# Number of DataLoader worker processes (passed to both loaders below).
num_workers = 0
# Selects whether the loaders are created with pinned memory.
GPU = False
# NOTE(review): this flag is rebound by `from train import train,test` later in
# this cell, so its value is never visible after the imports run.
train = True

# Mini-batch size used by both the training and the validation loader.
batch_size=4

import torch
import torch.optim as optim
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# PyTorch TensorBoard support
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np


#from tqdm.notebook import tqdm

from dataset.datasets import sentinel
from model.models import UNET

from torchvision import transforms
from torch.utils.data import DataLoader

from train import train,test 


# Helper function for inline image display
def matplotlib_imshow(img, one_channel=False):
    """Draw an image tensor on the current matplotlib axes.

    Args:
        img: Tensor to display. It is averaged over dimension 0 when
            ``one_channel`` is set; otherwise its axes are reordered with
            ``(1, 2, 0)`` before plotting.
        one_channel: If true, show the dimension-0 mean with the "Greys"
            colormap instead of the full image.
    """
    # No un-normalisation is applied; values are plotted as they are.
    tensor = img.mean(dim=0) if one_channel else img
    pixels = tensor.numpy()
    if not one_channel:
        plt.imshow(np.transpose(pixels, (1, 2, 0)))
        return
    plt.imshow(pixels, cmap="Greys")

        

        
        
        
        
        
        
        
        
# Only conversion to tensors is applied to the images.
transform = transforms.Compose([transforms.ToTensor()])


# Create datasets for training & validation
training_set = sentinel(root_dir=root, img_transform=transform)
validation_set = sentinel(root_dir=rootv, img_transform=transform)

# Pinned memory is requested only when the GPU flag is set. This replaces an
# if/else whose GPU branch assigned to the misspelled name `pin_memmory`,
# which left `pin_memory` undefined (NameError below) whenever GPU was True.
pin_memory = bool(GPU)

# Create data loaders for our datasets; shuffle for training, not for validation
training_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True,
                             num_workers=num_workers, pin_memory=pin_memory)
validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False,
                               num_workers=num_workers, pin_memory=pin_memory)
model = UNET()


# Report split sizes
print('Training set has {} instances'.format(len(training_set)))
print('Validation set has {} instances'.format(len(validation_set)))

# num_workers>0 enables asynchronous data loading and augmentation
# num_workers should be tuned depending on the workload, CPU, GPU, and location of training data

# use torch.no_grad() to disable gradients in validation and inference (less memory usage)

optimizer = optim.NAdam(model.parameters(), lr = 0.001)
# Targets equal to 0 are excluded from the loss (ignore_index=0).
loss_fn = nn.CrossEntropyLoss(ignore_index=0)


  from .autonotebook import tqdm as notebook_tqdm


Training set has 2674 instances
Validation set has 316 instances


In [11]:
#visualise images in tensorboard

# Pull one mini-batch from the training loader. The built-in next() is used
# because the Python-2-style `.next()` alias is not available on DataLoader
# iterators in newer PyTorch releases.
dataiter = iter(training_loader)
images, labels = next(dataiter)

# Create a grid from the images and show them
# https://pytorch.org/vision/main/auto_examples/plot_visualization_utils.html#sphx-glr-auto-examples-plot-visualization-utils-py
img_grid = torchvision.utils.make_grid(images)
#show(img_grid)
#matplotlib_imshow(img_grid, one_channel=False)
#print('  '.join(classes[labels[j]] for j in range(4))
# Default log_dir argument is "runs" - but it's good to be specific
# torch.utils.tensorboard.SummaryWriter is imported above
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/visualise_data')

# Write image data to TensorBoard log dir
writer.add_image('Four Sat Images', img_grid)
writer.flush()

# To view, start TensorBoard on the command line with:
#  tensorboard --logdir=runs
# ...and open a browser tab to http://localhost:6006/

#if on remote
## on remote
##tensorboard --logdir <path> --port 6006
## forward port 6006 on the server to local port 16006:
##ssh -L 16006:127.0.0.1:6006 -i vmexjobb_key.pem exjobbare@vdexjobb.westeurope.cloudapp.azure.com

In [3]:
# Fetch one mini-batch and display the size of dimension 1 of the image tensor
# (presumably the channel axis - confirm against the dataset).
# next() replaces the `.next()` alias, which newer PyTorch releases do not provide.
dataiter = iter(training_loader)
images, labels = next(dataiter)
images.shape[1]


3

In [12]:


# One TensorBoard run directory per launch, named after the start time.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/s2_trainer_{}'.format(timestamp))
epoch_number = 0

EPOCHS = 10

# Any finite validation loss beats the initial value.
best_vloss = float('inf')

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # make sure gradient tracking is on, and do one pass over the data
    model.train(True)
    # NOTE(review): the recorded output of this cell is "TypeError: train()
    # missing 6 required positional arguments: 'device', 'train_loader',
    # 'optimizer', 'loss_fn', 'epoch', and 'tb_writer'", so this call does not
    # match the signature of train() imported from train.py. The names of its
    # first two parameters are not visible here, so the call is left unchanged.
    # TODO: pass the full argument list.
    avg_loss = train(epoch_number,writer)

    # Switch to evaluation mode and disable autograd for validation: no graph
    # is built, and the loss is accumulated as a Python float instead of a
    # tensor that would keep every batch's graph alive.
    model.eval()
    running_vloss = 0.0
    num_vbatches = 0
    with torch.no_grad():
        for vinputs, vlabels in validation_loader:
            voutputs = model(vinputs)
            running_vloss += loss_fn(voutputs, vlabels).item()
            num_vbatches += 1

    # An empty validation loader yields NaN, which never compares as "best",
    # instead of failing on an undefined loop index.
    avg_vloss = running_vloss / num_vbatches if num_vbatches else float('nan')
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

    # (tensor board) Log the running loss averaged per batch
    # for both training and validation
    writer.add_scalars('Training vs. Validation Loss',
                       {'Training': avg_loss, 'Validation': avg_vloss},
                       epoch_number + 1)
    writer.flush()

    # track best performance, and save the model's state
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'model_{}_{}'.format(timestamp, epoch_number)
        torch.save(model.state_dict(), model_path)

    epoch_number += 1
    
   

EPOCH 1:


TypeError: train() missing 6 required positional arguments: 'device', 'train_loader', 'optimizer', 'loss_fn', 'epoch', and 'tb_writer'

In [2]:
### see prediction vs label (should load unseen data before)
### help functions
import matplotlib.pyplot as plt
import torch 

def plot_pred_lbl_rgb(pred, labl, rgb):
    """Show a prediction, its label and the RGB input side by side.

    Args:
        pred: Tensor of predicted values; the number of distinct values is
            reported in the panel title as the class count.
        labl: Tensor of label values, shown and counted the same way.
        rgb: Image tensor whose axes are permuted with ``(1, 2, 0)`` and
            converted to NumPy before display.
    """
    n_rows, n_cols = 1, 3
    fig = plt.figure(figsize=(10, 7))

    # Panels 1 and 2: the prediction and label maps, each titled with the
    # number of unique values it contains.
    mask_panels = (("Pred (%d classes)", pred), ("Label (%d classes)", labl))
    for position, (caption, mask) in enumerate(mask_panels, start=1):
        fig.add_subplot(n_rows, n_cols, position)
        plt.imshow(mask)
        plt.title(caption % len(torch.unique(mask)))

    # Panel 3: the RGB image; only this panel has its axes hidden.
    fig.add_subplot(n_rows, n_cols, 3)
    rgb_array = rgb.permute(1, 2, 0).numpy()
    plt.imshow(rgb_array)
    plt.title("RGB")
    plt.axis('off')
    plt.show()
####################

# Run the model over the validation loader without gradient tracking and plot
# every sample's predicted label map next to its label and input image.
with torch.no_grad():
    for X, y in validation_loader:
        # Softmax over dimension 1, then the index of the largest value.
        preds = nn.functional.softmax(model(X), dim=1)
        predlabls = preds.argmax(dim=1)

        for predlabl, labl, rgb in zip(predlabls, y, X):
            plot_pred_lbl_rgb(predlabl, labl, rgb)
            

NameError: name 'validation_loader' is not defined

In [7]:
#visualizing model in tensorboard


# Again, grab a single mini-batch of images
# Again, grab a single mini-batch of images. next() replaces the `.next()`
# alias, which newer PyTorch releases do not provide on DataLoader iterators.
dataiter = iter(training_loader)
images, labels = next(dataiter)

# add_graph() will trace the sample input through your model,
# and render it as a graph.
writer.add_graph(model, images)
writer.flush()

  if x.shape != concat_layer.shape:
