### Exploration of Model Accuracy When Fed Single-Channel MSAVI (Modified Soil-Adjusted Vegetation Index) Images

In [1]:
import preprocess
import torch
import torchvision
import rasterio.features
from torch.utils.data import DataLoader
from torchvision import models
from torchvision.models.segmentation.deeplabv3 import DeepLabHead
from torch import nn
import torch.optim as optim
import copy
from tqdm import tqdm
import time
from sklearn.metrics import roc_auc_score, f1_score
import numpy as np

import segmentation_models_pytorch as smp

In [2]:
def createDeepLabv3(outputchannels=1):
    """Build the single-channel segmentation network used in this notebook.

    Despite its historical name, this function constructs a U-Net from
    ``segmentation_models_pytorch`` (``smp.Unet``), not a DeepLabv3 model.
    The name is kept so existing callers continue to work.

    Args:
        outputchannels: Number of output channels, passed to ``smp.Unet`` as
            ``classes``. Defaults to 1.

    Returns:
        The ``smp.Unet`` instance, already switched to training mode.
    """
    # in_channels=1 because the network is fed single-band images.
    # All other smp.Unet options are left at the library defaults.
    model = smp.Unet(classes=outputchannels, in_channels=1)
    # set the model into training mode and return
    model.train()
    return model

In [3]:
# now need to define training procedure
def _select_device(device=None):
    """Return the torch device to train on.

    An explicit ``device`` wins. Otherwise GPU index 2 is preferred (the
    notebook's original choice) when that many GPUs exist, then the default
    CUDA device, then the CPU.
    """
    if device is not None:
        return torch.device(device)
    if not torch.cuda.is_available():
        return torch.device("cpu")
    if torch.cuda.device_count() > 2:
        return torch.device("cuda:2")
    return torch.device("cuda")


def _score_batch(metrics, y_true, y_pred):
    """Evaluate every metric on one flattened batch and return {name: value}."""
    scores = {}
    for name, metric in metrics.items():
        try:
            if name == 'f1_score':
                # use classification threshold of 0.1
                scores[name] = metric(y_true > 0, y_pred > 0.1)
            else:
                scores[name] = metric(y_true.astype('uint8'), y_pred)
        except ValueError:
            # Some metrics are undefined for degenerate batches (for example
            # ROC AUC when the batch contains a single class). Record NaN so
            # the batch is skipped in the epoch average instead of aborting
            # the whole training run.
            scores[name] = float('nan')
    return scores


def _mean_ignoring_nan(values):
    """Return the mean of the non-NaN entries, or NaN when there are none."""
    valid = [v for v in values if not np.isnan(v)]
    return float(np.mean(valid)) if valid else float('nan')


def train_model(model, criterion, dataloader, optimizer, metrics, num_epochs=3,
                device=None):
    """Train ``model`` and return it loaded with the lowest-test-loss weights.

    Every epoch runs a ``train`` phase (gradients enabled, optimizer stepped)
    followed by a ``test`` phase (gradients disabled, ``model.eval()``).

    Args:
        model: The ``torch.nn.Module`` to optimise.
        criterion: Loss callable invoked as ``criterion(outputs, masks)``.
        dataloader: Either one iterable of batches, used for BOTH phases, or
            a dict with ``'train'`` and ``'test'`` keys holding one iterable
            per phase. Each batch must be a mapping with ``'image'`` and
            ``'mask'`` tensors. A dimension is inserted at index 1 of both
            (presumably the channel axis; confirm against the dataset).
        optimizer: Optimizer already bound to ``model``'s parameters.
        metrics: Mapping of name to ``metric(y_true, y_pred)``. The entry
            named ``'f1_score'`` receives boolean arrays (targets ``> 0``,
            predictions ``> 0.1``); every other metric receives ``uint8``
            targets and the raw model outputs.
        num_epochs: Number of epochs to run.
        device: Optional device (string or ``torch.device``). When omitted, a
            device is chosen automatically.

    Returns:
        ``model``, with the weights from the epoch whose mean test loss was
        lowest (or its initial weights if no test loss was ever recorded).
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    # Use GPU if available
    device = _select_device(device)
    model.to(device)

    phases = ('train', 'test')
    if isinstance(dataloader, dict):
        loaders = {phase: dataloader[phase] for phase in phases}
    else:
        # A single loader is used for both phases; in that case the "test"
        # numbers are measured on the training data.
        loaders = {phase: dataloader for phase in phases}

    # Training
    for epoch in range(1, num_epochs + 1):
        print("Epoch {}/{}".format(epoch, num_epochs))
        print("-"*10)
        batchsummary = {'epoch': epoch}
        # Each epoch has training and validation
        for phase in phases:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()
            # Log keys follow the 'Train_*' / 'Test_*' naming scheme.
            prefix = phase.capitalize()
            batch_losses = []
            batch_scores = {name: [] for name in metrics}

            # Begin iterating over data using batches
            for sample in tqdm(loaders[phase]):
                inputs = sample['image'].to(device).unsqueeze(1)
                masks = sample['mask'].float().to(device).unsqueeze(1)
                # zero parameter gradients
                optimizer.zero_grad()
                # track the history only if we're in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, masks)
                    if is_train:
                        loss.backward()
                        optimizer.step()

                batch_losses.append(loss.item())
                y_pred = outputs.detach().cpu().numpy().ravel()
                y_true = masks.detach().cpu().numpy().ravel()
                for name, value in _score_batch(metrics, y_true, y_pred).items():
                    batch_scores[name].append(value)

            # Average over all batches so that one (possibly small) final
            # batch does not decide which weights are kept.
            epoch_loss = float(np.mean(batch_losses)) if batch_losses else float('nan')
            batchsummary[f'{prefix}_loss'] = epoch_loss
            print('{} Loss: {:.4f}'.format(phase, epoch_loss))
            for name, values in batch_scores.items():
                batchsummary[f'{prefix}_{name}'] = _mean_ignoring_nan(values)

            # A NaN loss never compares as smaller, so it is never selected.
            if phase == 'test' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
        print(batchsummary)

    time_elapsed = time.time() - since
    print('Training completed in {:.0f}m {:.0f}s'.format(time_elapsed//60, time_elapsed%60))
    print('Lowest Loss: {:.4f}'.format(best_loss))
    model.load_state_dict(best_model_wts)
    return model

In [4]:
# Network under test: createDeepLabv3() with its default arguments.
model = createDeepLabv3()

# Adam over every model parameter with a learning rate of 1e-4.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Mean-reduced squared error between the model output and the mask.
criterion = nn.MSELoss(reduction="mean")

# Metrics reported during training; train_model looks them up by these names.
metrics = dict(f1_score=f1_score, auroc=roc_auc_score)

In [5]:
# 2012 inputs: the imagery raster and its reference shapefile.
twelve_img, twelve_shp = (
    "/vol/ml/EphemeralStreamData/Ephemeral_Channels/Imagery/vhr_2012_refl.img",
    "/vol/ml/EphemeralStreamData/Ephemeral_Channels/Reference/reference_2012_merge.shp",
)

In [6]:
# Build the dataset from a single (imagery, reference shapefile) pair.
# MSAVIDataset lives in the project-local `preprocess` module, which is not
# shown here; presumably it derives the MSAVI band from the imagery and
# rasterizes the shapefile into masks -- TODO confirm against preprocess.py.
ds = preprocess.MSAVIDataset([(twelve_img, twelve_shp)])

  return _prepare_from_string(" ".join(pjargs))


In [None]:
# Notebook display cell: show the size of the 'image' entry of sample 1000.
# NOTE(review): assumes the entry is a torch tensor (it is called as .size()).
ds[1000]['image'].size()

In [None]:
# Serve the dataset in batches of 50 samples; every other DataLoader option
# is left at its default.
dl = DataLoader(dataset=ds, batch_size=50)

In [None]:
# Run training with the default number of epochs and keep the returned model.
trained_model = train_model(
    model=model,
    criterion=criterion,
    dataloader=dl,
    optimizer=optimizer,
    metrics=metrics,
)