## Check if GPU is detected

In [1]:
import os

# CUDA debugging switches; they are set before torch is imported further down.
# NOTE(review): the error text logged later in this notebook says to *compile*
# with TORCH_USE_CUDA_DSA, so setting it as an environment variable may have no
# effect on a prebuilt PyTorch — confirm.
os.environ.update({
    'CUDA_LAUNCH_BLOCKING': "1",
    'TORCH_USE_CUDA_DSA': "1",
})

In [2]:
# IPython line magic: sets CUDA_LAUNCH_BLOCKING=1 in the kernel's environment
# (the same variable is also assigned through os.environ in the first cell).
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_LAUNCH_BLOCKING=1


In [3]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

# Extra diagnostics that are only printed for a CUDA device (device index 0).
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('CUDA version:', torch.version.cuda)
    print('Memory Usage:')
    # Both counters are reported in bytes and shown here in GiB, rounded to 0.1.
    for label, query in (
        ('Allocated:', torch.cuda.memory_allocated),
        ('Cached:   ', torch.cuda.memory_reserved),
    ):
        print(label, round(query(0) / 1024**3, 1), 'GB')

Using device: cuda

NVIDIA GeForce GTX 960
CUDA version: 11.7
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


## Import libraries, methods and constants

In [4]:
import numpy as np
import evaluate
import torch
import json
from os import sys

from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation, TrainingArguments, Trainer, EarlyStoppingCallback

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Put the sibling ../src directory (resolved against the current working
# directory) on sys.path so the project modules below can be imported.
module_path = os.path.abspath('../src')
if module_path not in sys.path:
    sys.path.append(module_path)

# Project-local modules; these imports only work after the path tweak above.
from data_prepossessing import (
    create_datasets_for_plants,
    get_labels,
)
from constants import (
    seed,
    weed_plants,
    models_folder,
)
from config import (
    model_type,
    crop,
)

## Train a semantic segmentation model

In [6]:
def init_image_processor(checkpoint):
    """Load the SegFormer image processor associated with ``checkpoint``.

    Args:
        checkpoint: Identifier passed straight to
            ``SegformerImageProcessor.from_pretrained``.

    Returns:
        The instantiated ``SegformerImageProcessor``.
    """
    return SegformerImageProcessor.from_pretrained(checkpoint)

In [7]:
def train_transforms(example_batch, image_processor):
    """Run the image processor over one batch of images and annotations.

    Args:
        example_batch: Mapping with an ``"image"`` and an ``"annotation"``
            entry, each an iterable of per-example items.
        image_processor: Callable invoked as ``image_processor(images, labels)``.

    Returns:
        Whatever ``image_processor`` returns for the batch.
    """
    # Materialise both columns as plain lists before handing them over.
    images = list(example_batch["image"])
    annotations = list(example_batch["annotation"])
    return image_processor(images, annotations)

In [8]:
def compute_metrics(num_labels, metric, eval_pred):
    """Compute segmentation metrics for one evaluation pass.

    The logits are upsampled to the spatial size of the labels with bilinear
    interpolation, reduced to class ids with an argmax over dimension 1, and
    passed to ``metric.compute``. NumPy values in the result are converted to
    built-in Python types so the metrics can be serialised as JSON.

    Args:
        num_labels: Number of classes, forwarded to ``metric.compute``.
        metric: Object exposing ``compute(predictions, references, num_labels,
            ignore_index, reduce_labels)`` and returning a dict.
        eval_pred: Pair ``(logits, labels)`` of NumPy arrays; the last two
            axes of ``labels`` give the target height and width.

    Returns:
        A new dict with the metric values; arrays become lists and NumPy
        scalars become Python scalars.
    """
    logits, labels = eval_pred

    with torch.no_grad():
        upsampled_logits = torch.nn.functional.interpolate(
            torch.from_numpy(logits),
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        pred_labels = upsampled_logits.argmax(dim=1).detach().cpu().numpy()

    metrics = metric.compute(
        predictions=pred_labels,
        references=labels,
        num_labels=num_labels,
        ignore_index=255,
        reduce_labels=False,
    )

    serialisable = {}
    for key, value in metrics.items():
        if isinstance(value, np.ndarray):
            # Covers ndarray subclasses too, unlike an exact type comparison.
            value = value.tolist()
        elif isinstance(value, np.generic):
            # NumPy scalars such as np.float32 are not JSON-serialisable.
            value = value.item()
        serialisable[key] = value
    return serialisable

In [9]:
def init_training_arguments(prediction_loss_only):
    """Build the ``TrainingArguments`` shared by the training and evaluation runs.

    Args:
        prediction_loss_only: Forwarded unchanged to ``TrainingArguments``;
            it is the only setting that differs between the two callers.

    Returns:
        A configured ``TrainingArguments`` instance.
    """
    settings = dict(
        output_dir="segformer-b0-scene-parse-150",
        learning_rate=6e-5,
        num_train_epochs=1,
        # Same batch size for training and evaluation.
        per_device_train_batch_size=6,
        per_device_eval_batch_size=6,
        save_total_limit=3,
        # Evaluate and checkpoint on the same 30-step cadence.
        evaluation_strategy="steps",
        save_strategy="steps",
        save_steps=30,
        eval_steps=30,
        logging_steps=1,
        prediction_loss_only=prediction_loss_only,
        remove_unused_columns=False,
        load_best_model_at_end=True,
        seed=seed,
    )
    return TrainingArguments(**settings)


def init_training_arguments_for_training():
    """Return the shared training arguments with ``prediction_loss_only=True``."""
    return init_training_arguments(prediction_loss_only=True)


def init_training_arguments_for_evaluation():
    """Return the shared training arguments with ``prediction_loss_only=False``."""
    return init_training_arguments(prediction_loss_only=False)

In [10]:
def initialize_trainer(model, training_args, num_labels, metric, train_ds, test_ds):
    """Create a ``Trainer`` with metric computation and early stopping attached.

    Args:
        model: Model to train or evaluate.
        training_args: ``TrainingArguments`` for the run.
        num_labels: Number of classes, bound into the metrics callback.
        metric: Metric object, bound into the metrics callback.
        train_ds: Dataset used as ``train_dataset``.
        test_ds: Dataset used as ``eval_dataset``.

    Returns:
        The configured ``Trainer``.
    """
    def metrics_for_trainer(eval_pred):
        # Trainer passes only eval_pred; the other two arguments are closed over.
        return compute_metrics(num_labels, metric, eval_pred)

    early_stopping = EarlyStoppingCallback(early_stopping_patience=3)
    return Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=test_ds,
        compute_metrics=metrics_for_trainer,
        callbacks=[early_stopping],
    )

In [11]:
def train_model_of_type_for_crop(model_type, crop):
    """Fine-tune SegFormer (``nvidia/mit-b0``) for one crop and persist the results.

    Steps: build the train/test datasets for ``crop`` plus the weed plants,
    attach the image-processor transform, fine-tune with a loss-only trainer,
    save the model and its log history, then evaluate the test split with a
    second trainer that runs ``compute_metrics`` and save those metrics.

    Args:
        model_type: Model variant name; used for dataset and label creation
            and as a sub-folder of ``models_folder``.
        crop: Crop name; used for dataset and label creation and as the last
            path component of the output folder.

    Returns:
        None. The model, ``log_history.json`` and ``test_metric.json`` are
        written under ``models_folder + model_type + '/' + crop``.
    """
    # Define a model checkpoint to be finetuned
    checkpoint = "nvidia/mit-b0"

    # Every artefact of this run is written below this folder
    output_dir = models_folder + model_type + '/' + crop

    # Prepare the data for the model training
    model_plant_names = [crop] + weed_plants
    train_ds, test_ds = create_datasets_for_plants(model_plant_names, model_type, crop)

    print("Training subset number of images: " + str(train_ds.num_rows))
    print("Test subset number of images: " + str(test_ds.num_rows))

    image_processor = init_image_processor(checkpoint)

    def apply_transforms(example_batch):
        # Same preprocessing for both splits
        return train_transforms(example_batch, image_processor)

    train_ds.set_transform(apply_transforms)
    test_ds.set_transform(apply_transforms)

    # Generate labels for the model
    # NOTE(review): the run recorded in this notebook aborted with a CUDA
    # device-side assert in the NLL loss (`t >= 0 && t < n_classes`), which
    # suggests some annotation values fall outside the ids returned here —
    # verify the masks produced by create_datasets_for_plants.
    id2label, label2id = get_labels(crop, model_type)
    num_classes = len(id2label)

    print('Number of classes:', num_classes)
    print('id2label:', id2label)
    print('label2id:', label2id)

    # Initialize and train model
    model = AutoModelForSemanticSegmentation.from_pretrained(checkpoint, id2label=id2label, label2id=label2id)
    training_args_for_training = init_training_arguments_for_training()
    metric = evaluate.load("mean_iou")
    trainer = initialize_trainer(model, training_args_for_training, num_classes, metric, train_ds, test_ds)
    trainer.train()

    # Save the trained model, so that it can be used for inference later
    trainer.save_model(output_dir)

    # Save the log history, so that it can be used for plotting later.
    # Read it before opening the file so a failure cannot leave an empty file.
    log_history = trainer.state.log_history
    with open(output_dir + '/log_history.json', 'w') as file:
        json.dump(log_history, file)

    # Instantiate new trainer for evaluation that will use compute_metrics method
    training_args_for_evaluation = init_training_arguments_for_evaluation()
    eval_trainer = initialize_trainer(trainer.model, training_args_for_evaluation, num_classes, metric, train_ds, test_ds)
    test_metric = eval_trainer.evaluate(test_ds)
    with open(output_dir + '/test_metric.json', 'w') as file:
        json.dump(test_metric, file)


In [12]:
def train_model_from_config():
    """Train the model for the ``model_type`` and ``crop`` imported from ``config``."""
    train_model_of_type_for_crop(model_type=model_type, crop=crop)

In [13]:
# Start training for the model type and crop selected in the `config` module.
# NOTE(review): the output recorded for this cell ends in a CUDA device-side
# assert raised by the NLL loss kernel (`t >= 0 && t < n_classes`), i.e. a
# target value was outside the valid class range — check the annotation masks.
train_model_from_config()

Training subset number of images: 467
Test subset number of images: 469




4
Number of classes: 4
id2label: {0: 'void', 1: 'soil', 2: 'broad_bean', 3: 'weeds'}
label2id: {'void': 0, 'soil': 1, 'broad_bean': 2, 'weeds': 3}


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.linear_fuse.weight', 'decode_head.linear_c.0.proj.weight', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.bias', 'decode_head.batch_norm.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.classifier.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.2.proj.bias', 'decode_head.classifier.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  1%|▏         | 1/78 [00:04<05:12,  4.06s/it]

{'loss': 1.4832, 'learning_rate': 5.923076923076924e-05, 'epoch': 0.01}


  3%|▎         | 2/78 [00:06<04:16,  3.38s/it]

{'loss': 1.4714, 'learning_rate': 5.846153846153846e-05, 'epoch': 0.03}


  4%|▍         | 3/78 [00:09<03:55,  3.14s/it]

{'loss': 1.4469, 'learning_rate': 5.76923076923077e-05, 'epoch': 0.04}


  5%|▌         | 4/78 [00:12<03:44,  3.03s/it]

{'loss': 1.4187, 'learning_rate': 5.692307692307692e-05, 'epoch': 0.05}


  6%|▋         | 5/78 [00:15<03:36,  2.97s/it]

{'loss': 1.4192, 'learning_rate': 5.615384615384616e-05, 'epoch': 0.06}


/opt/conda/conda-bld/pytorch_1682343970094/work/aten/src/ATen/native/cuda/NLLLoss2d.cu:103: nll_loss2d_forward_kernel: block: [10,0,0], thread: [96,0,0] Assertion `t >= 0 && t < n_classes` failed.
/opt/conda/conda-bld/pytorch_1682343970094/work/aten/src/ATen/native/cuda/NLLLoss2d.cu:103: nll_loss2d_forward_kernel: block: [11,0,0], thread: [96,0,0] Assertion `t >= 0 && t < n_classes` failed.
/opt/conda/conda-bld/pytorch_1682343970094/work/aten/src/ATen/native/cuda/NLLLoss2d.cu:103: nll_loss2d_forward_kernel: block: [11,0,0], thread: [608,0,0] Assertion `t >= 0 && t < n_classes` failed.
/opt/conda/conda-bld/pytorch_1682343970094/work/aten/src/ATen/native/cuda/NLLLoss2d.cu:103: nll_loss2d_forward_kernel: block: [11,0,0], thread: [609,0,0] Assertion `t >= 0 && t < n_classes` failed.
/opt/conda/conda-bld/pytorch_1682343970094/work/aten/src/ATen/native/cuda/NLLLoss2d.cu:103: nll_loss2d_forward_kernel: block: [10,0,0], thread: [608,0,0] Assertion `t >= 0 && t < n_classes` failed.
/opt/conda/c

RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# 210 images
# train_model_of_type_for_crop("multiclass", "broad_bean")
# train_model_of_type_for_crop("binary", "broad_bean")

# 137 images
# train_model_of_type_for_crop("multiclass", "common_buckwheat")
# train_model_of_type_for_crop("binary", "common_buckwheat")

# 207 images
# train_model_of_type_for_crop("multiclass", "pea")
# train_model_of_type_for_crop("binary", "pea")

# 403 images
# train_model_of_type_for_crop("multiclass", "corn")
# train_model_of_type_for_crop("binary", "corn")

# 303 images
# train_model_of_type_for_crop("multiclass", "soybean")
# train_model_of_type_for_crop("binary", "soybean")

# 135 images
# train_model_of_type_for_crop("multiclass", "sunflower")
# train_model_of_type_for_crop("binary", "sunflower")

# 410 images
# train_model_of_type_for_crop("multiclass", "sugar_beet")
# train_model_of_type_for_crop("binary", "sugar_beet")

In [None]:
# import subprocess
# from typing import NoReturn

# def shutdown_windows() -> NoReturn:
#     subprocess.run(["shutdown", "/s", "/t", "0"])

# shutdown_windows()