# Training data preparation

## Download datasets

In [None]:
# Disabled cell: the download commands below sit inside a string literal, so
# nothing is executed. To run them, drop the surrounding triple quotes so that
# `%%sh` becomes the first line of the cell.
"""
%%sh
cat download_data.sh
./download_data.sh
"""

## Patch the datasets

In [2]:
from deep_learning_lab.data_preparation import Orchestrator, DataStructure

# Show the keys of Orchestrator.DATASETS, i.e. the dataset names known to the
# Orchestrator class.
Orchestrator.DATASETS.keys()

dict_keys(['reid', 'fcr', 'bcs_a', 'bcs_b', 'bcs_e', 'bcs_h', 'bcs_n', 'bcs_s', 'bcs_u', 'bcs_un', 'bcc_a', 'bcc_b', 'bcc_bh', 'bcc_e', 'bcc_h', 'bcc_n', 'bcc_s', 'bcc_u', 'bcc_un'])

In [None]:
# Label sets to prepare, one pass per set (atomic labels are to be promoted).
sets_labels = [
    ['ImageRegion'],
    ['TextLine'],
    ['TextRegion'],
]

# Output directory layout handed to the Orchestrator.
output_structure = DataStructure(
    dir_data= "training_data",
    dir_images= "images",
    dir_labels= "labels"
)
orc = Orchestrator(output_structure= output_structure)

# No dataset is listed explicitly; add_defaults= True is set
# (presumably this pulls in the default datasets — confirm).
orc.ingestDatasets(datasets= [], add_defaults= True)

# 841 x 1188 is about 1e6 pixels and gives every tensor the same size.
target_size = (841, 1188)

for set_labels in sets_labels:
    # Register the current label set, with prompting disabled.
    orc.ingestLabels(uniform_set_labels= set_labels, prompt= False)

    # Validate automatically (auto_yes= True) and quietly (verbose= 0).
    orc.validate(auto_yes= True, verbose= 0)

    # Resize images and labels; overwrite= False and verbose= 2 are passed.
    orc.preprocess(resize= target_size, overwrite= False, verbose= 2)

    # Blank line between the outputs of successive label sets.
    print()

# Deep learning lab

In [4]:
import deep_learning_lab.gpu_setup as gpu

# Select a CUDA device to use for computation
# (preselected_device= 0 presumably picks device index 0 without prompting — confirm)
gpu.cudaDeviceSelection(preselected_device= 0)

# Print information about the selected CUDA device
# (the recorded output lists the Torch and CUDA versions, the GPU name and the CUDA memory)
print(gpu.cudaInfo())

Torch (1.12.1)
CUDA (11.3)
GPU (RTX A6000)
CUDA memory (48.69 GB)


## Training

In [5]:
# Labels passed to model.Trainer in the next cell (a single label here).
labels = ['TextLine']

In [6]:
from deep_learning_lab import model

# Build the trainer for the selected labels.
# train_ratio + val_ratio = 0.90; the remaining 0.10 is presumably held out as
# a test split — confirm.
trainer = model.Trainer(
    labels, # a list of labels for the training data
    workdir= "results", # the directory where results will be saved
    input_dir= "training_data", # the directory where the training data is stored
    train_ratio= 0.80, # the ratio of data to be used for training
    val_ratio= 0.10 # the ratio of data to be used for validation
)

In [7]:
# Load the TensorBoard notebook extension
# (re-running this cell only prints an "already loaded" notice)
%load_ext tensorboard

# Log directory exposed by the trainer; echoed so the path shows in the output
tensorboard_dir = trainer.tensorboard_dir
!echo $tensorboard_dir
# Optional reset of the log directory (left disabled).
# NOTE(review): `&` starts `rm` in the background, so `mkdir` can race with it;
# `&&` is probably what was intended — confirm before enabling.
#!rm -r $tensorboard_dir & mkdir -p $tensorboard_dir
# Start (or reuse) TensorBoard inline, reading from that directory
%tensorboard --logdir $tensorboard_dir

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
results/TextLine/tensorboard/log


Reusing TensorBoard on port 6011 (pid 13409), started 0:01:21 ago. (Use '!kill 13409' to kill it.)

In [None]:
# Launch training with the hyper-parameters below.
trainer.train(
    batch_size= 4, # the number of samples to use in each batch during training
    epochs= 100, # the number of epochs to train the model for
    learning_rate= 1e-4, # the rate at which the model adjusts its weights during training
    gamma_exp_lr= 0.9995, # the decay factor (gamma) of the exponential learning-rate scheduler
    evaluate_every_epoch= 5, # how often to evaluate the model on the validation set during training
    val_patience= 4, # early-stopping patience; the recorded config monitors '+miou' rather than the loss (unit, epochs vs. evaluations, to be confirmed)
    repeat_dataset= 4, # how many times to repeat the training data during each epoch
    output_size= 1e6 # target size given to the 'fixed_size_resize' transform; presumably a pixel count (about 841 x 1188) — confirm
)

{'color_labels': {'label_json_file': 'results/TextLine/training_data/classfile.json'}, 'train_dataset': {'type': 'image_csv', 'csv_filename': 'results/TextLine/training_data/train.csv', 'base_dir': 'results/TextLine/training_data', 'repeat_dataset': 4, 'compose': {'transforms': [{'type': 'fixed_size_resize', 'output_size': 1000000.0}]}}, 'val_dataset': {'type': 'image_csv', 'csv_filename': 'results/TextLine/training_data/val.csv', 'base_dir': 'results/TextLine/training_data', 'compose': {'transforms': [{'type': 'fixed_size_resize', 'output_size': 1000000.0}]}}, 'model': {'encoder': 'resnet50', 'decoder': {'decoder_channels': [512, 256, 128, 64, 32], 'max_channels': 512}}, 'metrics': [['miou', 'iou'], ['iou', {'type': 'iou', 'average': None}], 'precision'], 'optimizer': {'lr': 0.0001}, 'lr_scheduler': {'type': 'exponential', 'gamma': 0.9995}, 'val_metric': '+miou', 'early_stopping': {'patience': 4}, 'model_out_dir': 'results/TextLine/model', 'num_epochs': 100, 'evaluate_every_epoch': 5,

epoch 0: loss=???:   0%|          | 0/100 [00:00<?, ?it/s]

iter=0: loss=???: 0it [00:00, ?it/s]

## Inference

In [None]:
# Labels passed to model.Predictor in the next cell; same value as in the
# training section.
labels = ['TextLine']

In [None]:
from deep_learning_lab import model
import os

# Create an instance of Predictor
predictor = model.Predictor(
    labels, # a list of labels for the predicted classes
    input_dir= 'inference_data', # the directory containing the input images to predict on
    output_dir= None, # the directory to save the predicted images to (if None, saves to 'predictions')
    output_size= None, # the size of the predicted images (if None, uses input image size)
    from_csv= os.path.join('training_data', 'test.csv'), # the path to a CSV file with input image paths and labels
    reset_input= True # whether to reset the input before starting the predictor
)

# Run the predictions and keep the returned value: the cells of the "Tests"
# section read `results`, which was never assigned anywhere in the notebook, so
# a fresh "Restart & Run All" stopped there with a NameError.
# NOTE(review): this assumes start() returns the per-image prediction dicts
# (keys such as 'regions', 'probasMaps', 'vignettes') — confirm.
results = predictor.start(
    batch_size= 4, # the number of images to process at once
    drawRegions= True, # whether to draw regions on the images
    cutVignettes= True, # whether to cut out vignettes from the images
    bounding_box= False, # whether to use a bounding box or a polygon around the predicted regions
    verbose= True # whether to print status messages
)

## Tests

In [None]:
# Pillow turns the prediction arrays into images; pyplot displays the vignettes.
from PIL import Image
import matplotlib.pyplot as plt

# Fail early when there is nothing to inspect.
# NOTE(review): `results` is not assigned in this cell; it must already exist
# in the kernel (presumably the value returned by predictor.start — confirm).
assert len(results)

In [None]:
# List the keys available in the first entry of `results`.
results[0].keys()

In [None]:
# Pick which entry of `results` the following cells display.
image_nb = 0
predictions = results[image_nb]

In [None]:
# Show the regions found on the original image
Image.fromarray(predictions['regions'])

In [None]:
# Show the probability map of the class 1
Image.fromarray(predictions['probasMaps'][1])

In [None]:
# Show the regions found
for vignette in predictions['vignettes']:
    plt.imshow(Image.fromarray(vignette))
    plt.show()