# Training Faster-RCNN Model

In [1]:
import sys
sys.path.insert(0,'/workspaces/breast-tumor-detection/Model/FasterRCNN/src/')

In [2]:
from config import DEVICE, NUM_CLASSES, NUM_EPOCHS, OUT_DIR
from config import VISUALIZE_TRANSFORMED_IMAGES
from config import SAVE_PLOTS_EPOCH, SAVE_MODEL_EPOCH
from model_modified import create_model, MyModel
from utils import Averager, show_tranformed_image
from tqdm.auto import tqdm
from dataset import train_loader, valid_loader
import torch
import matplotlib.pyplot as plt
import time

  from .autonotebook import tqdm as notebook_tqdm


Number of training samples: 29
Number of validation samples: 10



In [3]:
# function for running training iterations
def train(train_data_loader, model, optimizer, train_itr, train_loss_list, train_loss_hist):
    """Run Training Iterations"""
    print("Training")
    # initialize tqdm progress bar
    prog_bar = tqdm(train_data_loader, total=len(train_data_loader))

    for data in prog_bar:
        optimizer.zero_grad()
        images, targets = data
        images = [image.to(DEVICE) for image in images]
        targets = [
            {k: v.to(DEVICE, dtype=torch.int64) for k, v in t.items()} for t in targets
        ]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()
        train_loss_list.append(loss_value)

        train_loss_hist.send(loss_value)

        losses.backward(retain_graph=True)
        optimizer.step()

        train_itr += 1

        # update the loss value beside the progress bar for each iteration
        prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
    return train_loss_list


# function for running validation iterations
def validate(valid_data_loader, model, val_itr, val_loss_list, val_loss_hist):
    """Run Validation Iterations"""
    print("Validating")
    # initialize tqdm progress bar
    prog_bar = tqdm(valid_data_loader, total=len(valid_data_loader))

    for data in prog_bar:
        images, targets = data
        images = [image.to(DEVICE) for image in images]
        targets = [
            {k: v.to(DEVICE, dtype=torch.int64) for k, v in t.items()} for t in targets
        ]
        with torch.no_grad():
            loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()
        val_loss_list.append(loss_value)
        val_loss_hist.send(loss_value)
        val_itr += 1
        # update the loss value beside the progress bar for each iteration
        prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
    return val_loss_list

### Initialize model and parameters

In [15]:
# initialize the model and move to the computation device
model = MyModel(pretrained=create_model(NUM_CLASSES))
model = model.cuda()
# get the model parameters
params = [p for p in model.parameters() if p.requires_grad]
# define the optimizer
optimizer = torch.optim.SGD(params, lr=0.0001, momentum=0.9, weight_decay=0.0005)
# optimizer = torch.optim.Adam(params, lr = 0.0001)

# initialize the Averager class
train_loss_hist = Averager()
val_loss_hist = Averager()
train_itr = 1
val_itr = 1
# train and validation loss lists to store loss values of all...
# ... iterations till ena and plot graphs for all iterations
train_loss_list = []
val_loss_list = []
logits_list = []

# start the training epochs
for epoch in range(20):
    print(f"\nEPOCH {epoch+1} of {20}")

    # reset the training and validation loss histories for the current epoch
    train_loss_hist.reset()
    val_loss_hist.reset()
    
    # start timer and carry out training and validation
    start = time.time()
    train_loss = train(train_loader, model, optimizer, train_itr, train_loss_list, train_loss_hist)
    logits_list.append(list(next(model.parameters()).detach().cpu()))
    # print(next(model.parameters()).detach().cpu().numpy())
    val_loss = validate(valid_loader, model, val_itr, val_loss_list, val_loss_hist)
    print(f"Epoch #{epoch} train loss: {train_loss_hist.value:.3f}")
    print(f"Epoch #{epoch} validation loss: {val_loss_hist.value:.3f}")
    end = time.time()
    print(f"Took {((end - start) / 60):.3f} minutes for epoch {epoch}")


EPOCH 1 of 20
Training


Loss: 0.6550: 100%|██████████| 29/29 [00:02<00:00,  9.69it/s] 


Validating


Loss: 1.4379: 100%|██████████| 10/10 [00:00<00:00, 16.70it/s]


Epoch #0 train loss: 7.968
Epoch #0 validation loss: 2.169
Took 0.061 minutes for epoch 0

EPOCH 2 of 20
Training


Loss: 0.7715: 100%|██████████| 29/29 [00:02<00:00, 10.33it/s] 


Validating


Loss: 0.7516: 100%|██████████| 10/10 [00:00<00:00, 15.99it/s]


Epoch #1 train loss: 2.749
Epoch #1 validation loss: 0.796
Took 0.059 minutes for epoch 1

EPOCH 3 of 20
Training


Loss: 0.1903: 100%|██████████| 29/29 [00:02<00:00, 10.34it/s]


Validating


Loss: 0.2162: 100%|██████████| 10/10 [00:00<00:00, 16.04it/s]


Epoch #2 train loss: 0.514
Epoch #2 validation loss: 0.194
Took 0.059 minutes for epoch 2

EPOCH 4 of 20
Training


Loss: 0.3037: 100%|██████████| 29/29 [00:02<00:00, 10.30it/s]


Validating


Loss: 0.3916: 100%|██████████| 10/10 [00:00<00:00, 16.15it/s]


Epoch #3 train loss: 0.316
Epoch #3 validation loss: 0.311
Took 0.059 minutes for epoch 3

EPOCH 5 of 20
Training


Loss: 0.1379: 100%|██████████| 29/29 [00:02<00:00, 10.30it/s]


Validating


Loss: 0.2782: 100%|██████████| 10/10 [00:00<00:00, 16.14it/s]


Epoch #4 train loss: 0.435
Epoch #4 validation loss: 0.285
Took 0.059 minutes for epoch 4

EPOCH 6 of 20
Training


Loss: 0.1501: 100%|██████████| 29/29 [00:02<00:00, 10.21it/s]


Validating


Loss: 0.2757: 100%|██████████| 10/10 [00:00<00:00, 15.97it/s]


Epoch #5 train loss: 0.238
Epoch #5 validation loss: 0.231
Took 0.059 minutes for epoch 5

EPOCH 7 of 20
Training


Loss: 0.5976: 100%|██████████| 29/29 [00:02<00:00,  9.91it/s]


Validating


Loss: 0.1150: 100%|██████████| 10/10 [00:00<00:00, 16.05it/s]


Epoch #6 train loss: 0.349
Epoch #6 validation loss: 0.331
Took 0.061 minutes for epoch 6

EPOCH 8 of 20
Training


Loss: 0.7621: 100%|██████████| 29/29 [00:02<00:00, 10.20it/s]


Validating


Loss: 0.9077: 100%|██████████| 10/10 [00:00<00:00, 15.61it/s]


Epoch #7 train loss: 0.830
Epoch #7 validation loss: 0.778
Took 0.060 minutes for epoch 7

EPOCH 9 of 20
Training


Loss: 0.2256: 100%|██████████| 29/29 [00:02<00:00, 10.20it/s]


Validating


Loss: 0.4348: 100%|██████████| 10/10 [00:00<00:00, 16.30it/s]


Epoch #8 train loss: 0.712
Epoch #8 validation loss: 0.832
Took 0.059 minutes for epoch 8

EPOCH 10 of 20
Training


Loss: 0.7393: 100%|██████████| 29/29 [00:02<00:00, 10.16it/s]


Validating


Loss: 0.7567: 100%|██████████| 10/10 [00:00<00:00, 15.76it/s]


Epoch #9 train loss: 0.866
Epoch #9 validation loss: 0.739
Took 0.059 minutes for epoch 9

EPOCH 11 of 20
Training


Loss: 6.0551: 100%|██████████| 29/29 [00:02<00:00, 10.23it/s] 


Validating


Loss: 21.2616: 100%|██████████| 10/10 [00:00<00:00, 15.19it/s]


Epoch #10 train loss: 3.470
Epoch #10 validation loss: 18.466
Took 0.060 minutes for epoch 10

EPOCH 12 of 20
Training


Loss: nan: 100%|██████████| 29/29 [00:02<00:00, 11.03it/s]          


Validating


Loss: nan: 100%|██████████| 10/10 [00:00<00:00, 17.46it/s]


Epoch #11 train loss: nan
Epoch #11 validation loss: nan
Took 0.055 minutes for epoch 11

EPOCH 13 of 20
Training


Loss: nan: 100%|██████████| 29/29 [00:02<00:00, 11.02it/s]


Validating


Loss: nan: 100%|██████████| 10/10 [00:00<00:00, 18.04it/s]


Epoch #12 train loss: nan
Epoch #12 validation loss: nan
Took 0.054 minutes for epoch 12

EPOCH 14 of 20
Training


Loss: nan: 100%|██████████| 29/29 [00:02<00:00, 11.11it/s]


Validating


Loss: nan: 100%|██████████| 10/10 [00:00<00:00, 17.73it/s]


Epoch #13 train loss: nan
Epoch #13 validation loss: nan
Took 0.054 minutes for epoch 13

EPOCH 15 of 20
Training


Loss: nan: 100%|██████████| 29/29 [00:02<00:00, 11.27it/s]


Validating


Loss: nan: 100%|██████████| 10/10 [00:00<00:00, 17.65it/s]


Epoch #14 train loss: nan
Epoch #14 validation loss: nan
Took 0.054 minutes for epoch 14

EPOCH 16 of 20
Training


Loss: nan: 100%|██████████| 29/29 [00:02<00:00, 11.19it/s]


Validating


Loss: nan: 100%|██████████| 10/10 [00:00<00:00, 18.21it/s]


Epoch #15 train loss: nan
Epoch #15 validation loss: nan
Took 0.054 minutes for epoch 15

EPOCH 17 of 20
Training


Loss: nan: 100%|██████████| 29/29 [00:02<00:00, 11.17it/s]


Validating


Loss: nan: 100%|██████████| 10/10 [00:00<00:00, 18.16it/s]


Epoch #16 train loss: nan
Epoch #16 validation loss: nan
Took 0.054 minutes for epoch 16

EPOCH 18 of 20
Training


Loss: nan: 100%|██████████| 29/29 [00:02<00:00, 11.20it/s]


Validating


Loss: nan: 100%|██████████| 10/10 [00:00<00:00, 17.88it/s]


Epoch #17 train loss: nan
Epoch #17 validation loss: nan
Took 0.054 minutes for epoch 17

EPOCH 19 of 20
Training


Loss: nan: 100%|██████████| 29/29 [00:02<00:00, 11.14it/s]


Validating


Loss: nan: 100%|██████████| 10/10 [00:00<00:00, 18.02it/s]


Epoch #18 train loss: nan
Epoch #18 validation loss: nan
Took 0.054 minutes for epoch 18

EPOCH 20 of 20
Training


Loss: nan: 100%|██████████| 29/29 [00:02<00:00, 11.21it/s]


Validating


Loss: nan: 100%|██████████| 10/10 [00:00<00:00, 17.56it/s]


Epoch #19 train loss: nan
Epoch #19 validation loss: nan
Took 0.054 minutes for epoch 19


In [16]:
import numpy as np
logits_array = np.array(logits_list)
logits_array[:,0]

array([0.462992  , 0.46298516, 0.46297824, 0.46297133, 0.46296442,
       0.4629575 , 0.4629506 , 0.46294367, 0.46293676, 0.46292984,
       0.46292293,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan],
      dtype=float32)