In [1]:
"""use this to define the retrain model pipeline"""

import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
import numpy as np
import json


import datetime
import time
import wandb
import presets
import torch
import torch.utils.data
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn
import utils
from coco_utils import get_coco, get_coco_kp
from engine import evaluate, train_one_epoch
from group_by_aspect_ratio import create_aspect_ratio_groups, GroupedBatchSampler
from torchvision.transforms import InterpolationMode
from transforms import SimpleCopyPaste

root = os.getcwd()
print('current working dir: ', root)

print('is_available: ', torch.cuda.is_available())
print('device_count: ', torch.cuda.device_count())
print('current_device: ', torch.cuda.current_device())
print('current_device: ', torch.cuda.device(0))
print('get_device_name: ', torch.cuda.get_device_name(0))

current working dir:  c:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco
is_available:  True
device_count:  1
current_device:  0
current_device:  <torch.cuda.device object at 0x000001DA7A29C160>
get_device_name:  NVIDIA GeForce RTX 3090


In [2]:

def xml_to_df(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    df = pd.DataFrame(xml_list, columns=column_name)
    return df

In [3]:
# upload new class images

new_class_data_path = r'C:\Users\endle\Desktop\tsi-obj-det-dataset\new-data'
old_data_path = r'C:\Users\endle\Desktop\tsi-obj-det-dataset\old-data'
current_data_path = r'C:\Users\endle\Desktop\tsi-obj-det-dataset\current-data'

df_old = xml_to_df(old_data_path)
df_old

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,1001.bmp,256,256,scratch,1,39,17,62
1,1009.bmp,256,256,scratch,1,1,31,81
2,1009.bmp,256,256,scratch,31,170,59,256
3,1009.bmp,256,256,scratch,16,166,29,191
4,1009.bmp,256,256,scratch,1,172,11,192
...,...,...,...,...,...,...,...,...
523,987.bmp,256,256,paint,38,11,50,26
524,987.bmp,256,256,paint,1,8,19,65
525,988.bmp,256,256,paint,15,231,73,256
526,988.bmp,256,256,scratch,220,195,256,256


In [4]:
# for now, new data is .xml, we'll need to mod this during production to be a pure df with same structure
# https://mlhive.com/2022/02/read-and-write-pascal-voc-xml-annotations-in-python

df_new = xml_to_df(new_class_data_path)
df_new

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,0 - scratch.jpg,320,320,scratch,9,7,82,90
1,1 - tape.jpg,320,320,tape,1,1,112,156
2,10 - scratch.jpg,320,320,scratch,9,15,89,81
3,11 - tape.jpg,320,320,tape,1,1,109,136
4,12 - tape.jpg,320,320,tape,1,1,102,135
5,13 - tape.jpg,320,320,tape,1,1,100,134
6,14 - tape.jpg,320,320,tape,1,1,142,106
7,15 - tape.jpg,320,320,tape,1,27,125,144
8,16 - tape.jpg,320,320,tape,1,1,157,106
9,17 - tape.jpg,320,320,tape,1,7,140,129


In [5]:
# combine dataframes

df_current = pd.concat([df_old, df_new])
df_current

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,1001.bmp,256,256,scratch,1,39,17,62
1,1009.bmp,256,256,scratch,1,1,31,81
2,1009.bmp,256,256,scratch,31,170,59,256
3,1009.bmp,256,256,scratch,16,166,29,191
4,1009.bmp,256,256,scratch,1,172,11,192
...,...,...,...,...,...,...,...,...
16,5 - paint.jpg,320,320,paint,16,22,84,74
17,6 - tape.jpg,320,320,tape,1,1,124,127
18,7 - tape.jpg,320,320,tape,1,1,113,132
19,8 - tape.jpg,320,320,tape,1,1,121,145


In [6]:
"""
UPDATED CONVERSION METHOD - STACK .XMLS INTO FILES
"""

coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []

category_set = dict()
image_set = set()

category_item_id = 0
image_id = 20180000000
annotation_id = 0

def addCatItem(name):
    global category_item_id
    category_item = dict()
    category_item['supercategory'] = 'none'
    category_item_id += 1
    category_item['id'] = category_item_id
    category_item['name'] = name
    coco['categories'].append(category_item)
    category_set[name] = category_item_id
    return category_item_id

def addImgItem(file_name, size):
    global image_id
    if file_name is None:
        raise Exception('Could not find filename tag in xml file.')
    if size['width'] is None:
        raise Exception('Could not find width tag in xml file.')
    if size['height'] is None:
        raise Exception('Could not find height tag in xml file.')
    image_id += 1
    image_item = dict()
    image_item['id'] = image_id
    image_item['file_name'] = file_name
    image_item['width'] = size['width']
    image_item['height'] = size['height']
    coco['images'].append(image_item)
    image_set.add(file_name)
    return image_id

def addAnnoItem(object_name, image_id, category_id, bbox):
    global annotation_id
    annotation_item = dict()
    annotation_item['segmentation'] = []
    seg = []
    #bbox[] is x,y,w,h
    #left_top
    seg.append(bbox[0])
    seg.append(bbox[1])
    #left_bottom
    seg.append(bbox[0])
    seg.append(bbox[1] + bbox[3])
    #right_bottom
    seg.append(bbox[0] + bbox[2])
    seg.append(bbox[1] + bbox[3])
    #right_top
    seg.append(bbox[0] + bbox[2])
    seg.append(bbox[1])

    annotation_item['segmentation'].append(seg)

    annotation_item['area'] = bbox[2] * bbox[3]
    annotation_item['iscrowd'] = 0
    annotation_item['ignore'] = 0
    annotation_item['image_id'] = image_id
    annotation_item['bbox'] = bbox
    annotation_item['category_id'] = category_id
    annotation_id += 1
    annotation_item['id'] = annotation_id
    coco['annotations'].append(annotation_item)

def parseXmlFiles(xml_path): 
    for f in os.listdir(xml_path):
        if not f.endswith('.xml'):
            continue
        
        bndbox = dict()
        size = dict()
        current_image_id = None
        current_category_id = None
        file_name = None
        size['width'] = None
        size['height'] = None
        size['depth'] = None

        xml_file = os.path.join(xml_path, f)
        print(xml_file)

        tree = ET.parse(xml_file)
        root = tree.getroot()
        if root.tag != 'annotation':
            raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))

        #elem is <folder>, <filename>, <size>, <object>
        for elem in root:
            current_parent = elem.tag
            current_sub = None
            object_name = None
            
            if elem.tag == 'folder':
                continue
            
            if elem.tag == 'filename':
                file_name = elem.text
                if file_name in category_set:
                    raise Exception('file_name duplicated')
                
            #add img item only after parse <size> tag
            elif current_image_id is None and file_name is not None and size['width'] is not None:
                if file_name not in image_set:
                    current_image_id = addImgItem(file_name, size)
                    print('add image with {} and {}'.format(file_name, size))
                else:
                    raise Exception('duplicated image: {}'.format(file_name)) 
            #subelem is <width>, <height>, <depth>, <name>, <bndbox>
            for subelem in elem:
                bndbox ['xmin'] = None
                bndbox ['xmax'] = None
                bndbox ['ymin'] = None
                bndbox ['ymax'] = None
                
                current_sub = subelem.tag
                if current_parent == 'object' and subelem.tag == 'name':
                    object_name = subelem.text
                    if object_name not in category_set:
                        current_category_id = addCatItem(object_name)
                    else:
                        current_category_id = category_set[object_name]

                elif current_parent == 'size':
                    if size[subelem.tag] is not None:
                        raise Exception('xml structure broken at size tag.')
                    size[subelem.tag] = int(subelem.text)

                #option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
                for option in subelem:
                    if current_sub == 'bndbox':
                        if bndbox[option.tag] is not None:
                            raise Exception('xml structure corrupted at bndbox tag.')
                        bndbox[option.tag] = int(option.text)

                #only after parse the <object> tag
                if bndbox['xmin'] is not None:
                    if object_name is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_image_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_category_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    bbox = []
                    #x
                    bbox.append(bndbox['xmin'])
                    #y
                    bbox.append(bndbox['ymin'])
                    #w
                    bbox.append(bndbox['xmax'] - bndbox['xmin'])
                    #h
                    bbox.append(bndbox['ymax'] - bndbox['ymin'])
                    print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id, bbox))
                    addAnnoItem(object_name, current_image_id, current_category_id, bbox )





In [7]:
# copy image files over from old and new to current 

import shutil

new_class_data_path = r'C:\Users\endle\Desktop\tsi-obj-det-dataset\new-data'
old_data_path = r'C:\Users\endle\Desktop\tsi-obj-det-dataset\old-data'
current_data_path = r'C:\Users\endle\Desktop\tsi-obj-det-dataset\current-data'

folder_list =[old_data_path, new_class_data_path]

for folder in folder_list:
   files = os.listdir(folder)
   for f in files:
      shutil.copy(folder + "\\" + f, current_data_path+ "\\" + f)


In [8]:
# make train/test split of combined dataset

# msk = np.random.rand(len(df_current)) < 0.8

# train = df_current[msk]
# test = df_current[~msk]

from sklearn.model_selection import train_test_split


train, test = train_test_split(df_current, test_size=0.2, random_state=42, shuffle=True)

train

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
481,723.bmp,256,256,paint,103,57,209,102
158,1814.bmp,256,256,scratch,57,229,81,256
15,1026.bmp,256,256,scratch,15,95,57,228
334,254.bmp,256,256,scratch,1,114,17,144
39,1083.bmp,256,256,pit,39,46,105,99
...,...,...,...,...,...,...,...,...
71,117.bmp,256,256,pit,127,236,183,256
106,152.bmp,256,256,dent,55,168,136,236
270,1935.bmp,256,256,pit,166,129,225,190
435,46.bmp,256,256,scratch,75,135,125,206


In [9]:
test

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
195,1842.bmp,256,256,dent,15,173,174,218
79,1295.bmp,256,256,scratch,1,190,35,224
479,70.bmp,256,256,scratch,1,174,42,256
109,1579.bmp,256,256,scratch,126,72,197,164
473,65.bmp,256,256,scratch,1,4,23,36
...,...,...,...,...,...,...,...,...
14,3 - tape.jpg,320,320,tape,17,1,105,116
140,1791.bmp,256,256,scratch,17,42,49,76
371,288.bmp,256,256,pit,176,189,197,205
18,1036.bmp,256,256,scratch,1,1,47,109


In [10]:

annos = r'C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\data\annotations'
train_pth = r'C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\data\train'
val_pth = r'C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\data\val'


# gotta move xmls and images to train/val folders


# TRAIN

current_data_path = r'C:\Users\endle\Desktop\tsi-obj-det-dataset\current-data'


def move_coco_datafiles(df, current_data_path, new_data_path):
    """use on train test dataframes, move files from current datpath to new file paths"""

    for index, row in df.iterrows():
        f = row['filename']

        name = f.split('.')[0]

        xml_pth = current_data_path + '\\' + name + '.xml'
        img_pth = current_data_path + '\\' + f

        # now move both img and xml to path

        shutil.copy(xml_pth, new_data_path + "\\" + name + '.xml') # xml
        shutil.copy(img_pth, new_data_path + "\\" + f) # img

### now move to new folders

move_coco_datafiles(train, current_data_path, train_pth)
move_coco_datafiles(test, current_data_path, val_pth)





In [11]:
# now make annotations

def build_coco_annot(xml_path, output_path):
    parseXmlFiles(xml_path)
    json.dump(coco, open(output_path, 'w'))

build_coco_annot(train_pth, annos +'\\' + "train.json")

# reset vals!

coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []

category_set = dict()
image_set = set()

category_item_id = 0
image_id = 20180000000
annotation_id = 0

build_coco_annot(val_pth, annos +'\\' + "val.json")



C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\data\train\0 - scratch.xml
add image with 0 - scratch.jpg and {'width': 320, 'height': 320, 'depth': 3}
add annotation with scratch,20180000001,1,[9, 7, 73, 83]
C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\data\train\1 - tape.xml
add image with 1 - tape.jpg and {'width': 320, 'height': 320, 'depth': 3}
add annotation with tape,20180000002,2,[1, 1, 111, 155]
C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\data\train\10 - scratch.xml
add image with 10 - scratch.jpg and {'width': 320, 'height': 320, 'depth': 3}
add annotation with scratch,20180000003,1,[9, 15, 80, 66]
C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\data\train\1009.xml
add image with 1009.bmp and {'width': 256, 'height': 256, 'depth': 3}
add annotation with scratch,20180000004,1,[1, 1, 30, 80]
add annotation with scratch,20180000004,1,[31, 170, 28, 86]
add annotation with sc

In [16]:
# now retrain model with new dataset 

project_prompt = "tsi-object-detection-retrain-may11"
new_dataset_path = r'C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\data'
output_dir_path = r'C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\outputdir'
#data_dir_path = r"C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\data"
data_dir_path = new_dataset_path
dataset_type = 'coco'
model = "retinanet_resnet50_fpn"
device_type = "cuda"
batch_size = 4 #8
epochs = 30
workers = 1
optimizer = "adamw"
norm_weight_decay = 0.5 # 0.9 IS EXCELLENT
momentum = 0.9
lr = 0.0000500  # balmy energy, 0.0000500 is excellent
#lr = 0.00001
weight_decay = 1e-4 # 1e-4 was good
lr_step_size = 8
data_agumentation = 'hflip'
dataset_type = "coco"

# start a new wandb run to track this script
wandb.init(
    # set the wandb project where this run will be logged
    project= project_prompt,
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": lr,
    "architecture": model,
    "dataset": dataset_type,
    "epochs": epochs,
    }
)

def copypaste_collate_fn(batch):
    copypaste = SimpleCopyPaste(blending=True, resize_interpolation=InterpolationMode.BILINEAR)
    return copypaste(*utils.collate_fn(batch))


def get_dataset(name, image_set, transform, data_path):
    paths = {"coco": (data_path, get_coco, 91), "coco_kp": (data_path, get_coco_kp, 2)}
    p, ds_fn, num_classes = paths[name]

    ds = ds_fn(p, image_set=image_set, transforms=transform)
    return ds, num_classes


def get_transform(train, args):
    if train:
        return presets.DetectionPresetTrain(data_augmentation=args.data_augmentation)
    elif args.weights and args.test_only:
        weights = torchvision.models.get_weight(args.weights)
        trans = weights.transforms()
        return lambda img, target: (trans(img), target)
    else:
        return presets.DetectionPresetEval()


def get_args_parser(add_help=True):

    import argparse

    parser = argparse.ArgumentParser(description="PyTorch Detection Training", add_help=add_help)

    parser.add_argument("--data-path", default=data_dir_path, type=str, help="dataset path")
    parser.add_argument("--dataset", default=dataset_type, type=str, help="dataset name")
    parser.add_argument("--model", default=model, type=str, help="model name")
    parser.add_argument("--device", default=device_type, type=str, help="device (Use cuda or cpu Default: cuda)")
    parser.add_argument(
        "-b", "--batch-size", default=batch_size, type=int, help="images per gpu, the total batch size is $NGPU x batch_size"
    )
    parser.add_argument("--epochs", default=epochs, type=int, metavar="N", help="number of total epochs to run")
    parser.add_argument(
        "-j", "--workers", default=workers, type=int, metavar="N", help="number of data loading workers (default: 4)"
    )
    parser.add_argument("--opt", default=optimizer, type=str, help="optimizer")
    parser.add_argument(
        "--lr",
        default=lr,
        type=float,
        help="initial learning rate, 0.02 is the default value for training on 8 gpus and 2 images_per_gpu",
    )
    parser.add_argument("--momentum", default=momentum, type=float, metavar="M", help="momentum")
    parser.add_argument(
        "--wd",
        "--weight-decay",
        default=weight_decay,
        type=float,
        metavar="W",
        help="weight decay (default: 1e-4)",
        dest="weight_decay",
    )
    parser.add_argument(
        "--norm-weight-decay",
        default=norm_weight_decay,
        type=float,
        help="weight decay for Normalization layers (default: None, same value as --wd)",
    )
    parser.add_argument(
        "--lr-scheduler", default="multisteplr", type=str, help="name of lr scheduler (default: multisteplr)"
    )
    parser.add_argument(
        "--lr-step-size", default=lr_step_size, type=int, help="decrease lr every step-size epochs (multisteplr scheduler only)"
    )
    parser.add_argument(
        "--lr-steps",
        default=[16, 22],
        nargs="+",
        type=int,
        help="decrease lr every step-size epochs (multisteplr scheduler only)",
    )
    parser.add_argument(
        "--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma (multisteplr scheduler only)"
    )
    parser.add_argument("--print-freq", default=20, type=int, help="print frequency")
    parser.add_argument("--output_dir", default=output_dir_path, type=str, help="path to save outputs")
    parser.add_argument("--resume", default=output_dir_path, type=str, help="path of checkpoint")
    parser.add_argument("--start_epoch", default=0, type=int, help="start epoch")
    parser.add_argument("--aspect-ratio-group-factor", default=3, type=int)
    parser.add_argument("--rpn-score-thresh", default=None, type=float, help="rpn score threshold for faster-rcnn")
    parser.add_argument(
        "--trainable-backbone-layers", default=None, type=int, help="number of trainable layers of backbone"
    )
    parser.add_argument(
        "--data-augmentation", default=data_agumentation, type=str, help="data augmentation policy (default: hflip)"
    )

    parser.add_argument(
        "--sync-bn",
        dest="sync_bn",
        help="Use sync batch norm",
        action="store_true",
    )
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    parser.add_argument(
        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
    )

    # distributed training parameters
    parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
    parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training")
    parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")
    parser.add_argument("--weights-backbone", default='ResNet50_Weights.IMAGENET1K_V1', type=str, help="the backbone weights enum name to load")

    # Mixed precision training parameters
    parser.add_argument("--amp", default=True, action="store_true", help="Use torch.cuda.amp for mixed precision training")

    # Use CopyPaste augmentation training parameter
    parser.add_argument(
        "--use-copypaste",
        action="store_true",
        help="Use CopyPaste data augmentation. Works only with data-augmentation='lsj'.",
    )

    return parser

def main(output_dir_path, args):

    #if args.output_dir:
    #output_dir_path = r'C:\Users\endle\Desktop\sbl-object-detect-may5\output_dir'

    utils.mkdir(output_dir_path)

    utils.init_distributed_mode(args)
    print('what is in args: ,', args)

    device = torch.device(args.device)

    #if args.use_deterministic_algorithms:
    
    torch.use_deterministic_algorithms(True)

    # Data loading code
    print("Loading data")

    print('what is args dataset:', args.dataset)

    dataset, num_classes = get_dataset(args.dataset, "train", get_transform(True, args), args.data_path)

    print('current number of classes: ', num_classes)
    dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args), args.data_path)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=False)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, args.batch_size, drop_last=True)

    train_collate_fn = utils.collate_fn
    if args.use_copypaste:
        if args.data_augmentation != "lsj":
            raise RuntimeError("SimpleCopyPaste algorithm currently only supports the 'lsj' data augmentation policies")

        train_collate_fn = copypaste_collate_fn

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=train_collate_fn
    )

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=utils.collate_fn
    )

    print("Creating model")
    kwargs = {"trainable_backbone_layers": args.trainable_backbone_layers}
    if args.data_augmentation in ["multiscale", "lsj"]:
        kwargs["_skip_resize"] = True
    if "rcnn" in args.model:
        if args.rpn_score_thresh is not None:
            kwargs["rpn_score_thresh"] = args.rpn_score_thresh
    # model = torchvision.models.get_model(
    #     args.model, weights=args.weights, weights_backbone=args.weights_backbone, num_classes=num_classes, **kwargs
    # )

    model = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True, num_classes=num_classes, **kwargs)

    model.to(device)
    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    if args.norm_weight_decay is None:
        parameters = [p for p in model.parameters() if p.requires_grad]
    else:
        param_groups = torchvision.ops._utils.split_normalization_params(model)
        wd_groups = [args.norm_weight_decay, args.weight_decay]
        parameters = [{"params": p, "weight_decay": w} for p, w in zip(param_groups, wd_groups) if p]

    opt_name = args.opt.lower()
    if opt_name.startswith("sgd"):
        optimizer = torch.optim.SGD(
            parameters,
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
            nesterov="nesterov" in opt_name,
        )
    elif opt_name == "adamw":
        optimizer = torch.optim.AdamW(parameters, lr=args.lr, weight_decay=args.weight_decay)
    else:
        raise RuntimeError(f"Invalid optimizer {args.opt}. Only SGD and AdamW are supported.")

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    args.lr_scheduler = args.lr_scheduler.lower()
    if args.lr_scheduler == "multisteplr":
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
    elif args.lr_scheduler == "cosineannealinglr":
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
    else:
        raise RuntimeError(
            f"Invalid lr scheduler '{args.lr_scheduler}'. Only MultiStepLR and CosineAnnealingLR are supported."
        )
    
    print('what is argsresume: ', args.resume)

    # if args.resume:
    #     checkpoint = torch.load(args.resume, map_location="cpu")
    #     model_without_ddp.load_state_dict(checkpoint["model"])
    #     optimizer.load_state_dict(checkpoint["optimizer"])
    #     lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
    #     args.start_epoch = checkpoint["epoch"] + 1
    #     if args.amp:
    #         scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        torch.backends.cudnn.deterministic = True
        evaluate(model, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        #print('look at data_loader: ', data_loader) # <torch.utils.data.dataloader.DataLoader object at 0x00000188DE551940>
        #print('len of dataloader: ', len(data_loader))

        x = train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, scaler)
        
        print('metric logger avg stats: ', x) # metric logger avg stats:  lr: 0.000500  loss: 1.1949 (1.6263)  classification: 0.6939 (1.0290)  bbox_regression: 0.4993 (0.5973)

        print(x.meters.get('loss')) # 0.9397 (0.9770)

        lr_scheduler.step()
        if args.output_dir:
            checkpoint = {
                "model": model_without_ddp.state_dict(),
                "optimizer": optimizer.state_dict(),
                "lr_scheduler": lr_scheduler.state_dict(),
                "args": args,
                "epoch": epoch,
            }
            if args.amp:
                checkpoint["scaler"] = scaler.state_dict()
            utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth"))
            utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))

        # evaluate after every epoch
        c = evaluate(model, data_loader_test, device=device)

        for iou_type, coco_eval in c.coco_eval.items():
            wandb.log({"AP/IoU/0.50-0.95/all/100": coco_eval.stats[0], 'epoch' : epoch})
            wandb.log({"AP/IoU/0.50/all/100": coco_eval.stats[1], 'epoch' : epoch})
            wandb.log({"AP/IoU/0.75/all/100": coco_eval.stats[2], 'epoch' : epoch})
            wandb.log({"AP/IoU/0.50-0.95/small/100": coco_eval.stats[3], 'epoch' : epoch})
            wandb.log({"AP/IoU/0.50-0.95/medium/100": coco_eval.stats[4], 'epoch' : epoch})
            wandb.log({"AP/IoU/0.50-0.95/large/100": coco_eval.stats[5], 'epoch' : epoch})
            wandb.log({"AR/IoU/0.50-0.95/all/1": coco_eval.stats[6], 'epoch' : epoch})
            wandb.log({"AR/IoU/0.50-0.95/all/10": coco_eval.stats[7], 'epoch' : epoch})
            wandb.log({"AR/IoU/0.50-0.95/all/100": coco_eval.stats[8], 'epoch' : epoch})
            wandb.log({"AR/IoU/0.50-0.95/small/100": coco_eval.stats[9], 'epoch' : epoch})
            wandb.log({"AR/IoU/0.50-0.95/medium/100": coco_eval.stats[10], 'epoch' : epoch})
            wandb.log({"AR/IoU/0.50-0.95/large/100": coco_eval.stats[11], 'epoch' : epoch})

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print(f"Training time {total_time_str}")

# run it

yo = ['--weights-backbone', 'ResNet50_Weights.IMAGENET1K_V1']
args = get_args_parser().parse_args(yo)
print(args)
main(output_dir_path, args)

0,1
AP/IoU/0.50-0.95/all/100,▁▄▅▅▅▅▆▇▇▇▇▇▇▇█▇██████████████
AP/IoU/0.50-0.95/large/100,▁█▆▇▄▇▆▇█████▇█▇████▇███████▇▇
AP/IoU/0.50-0.95/medium/100,▁▄▅▄▅▅▅▇▆▇▆▆▇▇█▇▇█████████████
AP/IoU/0.50-0.95/small/100,▁▃▅▅▅▆▆▇▇████▇████████████████
AP/IoU/0.50/all/100,▁▇███▇█▇▇▇▆▆▇█▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
AP/IoU/0.75/all/100,▁▃▄▅▅▆▇▇▇▇▇▇█▇████████████████
AR/IoU/0.50-0.95/all/1,▁▆█▂▄▄▅█▅▇▅▅▆▆▇▅▇▇▇▇▇▇▇▇▇▇▇▇▆▆
AR/IoU/0.50-0.95/all/10,▄▇█▆▆▅▁▃▂▃▁▁▂▃▄▁▂▂▃▃▃▃▃▃▃▃▃▃▃▃
AR/IoU/0.50-0.95/all/100,▆▇█▆▆▅▁▃▂▃▁▁▂▃▄▁▂▂▃▃▃▃▃▃▃▃▃▃▃▃
AR/IoU/0.50-0.95/large/100,█▃▂▂▂▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
AP/IoU/0.50-0.95/all/100,0.32363
AP/IoU/0.50-0.95/large/100,0.52332
AP/IoU/0.50-0.95/medium/100,0.40276
AP/IoU/0.50-0.95/small/100,0.40443
AP/IoU/0.50/all/100,0.40917
AP/IoU/0.75/all/100,0.36526
AR/IoU/0.50-0.95/all/1,0.4184
AR/IoU/0.50-0.95/all/10,0.52294
AR/IoU/0.50-0.95/all/100,0.52294
AR/IoU/0.50-0.95/large/100,0.53333


Namespace(amp=True, aspect_ratio_group_factor=3, batch_size=4, data_augmentation='hflip', data_path='C:\\Users\\endle\\Desktop\\TSI  -object-detection-pytorch-wandb-coco\\retrain\\data', dataset='coco', device='cuda', dist_url='env://', epochs=30, lr=5e-05, lr_gamma=0.1, lr_scheduler='multisteplr', lr_step_size=8, lr_steps=[16, 22], model='retinanet_resnet50_fpn', momentum=0.9, norm_weight_decay=0.5, opt='adamw', output_dir='C:\\Users\\endle\\Desktop\\TSI  -object-detection-pytorch-wandb-coco\\retrain\\outputdir', print_freq=20, resume='C:\\Users\\endle\\Desktop\\TSI  -object-detection-pytorch-wandb-coco\\retrain\\outputdir', rpn_score_thresh=None, start_epoch=0, sync_bn=False, test_only=False, trainable_backbone_layers=None, use_copypaste=False, use_deterministic_algorithms=False, weight_decay=0.0001, weights=None, weights_backbone='ResNet50_Weights.IMAGENET1K_V1', workers=1, world_size=1)
Not using distributed mode
what is in args: , Namespace(amp=True, aspect_ratio_group_factor=3, b



what is argsresume:  C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\outputdir
Start training
Epoch: [0]  [ 0/65]  eta: 0:02:41  lr: 0.000001  loss: 2.0851 (2.0851)  classification: 1.6451 (1.6451)  bbox_regression: 0.4400 (0.4400)  time: 2.4775  data: 2.0307  max mem: 7488
Epoch: [0]  [20/65]  eta: 0:00:24  lr: 0.000016  loss: 1.4665 (1.5806)  classification: 1.0555 (1.1321)  bbox_regression: 0.4290 (0.4485)  time: 0.4420  data: 0.0022  max mem: 7488
Epoch: [0]  [40/65]  eta: 0:00:11  lr: 0.000032  loss: 0.8333 (1.2432)  classification: 0.5051 (0.8323)  bbox_regression: 0.3739 (0.4109)  time: 0.3996  data: 0.0024  max mem: 7488
Epoch: [0]  [60/65]  eta: 0:00:02  lr: 0.000048  loss: 0.6759 (1.0620)  classification: 0.3231 (0.6759)  bbox_regression: 0.3291 (0.3861)  time: 0.3794  data: 0.0020  max mem: 7488
Epoch: [0]  [64/65]  eta: 0:00:00  lr: 0.000050  loss: 0.6452 (1.0336)  classification: 0.3062 (0.6540)  bbox_regression: 0.3194 (0.3797)  time: 0.3909  data

In [27]:
## evaluate performance of new object detector 

# add some new class images to holdout ds
# df_new
#new_class_data_path
old_holdout_ds_path = r'C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\holdout_ds'
new_holdout_ds_path = r'C:\Users\endle\Desktop\TSI  -object-detection-pytorch-wandb-coco\retrain\new_holdoutds'


i = 0

for f in os.listdir(new_class_data_path):
     if f.endswith('.jpg'):
        splitmyname = f.split(' - ')[-1]
        label = splitmyname.split('.')[0]

        if label == 'tape':
            print(f)

            # copy new 

            old_pth = new_class_data_path + '\\' + f
            new_pth = new_holdout_ds_path + '\\' + f
            new_rname = new_holdout_ds_path + '\\' + 'test_img_' + label + str(i) + '.jpg'

            shutil.copy(old_pth, new_pth) 
            os.rename(new_pth, new_rname) #rename

            # send file to holdout as new class image.

        

for f in os.listdir(old_holdout_ds_path):
        if f.endswith('.jpg'):
            old_pth = old_holdout_ds_path + '\\' + f
            new_pth = new_holdout_ds_path + '\\' + f
            shutil.copy(old_pth, new_pth)


1 - tape.jpg
11 - tape.jpg


FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'C:\\Users\\endle\\Desktop\\TSI  -object-detection-pytorch-wandb-coco\\retrain\\new_holdoutds\\11 - tape.jpg' -> 'C:\\Users\\endle\\Desktop\\TSI  -object-detection-pytorch-wandb-coco\\retrain\\new_holdoutds\\test_img_tape0.jpg'