In [None]:
import pandas as pd
import glob
import json
import matplotlib.pyplot as plt
import os
import re
import seaborn as sns

In [None]:
# Directory holding one sub-directory per detectron2 training run.
resultdir = "/data/mtmoore/school/CSiML_AI395T/final_project/models/detectron2/"
# Example run directory name: faster_rcnn_R_101_FPN_3x_combined_6batch_512RoI_0
# The regex splits that name into modelname / dataset / batch / roinum / testnum.
trainre = re.compile(r'^.+\/detectron2\/(?P<modelname>.+)_(?P<dataset>[^_]+)_(?P<batch>\d+)batch_(?P<roinum>\d+)RoI_(?P<testnum>\d+)')


model_parent = "detectron2"

# Collect one frame per run and concatenate once after the loop; calling
# pd.concat inside the loop re-copies every previously loaded row each time.
run_frames = []

# use _0 to avoid log files
# sorted() makes the load order (and so the row order) deterministic.
for trainresult in sorted(glob.glob(f"{resultdir}/*_0")):
    match = trainre.match(trainresult)
    if match is None:
        print(F"didn't match directory name {trainresult}")
        continue
    params = match.groupdict()

    metrics_path = f"{trainresult}/metrics.json"
    if not os.path.exists(metrics_path):
        print(f"{trainresult}/metrics.json doesn't exist right now, skipping")
        continue

    # metrics.json is read as JSON lines: one record per line.
    df = pd.read_json(metrics_path, lines=True)
    # Tag every row with the run it came from so runs can be compared later.
    df['modelname'] = params['modelname']
    df['dataset'] = params['dataset']

    run_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# run directory produced a metrics file.
train_df = pd.concat(run_frames) if run_frames else pd.DataFrame()
display(train_df)
print(train_df.dtypes)


In [None]:
# Debugging aid (disabled): uncomment to show every row when displaying a frame.
#pd.set_option('display.max_rows', None)
# Reset the pandas options whose names match the regex '^display.' back to
# their defaults, undoing any display override left over from earlier runs.
pd.reset_option('^display.', silent=True)

def rename_model(m):
    """Return the human-readable label for a detectron2 model config name.

    Parameters
    ----------
    m : the raw model name as parsed from the run directory name.

    Returns
    -------
    str
        The display label for a known config name, otherwise "Unknown".
    """
    known_labels = (
        ("faster_rcnn_R_101_FPN_3x", "Faster-RCNN ResNet-101 with FPN"),
        ("faster_rcnn_R_50_FPN_3x", "Faster-RCNN ResNet-50 with FPN"),
    )
    # Compare against each known config name in turn; first match wins.
    for config_name, label in known_labels:
        if m == config_name:
            return label
    return "Unknown"

        
# Raw column names (from metrics.json and the parsed run directory name)
# mapped to the labels used on plot axes and legends.
# NOTE(review): the source column is called 'iteration' but is labelled
# 'Epoch' here -- confirm that label is what the plots should say.
column_labels = {
    'modelname': 'Model Name',
    'dataset': 'Dataset',
    'iteration': 'Epoch',
    'time': 'Duration',
    'loss_box_reg': 'Box Regression',
    'loss_cls': 'Classification',
    'loss_rpn_cls': 'RPN Classification',
    'loss_rpn_loc': 'RPN Localization',
}

# Translate model names only while the raw 'modelname' column is still
# present. Re-running this cell on an already-renamed frame would otherwise
# feed the display labels back through rename_model, which returns "Unknown"
# for them, and the categorical below would then turn every row into NaN.
needs_model_labels = 'modelname' in train_df.columns
train_df = train_df.rename(columns=column_labels)
if needs_model_labels:
    train_df['Model Name'] = train_df['Model Name'].apply(rename_model)

# Define the custom sort order
model_order = ['Faster-RCNN ResNet-50 with FPN', 'Faster-RCNN ResNet-101 with FPN']

# An ordered categorical makes sorting and plotting follow model_order;
# any label not listed in model_order (e.g. "Unknown") becomes NaN.
train_df['Model Name'] = pd.Categorical(train_df['Model Name'], categories=model_order, ordered=True)

train_df = train_df.sort_values(['Model Name', 'Dataset', 'Epoch'])

# Detectron2's metrics include a 'time' column, which is renamed to 'Duration' above.

#mask_list = []
#mask_list.append(train_df['Camera'] == "IP8M-H-SW") 
#mask_list.append(train_df['Model Name'] == "yolov11m") 
#for s in [ 3840, ]:
#    mask_list.append(train_df['Image Size (px)'] == s) 
#    #display(train_df[ join_mask(mask_list) ])
#    #print(f"{s}: {len( train_df[ join_mask(mask_list) ] )}" )
#    mask_list.pop()



# Loss columns (after renaming) drawn as separate lines in the loss plots.
plot_metrics = [
    'Box Regression',
    'Classification',
    'RPN Classification',
    'RPN Localization',
]

def join_mask(mask_list):
    """Combine a sequence of masks into one by AND-ing them with ``&``.

    Parameters
    ----------
    mask_list : iterable of masks; each must support ``&`` with the others.

    Returns
    -------
    The ``&`` of all masks in order, or ``None`` when ``mask_list`` is empty.
    """
    combined = None
    for mask in mask_list:
        # The first mask seeds the result; every later one is AND-ed onto it.
        combined = mask if combined is None else combined & mask
    return combined

# Plot training Loss

In [None]:
# iterate over cameras
#mask_list = []
#for camera in train_df['Camera'].unique().tolist():
#    mask_list.append(train_df['Camera'] == camera) 
#    
#    # iterate over model
#    for model in train_df[join_mask(mask_list)]['Model Name'].unique().tolist(): 
#        mask_list.append((train_df['Model Name'] == model))
#
#        current_df = train_df[ join_mask( mask_list ) ]
#        
#        #display(current_df)
#        for metric in plot_metrics:
#            #print(f"{camera}, {model}, {metric}")
#            #fig = plt.figure()
#            #axs = sns.lineplot(data=current_df, x='Epoch', y=metric, hue='Image Size (px)', palette='colorblind' )
#            #axs.set_title(f"Training Loss {metric} for {model} (camera {camera})") 
#            #if metric == "Classification":
#            #    axs.set_ylim([0,10])
#            #plt.show()
#            #plt.close()
#            pass
#        mask_list.pop()
#    mask_list.pop()

In [None]:
# Settings for the per-model training-loss figures.
max_epoch = 150  # only rows whose 'Epoch' is below this value are plotted
plot_dir = "/data/mtmoore/school/CSiML_AI395T/final_project/plots"

# One figure per model, with one subplot row per dataset.
# dropna(): model names missing from the categorical order are NaN; they
# match no rows and cannot be used to build a file name, so skip them.
for model in train_df['Model Name'].dropna().unique().tolist():
    # Filter once per model instead of rebuilding the combined mask per dataset.
    model_df = train_df[(train_df['Model Name'] == model) & (train_df['Epoch'] < max_epoch)]
    datasets = model_df['Dataset'].unique().tolist()
    # A single dataset is skipped as before (plt.subplots then returns one
    # Axes rather than an array, so axs[i] would fail). Zero datasets is
    # skipped too, because plt.subplots(nrows=0) raises.
    if len(datasets) < 2:
        continue

    fig, axs = plt.subplots(nrows=len(datasets), ncols=1, sharex=True, sharey=False, figsize=(6,8))
    for i, dataset in enumerate(datasets):
        datasetname = dataset if dataset[0].isupper() else dataset.capitalize()
        current_df = model_df[model_df['Dataset'] == dataset]

        # Long format: one row per (Epoch, loss name) so seaborn can colour by loss.
        melt_df = pd.melt(current_df, id_vars=['Epoch'], value_vars=plot_metrics, var_name='Loss', value_name='Loss Value')
        sns.lineplot(data=melt_df, x='Epoch', y='Loss Value', palette='colorblind', hue='Loss', ax=axs[i])
        axs[i].set_title(f"{datasetname} Dataset")

    fig.suptitle(f"{model} Training Loss over First {max_epoch} Epochs")
    fig.savefig(os.path.join(plot_dir, f"training_losses_camera_model_detectron2_{model.replace(' ', '_')}_less_than_{max_epoch}epochs.png"))


# Plot Epoch Time

In [None]:
# Mean of 'Duration' for every (Model Name, Dataset) pair. observed=False
# keeps a row for each categorical model even if it has no data.
epoch_duration_mean_df = (
    train_df
    .groupby(by=["Model Name", "Dataset"], observed=False)
    .agg(**{"Mean Duration": ('Duration', 'mean')})
    .reset_index()
)

# Grouped bar chart: one group per model, one bar per dataset.
fig, ax = plt.subplots()
sns.barplot(
    data=epoch_duration_mean_df,
    x="Model Name",
    y="Mean Duration",
    hue='Dataset',
    palette='colorblind',
    ax=ax,
)
# Fixed y-range of 0 to 1; bars taller than 1 are clipped at the top.
ax.set_ylim([0,1])
ax.set_title("Detectron2 Training Time Per Epoch", fontsize=13)
ax.set_ylabel("Avg Epoch Duration (sec)")
fig.savefig(os.path.join("/data/mtmoore/school/CSiML_AI395T/final_project/plots", "detectron2_epoch_training_time.png"))

#for camera in train_df['Camera'].unique().tolist():
#    mask_list.append(train_df['Camera'] == camera) 
    
#    # iterate over model
#    for model in train_df[join_mask(mask_list)]['Model Name'].unique().tolist(): 
#        mask_list.append( train_df['Model Name'] == model )
        
#        # iterate over model
#        for imgsize in train_df[join_mask(mask_list)]['Image Size (px)'].unique().tolist(): 
#            mask_list.append( train_df['Image Size (px)'] == imgsize )