# Dashboard to compare various carbon tracker results

## Datasets
- UKB pilot

## Pipelines
- FreeSurfer: recon-all (stages 1, 2, and 3)
- FastSurfer: FastSurferCNN (volumetric segmentation)

## Monitoring metrics
- experiment duration in hours
- energy consumption in kWh (kilowatt-hours)

## Carbon trackers
- EIT (experiment-impact-tracker)
- CC (CodeCarbon)
- CT

In [1]:
import sys
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from utils.dashboard_utils import * 

from experiment_impact_tracker.data_interface import DataInterface
from experiment_impact_tracker.data_utils import *
from experiment_impact_tracker.data_utils import (load_data_into_frame,
                                                  load_initial_info,
                                                  zip_data_and_info)

loading region bounding boxes for computing carbon emissions region, this may take a moment...
 454/454... rate=464.00 Hz, eta=0:00:00, total=0:00:00, wall=15:19 EST
Done!


  pd.set_option("display.max_colwidth", -1)


In [2]:
# Base directory from which the paths below are built. It is a relative path,
# so it resolves against the kernel's current working directory.
project_dir = '../../'

# Subject list CSV and the directory holding the tracker logs.
subject_list_file = f'{project_dir}subject_lists/ukb_pilot_subjects.csv'
tracker_logs_dir = f'{project_dir}tracker_output/'

# Pipelines to compare.
# pick from: 'FastSurfer/CNN', 'FastSurfer/recon', 'FreeSurfer'
pipelines = [
    'FastSurfer/CNN',
    'FastSurfer/recon',
]

# Experimental run ids; each one is combined with every pipeline above.
exp_runs = [
    'RUN_1',
]


## Populate paths for different experimental runs
- includes pipeline and run ids

In [3]:
# Collect one (pipeline, run, log directory) triple for every pipeline/run
# pair, ordered by pipeline first and then by run.
experiment_sets = []

for pipe in pipelines:
    for run in exp_runs:
        # Label combining pipeline and run; not referenced again in this cell.
        exp_id = f'{pipe}-{run}'
        # Logs for a run are looked up under
        # <tracker_logs_dir><pipeline>/local_tests/<run>.
        exp_log_dir = f'{tracker_logs_dir}{pipe}/local_tests/{run}'
        experiment_sets += [(pipe, run, exp_log_dir)]

# Echo the resulting list as the cell output.
experiment_sets


[('FastSurfer/CNN',
  'RUN_1',
  '../../tracker_output/FastSurfer/CNN/local_tests/RUN_1'),
 ('FastSurfer/recon',
  'RUN_1',
  '../../tracker_output/FastSurfer/recon/local_tests/RUN_1')]

## Select subjects and trackers, then collate the tracker logs

In [4]:
subject_list = ['sub-000']
trackers = ['EIT','CC'] # pick from 'EIT', 'CC'
read_flops = False


def _label_summary(summary_df, subject_ids, pipe, run, tracker):
    """Tag a tracker summary frame with its experiment identifiers.

    Adds the columns 'subject_id', 'pipeline', 'run' and 'tracker' to
    ``summary_df`` (the frame passed in is modified) and returns a frame in
    which the last four columns have been moved to the front.

    Parameters
    ----------
    summary_df : pandas.DataFrame
        Summary returned by one of the ``collate_*_tracker_data`` helpers.
    subject_ids : list
        Values for the 'subject_id' column. pandas requires the list length
        to equal the number of rows in ``summary_df``.
        NOTE(review): this assumes the collate helpers return exactly one row
        per entry of ``subject_log_dirs`` -- confirm against their source.
    pipe, run, tracker : str
        Scalars broadcast into the 'pipeline', 'run' and 'tracker' columns.

    Returns
    -------
    pandas.DataFrame
        ``summary_df`` with its last four columns placed first.
    """
    summary_df['subject_id'] = subject_ids
    summary_df['pipeline'] = pipe
    summary_df['run'] = run
    summary_df['tracker'] = tracker
    cols = list(summary_df.columns)
    return summary_df[cols[-4:] + cols[:-4]]


# Per-experiment summaries are gathered in plain lists and concatenated once
# after the loop. DataFrame.append was deprecated in pandas 1.4 and removed in
# pandas 2.0, and appending inside the loop re-copied the accumulated frame on
# every iteration.
EIT_summary_frames = []
CC_summary_frames = []

for pipe, run, exp_log_dir in experiment_sets:
    print(f'Reading logs for: {pipe}')

    # use_cuda is forwarded to collate_EIT_tracker_data; only the
    # 'FastSurfer/CNN' pipeline turns it on.
    use_cuda = pipe == 'FastSurfer/CNN'

    for tracker in trackers:
        print(f'Using carbon tracker: {tracker}')

        # One log directory per subject: <exp_log_dir>/<subject>/<tracker>
        subject_log_dirs = [f'{exp_log_dir}/{sub}/{tracker}' for sub in subject_list]

        if tracker == 'EIT':
            # flops_df and power_df keep the values from the most recent EIT
            # call, exactly as before.
            EIT_tracker_summary_df, flops_df, power_df = collate_EIT_tracker_data(subject_log_dirs, use_cuda, read_flops)
            EIT_tracker_summary_df = _label_summary(EIT_tracker_summary_df, subject_list, pipe, run, tracker)
            EIT_summary_frames.append(EIT_tracker_summary_df)

        elif tracker == 'CC':
            CC_tracker_summary_df = collate_CC_tracker_data(subject_log_dirs)
            CC_tracker_summary_df = _label_summary(CC_tracker_summary_df, subject_list, pipe, run, tracker)
            CC_summary_frames.append(CC_tracker_summary_df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when a tracker was not selected. The original row index of each summary is
# kept (no ignore_index), as it was with DataFrame.append.
EIT_tracker_summary_df_concat = pd.concat(EIT_summary_frames) if EIT_summary_frames else pd.DataFrame()
CC_tracker_summary_df_concat = pd.concat(CC_summary_frames) if CC_summary_frames else pd.DataFrame()

Reading logs for: FastSurfer/CNN
Using carbon tracker: EIT
Using carbon tracker: CC
Reading logs for: FastSurfer/recon
Using carbon tracker: EIT
Using carbon tracker: CC


  return json_normalize(json_array, max_level=max_level), json_array


In [5]:
# Display the combined EIT tracker summaries collected across all experiment sets.
EIT_tracker_summary_df_concat

Unnamed: 0,subject_id,pipeline,run,tracker,total_power,total_carbon,PUE,total_duration_papi,total_duration_impact_tracker
0,sub-000,FastSurfer/CNN,RUN_1,EIT,0.002247533,0.0001016117,1.58,0.013778,0.017324
0,sub-000,FastSurfer/recon,RUN_1,EIT,2.258673e-07,6.776018e-09,1.58,0.0,0.001389


In [6]:
# Display the combined CC tracker summaries collected across all experiment sets.
CC_tracker_summary_df_concat

Unnamed: 0,subject_id,pipeline,run,tracker,timestamp,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
0,sub-000,FastSurfer/CNN,RUN_1,CC,2021-07-22T21:46:24,codecarbon,11218.454384,0.01340145,0.02917514,United States,USA,maryland,N,,
0,sub-000,FastSurfer/recon,RUN_1,CC,2021-08-10T11:41:02,codecarbon,3.055779,1.940474e-08,4.224438e-08,United States,USA,maryland,N,,
