# **Quality Metrics Result Analysis**

This notebook analyzes the raw energy consumption data collected under different configurations of inference batch sizes. The results are averaged over the five runs, and the standard deviation is computed for each tracked parameter.

In [179]:
# import required libraries
import pandas as pd
import os
import re

In [None]:
# Resolve the working directory (the data to be analyzed is located relative
# to it) together with its parent, then report both.
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
print(f"Current Working Directory: {current_dir}")
print(f"Parent Directory: {parent_dir}")

## **Helper Functions**
Useful functions to properly format labels and to obtain final results from raw inference energy consumption data.

In [181]:
def _aggregate_baseline(csv_path, baseline):
    """Sum 'gpu_energy' and 'emissions' per step count for one baseline's CSV."""
    import warnings  # local import: only needed for the unparseable-row notice

    raw = pd.read_csv(csv_path)

    # The step count is encoded in 'project_name' as '<N>-steps'. Casting to
    # str first keeps blank (NaN) names from raising; they simply do not match.
    steps = pd.to_numeric(
        raw['project_name'].astype(str).str.extract(r'(\d+)-steps', expand=False),
        errors='coerce',
    )

    parsed = steps.notna()
    if not parsed.all():
        # Such rows cannot be assigned to a step group; say so explicitly
        # instead of letting groupby discard them without a trace.
        warnings.warn(
            f"{int((~parsed).sum())} row(s) in '{csv_path}' have no '<N>-steps' "
            "marker in 'project_name' and are excluded from the aggregation."
        )

    # Integer steps keep the saved CSV free of '10.0'-style float step values.
    tagged = raw.loc[parsed].assign(
        steps=steps[parsed].astype('int64'),
        baseline=baseline,
    )
    return tagged.groupby(['steps', 'baseline'], as_index=False).agg({
        'gpu_energy': 'sum',
        'emissions': 'sum'
    })


def get_model_emissions(model_nomen, audiocaps_csv, clotho_csv, output_csv):
    """
    Aggregates GPU energy and emission data for a specific model from the
    AudioCaps and Clotho emission CSV files, grouped by step count.

    For each of the two CSV files the number of steps is parsed from the
    'project_name' column (pattern '<N>-steps'), every row is tagged with its
    baseline ('audiocaps' or 'clotho'), and 'gpu_energy' and 'emissions' are
    summed per (steps, baseline) group. The two aggregated tables are
    concatenated (AudioCaps rows first), labelled with the model name and
    written to `output_csv`.

    Rows whose 'project_name' does not contain a '<N>-steps' marker cannot be
    grouped; they are excluded and a warning reports how many were skipped.

    Parameters:
    -----------
    model_nomen : str
        The name of the model being analyzed. This name is added as the
        'model' column of the output DataFrame.

    audiocaps_csv : str
        The file path to the CSV file containing emission data for the
        AudioCaps dataset. It must have the columns 'project_name',
        'gpu_energy' and 'emissions'.

    clotho_csv : str
        The file path to the CSV file containing emission data for the
        Clotho dataset. It must have the columns 'project_name',
        'gpu_energy' and 'emissions'.

    output_csv : str
        The file path where the aggregated results are saved as a CSV file
        (written without the index).

    Returns:
    --------
    pd.DataFrame
        The aggregated data with columns 'model', 'steps', 'baseline',
        'gpu_energy' and 'emissions'. The same DataFrame is also saved to
        the path specified by `output_csv`.
    """
    agg_data = pd.concat(
        [
            _aggregate_baseline(audiocaps_csv, 'audiocaps'),
            _aggregate_baseline(clotho_csv, 'clotho'),
        ],
        ignore_index=True,
    )
    agg_data['model'] = model_nomen
    agg_data = agg_data[['model', 'steps', 'baseline', 'gpu_energy', 'emissions']]

    agg_data.to_csv(output_csv, index=False)
    print(f"Aggregated CSV for model '{model_nomen}' saved to '{output_csv}'.")
    return agg_data

In [182]:
def getdata(modello):
    """
    Retrieves and merges quality metrics (FAD, CLAP) and emission data for a specific model.

    The FAD score table is used as the base. The CLAP score and the emission
    columns ('emissions', 'gpu_energy') are attached to it by joining on the
    ('steps', 'baseline') keys, so the result does not depend on the row order
    of the individual CSV files. The merged table, excluding its first column,
    is printed to the console.

    Parameters:
    -----------
    modello : str
        The name of the model for which to retrieve and merge data. It is used
        to construct the file paths for quality metrics and emission data.

    Returns:
    --------
    None
        The function does not return a value; it prints the merged DataFrame
        (excluding the first column and without the index) to standard output.

    Assumed Dependencies:
    ---------------------
    - `current_dir`: a module-level variable holding the base directory that
      contains the 'results' folder.
    - `metrics`: a module-level dictionary mapping metric names ("FAD", "CLAP")
      to a list whose first element is the score column name, e.g.
      {"FAD": ["fad_score", ...], "CLAP": ["clap_score", ...]}. That name is
      also the stem of the score file name.

    Input Files (relative to `current_dir`):
    ----------------------------------------
    - results/quality_metrics/FAD/<modello>_fad_scores.csv:
      must contain 'steps', 'baseline' and the column metrics["FAD"][0].
    - results/quality_metrics/CLAP/<modello>_clap_scores.csv:
      must contain 'steps', 'baseline' and the column metrics["CLAP"][0].
    - results/quality_metrics/emissions/<modello>/<modello>_emissions.csv:
      must contain 'steps', 'baseline', 'emissions' and 'gpu_energy'.

    Output:
    -------
    - Prints the merged table with the columns of the FAD file (minus the
      first one), followed by the CLAP score, 'emissions' and 'gpu_energy'.
    """
    keys = ['steps', 'baseline']
    # os.path.join keeps the paths valid on every OS, not just on Windows.
    quality_dir = os.path.join(current_dir, "results", "quality_metrics")

    # The emission table is the same for every metric, so read it only once.
    energy = pd.read_csv(
        os.path.join(quality_dir, "emissions", modello, f"{modello}_emissions.csv")
    )

    merged_data = None
    for metr in ["FAD", "CLAP"]:
        score_col = metrics[metr][0]
        data = pd.read_csv(
            os.path.join(quality_dir, metr, f"{modello}_{score_col}s.csv")
        )
        if merged_data is None:
            # The first metric (FAD) provides the base table and its columns.
            merged_data = data
        else:
            # Join on the keys rather than on row position so a differently
            # ordered score file cannot pair scores with the wrong rows.
            merged_data = merged_data.merge(
                data[keys + [score_col]], on=keys, how='left'
            )

    merged_data = merged_data.merge(
        energy[keys + ['emissions', 'gpu_energy']], on=keys, how='left'
    )

    print(merged_data.iloc[:, 1:].to_string(index=False))

In [183]:
def print_metrics(modello):
    """
    Prints a banner for a model followed by its merged metrics table.

    The banner is the model name centered on a 70-character line, framed by
    two dashed rules. The table itself is printed by `getdata(modello)`, and
    a blank-line separator is emitted afterwards.

    Parameters:
    -----------
    modello : str
        The name of the model to report; forwarded unchanged to `getdata`.

    Returns:
    --------
    None
    """
    banner_width = 70
    rule = "-" * banner_width
    print(rule, modello.center(banner_width), rule, sep="\n")
    getdata(modello)
    print("\n")

## **Data analysis**
Using the previously defined functions, we can now analyze the data obtained from the five different runs for all considered models.

In [None]:
# Each metric maps to [score column name, long name]. The first element is
# also the stem of the score file name read by getdata(); the second looks
# like a display label (not used in the cells shown here).
metrics = {"FAD": ["fad_score", "FAD"],
           "CLAP": ["clap_score", "Contrastive Language-Audio Pretraining Score"]}
models = ["AudioLDM", "AudioLDM2", "SAO", "MAA", "MAA2", "Tango", "Tango2"]

# Build the per-model aggregated emissions CSV from the AudioCaps and Clotho
# emission logs. Paths are assembled with os.path.join so the cell runs on
# any operating system rather than only where '\' is the path separator.
for model in models:
    emissions_dir = os.path.join(current_dir, "results", "quality_metrics", "emissions", model)
    audiocaps_file = os.path.join(emissions_dir, f"{model}_audiocaps.csv")
    clotho_file = os.path.join(emissions_dir, f"{model}_clotho.csv")
    output_file = os.path.join(emissions_dir, f"{model}_emissions.csv")
    get_model_emissions(model, audiocaps_file, clotho_file, output_file)

In [185]:
# Analyze a single model: set model_name to any entry of `models`
# (e.g. "AudioLDM2" or "MAA") and re-run this cell.
model_name = "AudioLDM"

print_metrics(model_name)

----------------------------------------------------------------------
                               AudioLDM                               
----------------------------------------------------------------------
 steps  baseline  fad_score  clap_score  emissions  gpu_energy
    10    clotho   0.623181    0.328345   0.003523    0.005168
    25    clotho   0.566925    0.379216   0.006288    0.009578
    50    clotho   0.568657    0.385669   0.010888    0.016902
   100    clotho   0.574913    0.390508   0.020251    0.031703
   150    clotho   0.564129    0.407495   0.029547    0.046457
   200    clotho   0.574102    0.383884   0.038766    0.061184
    10 audiocaps   0.332963    0.308149   0.003489    0.005166
    25 audiocaps   0.229715    0.326384   0.006208    0.009555
    50 audiocaps   0.222753    0.322408   0.010784    0.016890
   100 audiocaps   0.210654    0.316021   0.019936    0.031586
   150 audiocaps   0.204954    0.340574   0.029099    0.046297
   200 audiocaps   0.207078    

In [186]:
# Print the banner and merged metrics table for every model in `models`.
for model in models:
    print_metrics(model)

----------------------------------------------------------------------
                               AudioLDM                               
----------------------------------------------------------------------
 steps  baseline  fad_score  clap_score  emissions  gpu_energy
    10    clotho   0.623181    0.328345   0.003523    0.005168
    25    clotho   0.566925    0.379216   0.006288    0.009578
    50    clotho   0.568657    0.385669   0.010888    0.016902
   100    clotho   0.574913    0.390508   0.020251    0.031703
   150    clotho   0.564129    0.407495   0.029547    0.046457
   200    clotho   0.574102    0.383884   0.038766    0.061184
    10 audiocaps   0.332963    0.308149   0.003489    0.005166
    25 audiocaps   0.229715    0.326384   0.006208    0.009555
    50 audiocaps   0.222753    0.322408   0.010784    0.016890
   100 audiocaps   0.210654    0.316021   0.019936    0.031586
   150 audiocaps   0.204954    0.340574   0.029099    0.046297
   200 audiocaps   0.207078    