# List Results

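This notebook scans the results directory for CSV files and builds a table listing each example together with the hardware and software environments it was run on, and whether fixed-seed and random results are available for that combination.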

In [None]:
import csv, os
import pandas as pd

# Show complete tables: never truncate columns, rows, or cell contents
for option in ("display.max_columns", "display.max_rows", "display.max_colwidth"):
    pd.set_option(option, None)

# Directory that is searched for result files
path = "./"

# Filled with the paths of the csv files that are found
list_of_files = []

# Column name -> dtype of the (initially empty) results table.
# "2628917891" receives the value read from the fixed-seed csv files
# (presumably the column is named after the fixed seed -- TODO confirm).
column_dtypes = {
    "example": "str",
    "hardware_environment": "str",
    "software_environment": "str",
    "fixed-seed": "bool",
    "random": "bool",
    "2628917891": "float",
}

results_df = pd.DataFrame(
    {column: pd.Series(dtype=dtype) for column, dtype in column_dtypes.items()}
)

#
# Find all of the csv files in the results directory
#
for root, dirs, files in os.walk(path):
    for file in files:
        if not file.endswith(".csv"):
            continue

        # Path relative to `path`, always "/"-separated so it can be split into
        # its components later regardless of the platform's separator.
        # relpath strips only the leading `path` prefix, never a match further
        # inside the path.
        relative_path = os.path.relpath(os.path.join(root, file), path).replace(
            os.sep, "/"
        )

        # Allow results to be ignored by including "ignore" in the directory or file name
        if "ignore" in relative_path:
            continue

        list_of_files.append(relative_path)

# os.walk yields entries in arbitrary order; sort so every run processes the
# files in the same order.
list_of_files.sort()

#
# Create a dataframe with the results
#
# Each entry of list_of_files is a "/"-separated path relative to `path`.
# The components that are read from it are:
#   [0]      example name
#   [2]      run type (contains "fixed-seed" or "random")
#   [3]/[4]  hardware environment
#   [5]      software environment
#
EXPECTED_LINES = 101  # line count every results csv is checked against
MIN_PATH_PARTS = 6  # file_split[5] is the deepest component read below

# One record per (example, hardware environment, software environment),
# kept in first-seen order. Collected in a dict and converted to a dataframe
# once at the end instead of growing the dataframe row by row.
records = {}

for file in list_of_files:
    file_split = file.split("/")

    # A path that is too shallow cannot be decoded; report it and move on
    # instead of aborting the whole table with an IndexError.
    if len(file_split) < MIN_PATH_PARTS:
        print("ERROR: %s does not match the expected directory layout" % file)
        continue

    # Get the example name from the file path
    example = file_split[0]

    # Decide if it is fixed-seed or random from the file path
    fixed_seed = "fixed-seed" in file_split[2]
    is_random = "random" in file_split[2]

    # Get the hardware and software environment from the file path
    hardware_environment = "%s/%s" % (file_split[3], file_split[4])
    software_environment = file_split[5]

    # Read the csv file once; the entries of list_of_files are relative to `path`
    with open(os.path.join(path, file), "r") as csv_file:
        rows = csv_file.readlines()

    # Verify the csv file has the expected number of lines
    if len(rows) != EXPECTED_LINES:
        print("ERROR: %s has %d lines" % (file, len(rows)))

    # For fixed-seed runs, store the last field of the second line.
    # NaN (not pd.NA) marks "no value" so the column stays a plain float column.
    # A file too short to have a second line was already reported above.
    seed = float("nan")
    if fixed_seed and len(rows) > 1:
        seed = float(rows[1].split(",")[-1])

    key = (example, hardware_environment, software_environment)
    record = records.get(key)
    if record is None:
        # First file for this example / hardware / software combination
        records[key] = {
            "example": example,
            "hardware_environment": hardware_environment,
            "software_environment": software_environment,
            "fixed-seed": fixed_seed,
            "random": is_random,
            "2628917891": seed,
        }
    else:
        # Combination already known: merge the flags ...
        record["fixed-seed"] = record["fixed-seed"] or fixed_seed
        record["random"] = record["random"] or is_random
        # ... and fill in the fixed-seed value if it is still missing. Without
        # this the value is lost whenever the random file is processed first.
        if fixed_seed and pd.isna(record["2628917891"]):
            record["2628917891"] = seed

# Build the table in one step, keeping the column order and dtypes of the
# empty results_df defined earlier in this cell.
results_df = pd.DataFrame(
    list(records.values()), columns=list(results_df.columns)
).astype(results_df.dtypes.to_dict())

# Show the whole table ordered by example, then hardware, then software environment
sort_columns = ["example", "hardware_environment", "software_environment"]
display(results_df.sort_values(by=sort_columns))


Create a table of all of the entries whose fixed-seed result is identical to the fixed-seed result of at least one other entry.

In [None]:
# Values read from the fixed-seed runs (missing where an entry has no fixed-seed result)
seed_df = results_df["2628917891"]

# Keep every row whose value occurs more than once (keep=False marks all
# members of a duplicate group, not only the repeats).
# Missing values are excluded explicitly: pandas treats them as equal to each
# other in duplicated(), so entries without a fixed-seed result would otherwise
# all be reported as duplicates of one another.
duplicate_mask = seed_df.notna() & seed_df.duplicated(keep=False)

display(
    results_df[duplicate_mask].sort_values(
        by=["example", "2628917891", "hardware_environment", "software_environment"]
    )
)