# Analyze Results

In [None]:
import csv, ipywidgets, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# set the float precision when viewing dataframes
pd.set_option("display.precision", 10)

Create a drop down menu to select the results to analyze

In [None]:
# Drop-down that lists the result sets available for analysis; the first
# entry is pre-selected. The label of the selected entry is later used as
# the name of the results folder.
results_dd = ipywidgets.Dropdown(
    description="Results to Analyze:",
    value=1,
    options=list(
        zip(
            (
                "bidirectional_lstm_imdb",
                "image_classification_from_scratch",
                "imbalanced_classification",
                "text_classification_from_scratch",
            ),
            range(1, 5),
        )
    ),
)

results_dd

In [None]:
# Drop-down that decides whether environments with duplicate fixed-seed
# results are kept (True, the default) or excluded (False)
keep_duplicates_dd = ipywidgets.Dropdown(
    description="Keep Duplicate Results:",
    value=True,
    options=list(
        zip(
            ("Keep Duplicate Results", "Exclude Duplicate Results"),
            (True, False),
        )
    ),
)

keep_duplicates_dd

In [None]:
# Build the folders holding the fixed-seed and the random results of the
# result set currently selected in the drop-down
fixed_seed_path, random_path = (
    "./%s/results/fixed-seed/" % results_dd.label,
    "./%s/results/random/" % results_dd.label,
)

# Show both folders so the selection can be verified
print(fixed_seed_path, random_path, sep="\n")

Define a function to read the training results on different hardware and software environments.

In [None]:
#
# Function to get the results from the csv files for both the fixed-seed and random results
#
def get_results_from_csv(path, expected_lines=101):
    """Load the test accuracies stored in the csv files found below ``path``.

    Every ``*.csv`` file contributes one column to the returned data frame.
    The column is named after the file's directory relative to ``path`` and
    holds the values of csv column 9 (``test_accuracy``); csv column 7 holds
    the seed of each run.

    Parameters
    ----------
    path : str
        Root folder that is searched recursively for csv files.
    expected_lines : int, optional
        Number of lines (header included) every csv file should have. A file
        with a different line count is reported with an ``ERROR`` message but
        is still loaded.

    Returns
    -------
    pandas.DataFrame
        One numeric column per csv file. When the seeds are "random" the
        frame keeps a positional index; otherwise the numeric seeds become
        the row index (named ``seed``). An empty frame is returned when no
        csv file is found.

    Raises
    ------
    ValueError
        If the csv files do not all hold the same number of data rows, or a
        value cannot be converted to a number.
    """

    #
    # Get the list of csv files in the path (relative to the path)
    #
    csv_files = []
    for root, _dirs, files in os.walk(path):
        for file_name in files:
            if not file_name.endswith(".csv"):
                continue
            relative_file = os.path.relpath(os.path.join(root, file_name), path)
            # Allow results to be ignored by including "ignore" in the
            # directory or file name (only the part below ``path`` counts)
            if "ignore" in relative_file:
                continue
            csv_files.append(relative_file)

    # os.walk yields entries in arbitrary order; sort for a stable column order
    csv_files.sort()

    if not csv_files:
        print("WARNING: no csv files found in %s" % path)
        return pd.DataFrame()

    accuracy_columns = {}
    seeds = []

    for relative_file in csv_files:
        # Forward slashes keep the column names splittable on "/" everywhere
        column_name = os.path.dirname(relative_file).replace(os.sep, "/")

        # Read the file once; the lines are used for the count and the parse
        with open(os.path.join(path, relative_file), "r") as file:
            lines = file.readlines()

        if len(lines) != expected_lines:
            print("ERROR: %s has %d lines" % (relative_file, len(lines)))

        file_seeds = []
        file_accuracies = []
        for row in csv.reader(lines, delimiter=","):
            # Skip blank lines and the header row
            if not row or row[9] == "test_accuracy":
                continue
            # Column 7 is the seed and column 9 is the test accuracy
            file_seeds.append(row[7])
            file_accuracies.append(row[9])

        # Rows are combined by position, so differing seeds mean misaligned rows
        if seeds and file_seeds != seeds:
            print(
                "WARNING: %s has different seeds than the previously read file"
                % relative_file
            )

        # As before, the seeds of the last file read label the rows
        seeds = file_seeds
        accuracy_columns[column_name] = file_accuracies

    # Build the frame in one go; columns of unequal length raise ValueError
    results_df = pd.DataFrame(
        {name: pd.to_numeric(values) for name, values in accuracy_columns.items()}
    )

    # The seed is only meaningful (and used as row index) for fixed-seed runs
    if seeds and seeds[0] != "random":
        results_df.index = pd.Index(pd.to_numeric(seeds), name="seed")

    return results_df


#
# Load the fixed-seed and the random results into one data frame each
#
fixed_seed_df, random_seed_df = (
    get_results_from_csv(fixed_seed_path),
    get_results_from_csv(random_path),
)


Remove duplicate results based on keep_duplicates_dd drop down list

In [None]:
# When duplicates are excluded, drop every environment (column) whose results
# are identical, row for row, to those of an earlier column. Whole columns are
# compared: a value that merely coincides with *some* earlier column in each
# row must not cause the column to be dropped.
if not keep_duplicates_dd.value:
    fixed_seed_df = fixed_seed_df.loc[:, ~fixed_seed_df.T.duplicated()].copy()


Look at the results from the fixed seed runs and highlight the duplicate results.

In [None]:
# Create a new dataframe with just the first seed and transpose the index and
# columns, so that every environment becomes a row
check_fixed_seed_duplicate_results_df = fixed_seed_df.head(1).T

# After the transpose the only column is labelled with the first seed; look
# the label up instead of hard-coding the seed of one particular result set
first_seed = check_fixed_seed_duplicate_results_df.columns[0]

#
# Highlight the duplicated rows
#
dupe_df = check_fixed_seed_duplicate_results_df[[first_seed]].duplicated(keep=False)
dupe_rows = dupe_df[dupe_df].index.values
check_fixed_seed_duplicate_results_df.style.apply(
    lambda x: ["background: yellow" if x.name in dupe_rows else "" for _ in x], axis=1
)

KDE plot of the fixed seed training runs.

In [None]:
# Show the size of the fixed-seed results as (rows, columns); every column
# holds the results read from one csv file
fixed_seed_df.shape

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))

# One histogram with KDE curve per environment (column), drawn explicitly on
# the axes created above rather than on the implicit current axes
sns.histplot(
    fixed_seed_df,
    kde=True,
    stat="proportion",
    kde_kws=dict(cut=3),
    legend=False,
    ax=ax,
)

# The title reports the number of environments and the total number of runs
ax.set_title(
    "Histogram of each %s Fixed Seed Results (%s environments; %s training runs)"
    % (
        results_dd.label,
        fixed_seed_df.shape[1],
        fixed_seed_df.shape[0] * fixed_seed_df.shape[1],
    )
)

ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Only the lower x limit is fixed; the upper limit stays automatic
ax.set_xlim(left=0.75)
plt.show()


KDE plot of the random seed training runs.

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))

# One histogram with KDE curve per environment (column), drawn explicitly on
# the axes created above rather than on the implicit current axes
sns.histplot(
    random_seed_df,
    kde=True,
    stat="proportion",
    kde_kws=dict(cut=3),
    legend=False,
    ax=ax,
)

# The title reports the number of environments and the total number of runs
ax.set_title(
    "Histogram of each %s Random Seed Results (%s environments; %s training runs)"
    % (
        results_dd.label,
        random_seed_df.shape[1],
        random_seed_df.shape[0] * random_seed_df.shape[1],
    )
)

ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Only the lower x limit is fixed; the upper limit stays automatic
ax.set_xlim(left=0.75)
plt.show()

Histogram of the performance of the fixed seed and random training runs.

In [None]:
# Pool the results of all environments, column by column, into one flat list
# per seed type (the lists are reused by later cells)
full_fixed = [value for column in fixed_seed_df.columns for value in fixed_seed_df[column]]
full_random = [value for column in random_seed_df.columns for value in random_seed_df[column]]

fig, ax = plt.subplots(figsize=(15, 10))

sns.histplot(
    full_fixed, kde=True, stat="proportion", kde_kws=dict(cut=3), color="blue", ax=ax
)
sns.histplot(
    full_random, kde=True, stat="proportion", kde_kws=dict(cut=3), color="green", ax=ax
)

ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# The number of environments is the number of columns of each frame; it is
# not derived from the number of runs, which would assume 100 runs each
ax.legend(
    [
        "Fixed Seed (Training runs: %s; Environments: %s)"
        % (len(full_fixed), fixed_seed_df.shape[1]),
        "Random Seed (Training runs: %s; Environments: %s)"
        % (len(full_random), random_seed_df.shape[1]),
    ]
)

ax.set_title("Histogram of all %s Fixed and Random Results" % results_dd.label)

# Only the lower x limit is fixed; the upper limit stays automatic
ax.set_xlim(left=0.75)
plt.show()

Calculate the standard deviation of the fixed seed training runs by hardware and software environments.

In [None]:
#
# Define a function to calculate the standard deviation of the values in a data frame based on a filter
#
def std_by_hw_sw_environment(df, filter):
    """Pool the values of all matching columns and return their spread.

    Parameters
    ----------
    df : pandas.DataFrame
        Results with one column per environment.
    filter : str
        Text a column name has to contain for the column to be included.
        NOTE(review): this is a substring match, so a filter that is a
        prefix of another environment name selects both - confirm the
        environment names are free of such overlaps.

    Returns
    -------
    list
        ``[standard deviation, number of values]`` of the pooled values.
    """
    # Names of the columns whose label contains the filter text
    matching_columns = list(df.filter(like=filter).columns)

    # Flatten the matching columns, one after the other, into a single list
    pooled_values = [value for name in matching_columns for value in df[name]]

    return [np.std(pooled_values), len(pooled_values)]


#
# Create a list of all the hardware and software environments from the fixed_seed_df data frame
#
columns = list(fixed_seed_df)

# Column names are split on "/": the first two parts form the hardware
# environment and the third part is the software environment. Sorting the
# de-duplicated names keeps the row order of the tables below reproducible
# (the iteration order of a set of strings changes between kernel sessions).
hardware_environments = sorted(
    {"%s/%s" % (column.split("/")[0], column.split("/")[1]) for column in columns}
)
software_environments = sorted({column.split("/")[2] for column in columns})

# One row per environment: pooled standard deviation and number of values
hardware_std_df = pd.DataFrame(columns=["std", "count"])
software_std_df = pd.DataFrame(columns=["std", "count"])

for hardware_environment in hardware_environments:
    hardware_std_df.loc[hardware_environment] = std_by_hw_sw_environment(
        fixed_seed_df, hardware_environment
    )


for software_environment in software_environments:
    software_std_df.loc[software_environment] = std_by_hw_sw_environment(
        fixed_seed_df, software_environment
    )

# Overall spread of all fixed-seed and all random runs for comparison
print(" Total Fixed seed std: %s" % np.std(full_fixed))
print("Total Random seed std: %s" % np.std(full_random))

display(hardware_std_df)
display(software_std_df)

Select the max and min environment standard deviation for hardware and software

In [None]:
# Keep only the environments with more than 400 pooled results.
# NOTE(review): an environment with exactly 400 results is dropped as well -
# confirm whether "at least 400" was intended.
hardware_std_400_df = hardware_std_df.loc[hardware_std_df["count"].gt(400)]
software_std_400_df = software_std_df.loc[software_std_df["count"].gt(400)]

# Names of the environments with the largest and the smallest spread
hardware_max_std, hardware_min_std = (
    hardware_std_400_df["std"].idxmax(),
    hardware_std_400_df["std"].idxmin(),
)
software_max_std, software_min_std = (
    software_std_400_df["std"].idxmax(),
    software_std_400_df["std"].idxmin(),
)

print(
    "\n".join(
        [
            "Hardware: Max: %s, Min: %s" % (hardware_max_std, hardware_min_std),
            "Software: Max: %s, Min: %s" % (software_max_std, software_min_std),
        ]
    )
)


In [None]:
fig, ax = plt.subplots(figsize=(15, 10))

legend = []

# (environment, results, seed type, colour) of the four distributions to
# compare. They are plotted in this order so that the legend entries line up
# with the plotted series.
series_to_plot = [
    (hardware_max_std, fixed_seed_df, "Fixed", "blue"),
    (hardware_max_std, random_seed_df, "Random", "green"),
    (hardware_min_std, fixed_seed_df, "Fixed", "red"),
    (hardware_min_std, random_seed_df, "Random", "orange"),
]

for environment, results_df, seed_type, color in series_to_plot:
    # Columns whose name contains the hardware environment
    matching_df = results_df.filter(like=environment)

    # Pool the results of the matching columns into one flat list
    values = [value for column in matching_df.columns for value in matching_df[column]]

    # Nothing to draw when this environment has no results of this seed type
    if len(values) == 0:
        continue

    sns.histplot(
        values, kde=True, stat="proportion", kde_kws=dict(cut=3), color=color, ax=ax
    )
    # The environment count is the number of matching columns; it is not
    # derived from the number of runs, which would assume 100 runs each
    legend.append(
        "%s %s Seed (Trainings: %s; Environments: %s)"
        % (environment, seed_type, len(values), matching_df.shape[1])
    )

ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

ax.set_title(
    "Histogram of the Max vs Min %s Fixed and Random Results by Hardware Environment"
    % results_dd.label
)

ax.legend(legend)

# Only the lower x limit is fixed; the upper limit stays automatic
ax.set_xlim(left=0.75)
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))

legend = []

# (environment, results, seed type, colour) of the four distributions to
# compare. They are plotted in this order so that the legend entries line up
# with the plotted series.
series_to_plot = [
    (software_max_std, fixed_seed_df, "Fixed", "blue"),
    (software_max_std, random_seed_df, "Random", "green"),
    (software_min_std, fixed_seed_df, "Fixed", "red"),
    (software_min_std, random_seed_df, "Random", "orange"),
]

for environment, results_df, seed_type, color in series_to_plot:
    # Columns whose name contains the software environment
    matching_df = results_df.filter(like=environment)

    # Pool the results of the matching columns into one flat list
    values = [value for column in matching_df.columns for value in matching_df[column]]

    # Nothing to draw when this environment has no results of this seed type
    if len(values) == 0:
        continue

    sns.histplot(
        values, kde=True, stat="proportion", kde_kws=dict(cut=3), color=color, ax=ax
    )
    # The environment count is the number of matching columns; it is not
    # derived from the number of runs, which would assume 100 runs each
    legend.append(
        "%s %s Seed (Trainings: %s; Environments: %s)"
        % (environment, seed_type, len(values), matching_df.shape[1])
    )

ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

ax.set_title(
    "Histogram of the Max vs Min %s Fixed and Random Results by Software Environment"
    % results_dd.label
)

ax.legend(legend)

# Only the lower x limit is fixed; the upper limit stays automatic
ax.set_xlim(left=0.75)
plt.show()

Create a data frame with the standard deviation of each hardware and software environment for the fixed seed and random training runs.

In [None]:
# Standard deviation of every column, once for the fixed-seed and once for
# the random results, each as a one-column frame
fixed_std_df = fixed_seed_df.std().to_frame(name="std")
random_std_df = random_seed_df.std().to_frame(name="std")

# Put both side by side (aligned on the column names of the result frames)
# and label the two columns by seed type
std_df = pd.concat([fixed_std_df, random_std_df], axis=1).set_axis(
    ["fixed", "random"], axis=1
)

std_df

In [None]:
# Reshape to long form: the "fixed"/"random" column name goes to "type", the
# standard deviation to "values"; the original index is kept
std_plot = std_df.melt(ignore_index=False, var_name="type", value_name="values")
# Horizontal bars: the index on the y axis, one bar per type
sns.barplot(data=std_plot, x="values", y=std_plot.index, hue="type")
# Restrict the x axis to the fixed window 0.005 - 0.03
plt.xlim(0.005, 0.03)

Create a data frame with the mean of each hardware and software environment for the fixed seed and random training runs.

In [None]:
# Mean of every column, once for the fixed-seed and once for the random
# results, each as a one-column frame
fixed_mean_df = fixed_seed_df.mean().to_frame(name="mean")
random_mean_df = random_seed_df.mean().to_frame(name="mean")

# Put both side by side (aligned on the column names of the result frames)
# and label the two columns by seed type
mean_df = pd.concat([fixed_mean_df, random_mean_df], axis=1).set_axis(
    ["fixed", "random"], axis=1
)

mean_df


In [None]:
# Reshape to long form: the "fixed"/"random" column name goes to "type", the
# mean to "values"; the original index is kept
mean_plot = mean_df.melt(ignore_index=False, var_name="type", value_name="values")
# Horizontal bars: the index on the y axis, one bar per type
sns.barplot(data=mean_plot, x="values", y=mean_plot.index, hue="type")
# Restrict the x axis to the fixed window 0.85 - 0.87
plt.xlim(0.85, 0.87)


Calculate the total standard deviation and mean of the fixed seed and random training runs.

In [None]:
# Summary over all pooled results: standard deviation first, then the mean,
# each for the fixed-seed and the random runs
print(
    "\n".join(
        [
            "  Fixed seed std: %s" % np.std(full_fixed),
            " Random seed std: %s" % np.std(full_random),
            "=====================================",
            " Fixed seed mean: %s" % np.mean(full_fixed),
            "Random seed mean: %s" % np.mean(full_random),
        ]
    )
)