In [1]:
%%html
<style>
/* Let cell outputs grow to their full height instead of being boxed in. */
.output_wrapper, .output {
    height:auto !important;
    max-height: 100% !important;
}
/* Remove the inset shadow drawn around scrolling outputs.
   The vendor-prefixed property needs its leading dash to be valid CSS. */
.output_scroll {
    box-shadow:none !important;
    -webkit-box-shadow:none !important;
}
</style>

In [2]:
import pandas as pd
from IPython.display import display, HTML, clear_output
import os
from ipywidgets import interact, widgets
import numpy as np
import matplotlib.pyplot as plt
# Inject CSS that forces elements with the `container` class to full width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Show floats with four decimals and never truncate columns or rows when
# a DataFrame is displayed.
pd.set_option('display.float_format', '{:.4f}'.format)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
print("Input folder location")
# Strip stray whitespace that tends to come along when a path is pasted.
folder = input().strip()
# Expected folder (relative to the project): \output\results

# Keep only CSV files, in a deterministic order. os.listdir() returns entries
# in arbitrary order and also lists sub-directories and unrelated files, which
# would later be handed to pd.read_csv through the dropdown / files[0].
files = sorted(
    name
    for name in os.listdir(folder)
    if name.lower().endswith(".csv") and os.path.isfile(os.path.join(folder, name))
)

Input folder location
E:\BP\bachelors-thesis-main\output\results


In [4]:
def add_cols_to_df(df):
    """Attach static GPU specification columns to ``df`` based on its ``GPU`` column.

    The columns ``PROCESSORS``, ``BASE_CLOCK``, ``BOOST_CLOCK``,
    ``MEMORY_CONFIG`` and ``INTERFACE_WIDTH`` are added (or overwritten) in
    place, looked up from the GPU name in each row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``GPU`` column holding the GPU name of each row.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the five columns added.
        Rows whose GPU name is not in the table get NaN in all five columns.

    Raises
    ------
    KeyError
        If ``df`` has no ``GPU`` column.
    """
    spec_columns = ['PROCESSORS', 'BASE_CLOCK', 'BOOST_CLOCK', 'MEMORY_CONFIG', 'INTERFACE_WIDTH']
    # One tuple per GPU, in the same order as ``spec_columns``.
    gpu_specs = {
        "Radeon RX Vega":                (4096, 1.25, 1.55, 8, 2048),
        "NVIDIA GeForce GTX 1080 Ti":    (3584, 1.48, 1.58, 11, 352),
        "NVIDIA GeForce RTX 2080 SUPER": (3072, 1.65, 1.82, 8, 256),
        "NVIDIA GeForce RTX 3070":       (5888, 1.50, 1.73, 8, 256),
        "NVIDIA GeForce RTX 3090":       (10496, 1.40, 1.70, 24, 384),
        "NVIDIA RTX A4000":              (6144, 0.74, 1.56, 16, 256),
    }

    gpu_names = df['GPU']
    for position, column in enumerate(spec_columns):
        lookup = {name: spec[position] for name, spec in gpu_specs.items()}
        # A vectorised map yields a real float column (NaN for unknown GPUs).
        # Filling an all-None column cell by cell leaves it as object dtype,
        # which some pandas versions silently exclude from DataFrame.corr().
        df[column] = gpu_names.map(lookup).astype(float)
    return df


In [5]:
def choose(change):
    """Render the correlation matrix of one result file as a table and a heatmap.

    Parameters
    ----------
    change : dict-like
        Widget change notification; only ``change['new']`` (the selected file
        name) is read.

    The file is read from the module-level ``folder``, extended by
    ``add_cols_to_df``, and every column from the third one onward is
    correlated. Everything is rendered into the module-level ``output``
    widget, which must exist by the time this handler is invoked.
    """
    file = change['new']
    path = os.path.join(folder, file)
    df = pd.read_csv(path, index_col=False)
    df = add_cols_to_df(df)
    df_to_corr = df.iloc[:, 2:]
    matrix = df_to_corr.corr()

    # Take the axis labels from the matrix itself. Depending on the pandas
    # version, corr() can return fewer columns than it was given, and labels
    # taken from df.columns[2:] would then sit on the wrong cells.
    labels = matrix.columns
    tick_positions = np.arange(len(labels))

    styled_table = matrix.style.background_gradient(cmap='coolwarm').to_html()

    with output:
        clear_output(wait=True)
        # Emit heading, table and scroll wrapper as ONE HTML fragment: separate
        # display() calls each produce their own output element, so a <div>
        # opened in one call cannot wrap content shown by the next.
        display(HTML(
            '<div style="width: 100%; overflow: auto;">'
            + '<H3><center>' + os.path.splitext(file)[0] + '</center></H3>'
            + styled_table
            + '</div>'
        ))
        plt.figure(figsize=(10, 8))
        plt.imshow(matrix, cmap='coolwarm', interpolation='nearest')
        plt.colorbar()
        plt.xticks(tick_positions, labels, rotation=90)
        plt.yticks(tick_positions, labels)
        plt.title('Correlation Matrix')
        plt.show()

In [6]:
def calculate_correlations(file_name):
    """Show one correlation matrix per CONFIG value for a result file.

    Every column whose name contains "median" (case-insensitive) is correlated
    with the GPU specification columns added by ``add_cols_to_df``. The rows
    are grouped by the ``CONFIG`` column, and each group is rendered as a
    styled table plus a heatmap inside the module-level ``output`` widget.

    Parameters
    ----------
    file_name : str
        Name of a CSV file inside the module-level ``folder``.
    """
    path = os.path.join(folder, file_name)
    df = pd.read_csv(path, index_col=False)
    df = add_cols_to_df(df)

    # Correlate the "median" measurement columns with the GPU data columns.
    median_cols = [col for col in df.columns if "median" in col.lower()]
    gpu_cols = ['PROCESSORS', 'BASE_CLOCK', 'BOOST_CLOCK', 'MEMORY_CONFIG', 'INTERFACE_WIDTH']
    cols_to_corr = median_cols + gpu_cols
    # Cast the GPU columns to float: they may arrive as object dtype, which
    # some pandas versions silently leave out of corr().
    df_to_corr = df[["CONFIG"] + cols_to_corr].astype({col: float for col in gpu_cols})

    # One correlation matrix per CONFIG value.
    correlations_by_config = {
        config: df_config.set_index("CONFIG").corr()
        for config, df_config in df_to_corr.groupby("CONFIG")
    }

    title_stem = os.path.splitext(file_name)[0]

    # Display correlation matrices for each CONFIG
    with output:
        clear_output(wait=True)
        print(f"Correlation Matrices for {title_stem}")
        for config, matrix in correlations_by_config.items():
            print(f"Median Correlation Matrix for CONFIG: {config}")
            # Correlation coefficients lie in [-1, 1]. A lower bound of 0 would
            # paint every negative correlation in the same colour and put the
            # neutral midpoint of the diverging colormap at 0.5 instead of 0.
            vmin, vmax = -1, 1
            if np.isnan(matrix.to_numpy()).all():
                # Nothing to scale when every entry is NaN (e.g. a CONFIG with
                # a single row); let the plotting code choose its own limits.
                vmin, vmax = None, None

            display(matrix.style.background_gradient(cmap='coolwarm', vmin=vmin, vmax=vmax))
            print(f"Correlation matrix shape: {matrix.shape}")

            tick_positions = np.arange(len(matrix.columns))
            plt.figure(figsize=(10, 8))
            plt.imshow(matrix, cmap='coolwarm', interpolation='nearest', vmin=vmin, vmax=vmax)
            plt.colorbar()
            plt.xticks(tick_positions, matrix.columns, rotation=90)
            plt.yticks(tick_positions, matrix.columns)
            plt.title(f'Median Correlation Matrix for CONFIG: {config} in {title_stem}')
            plt.show()

file_dropdown = widgets.Dropdown(options=files, description='File:')

output = widgets.Output()

def update_output(*args):
    """Recompute the correlations when the dropdown selection changes.

    When called as a widget observer, the first positional argument is the
    change notification and its ``'new'`` entry is the selected file name.
    That value is used in preference to the module-level ``file_dropdown``,
    because that name is rebound when a later cell creates another dropdown
    and would then report the other widget's selection. Called without
    arguments, the current dropdown value is used.
    """
    file_name = args[0]['new'] if args else file_dropdown.value
    calculate_correlations(file_name)

file_dropdown.observe(update_output, 'value')

# Display the widgets and output
display(file_dropdown)
display(output)

# Initially, display results for the first file (skip when the folder is empty)
if files:
    calculate_correlations(files[0])


Dropdown(description='File:', options=('1Password, agilekeychain.csv', '1Password, cloudkeychain.csv', '3DES(P…

Output()

In [7]:
def calculate_correlations(file_name):
    """Show one correlation matrix per CONFIG value for a result file.

    This redefines the function of the same name from the previous cell. It
    correlates a fixed set of measurement columns (``SPEED_MEDIAN``,
    ``MEM_USED_MEDIAN``, ``MEM_UTIL_MEDIAN``) with the GPU specification
    columns added by ``add_cols_to_df``. The rows are grouped by the
    ``CONFIG`` column, and each group is rendered as a styled table plus a
    heatmap inside the module-level ``output`` widget.

    Parameters
    ----------
    file_name : str
        Name of a CSV file inside the module-level ``folder``.

    Raises
    ------
    KeyError
        If the file lacks ``CONFIG`` or one of the fixed measurement columns.
    """
    path = os.path.join(folder, file_name)
    df = pd.read_csv(path, index_col=False)
    df = add_cols_to_df(df)

    # Correlate the fixed median measurement columns with the GPU data columns.
    median_cols = ["SPEED_MEDIAN", "MEM_USED_MEDIAN", "MEM_UTIL_MEDIAN"]
    gpu_cols = ['PROCESSORS', 'BASE_CLOCK', 'BOOST_CLOCK', 'MEMORY_CONFIG', 'INTERFACE_WIDTH']
    cols_to_corr = median_cols + gpu_cols
    # Cast the GPU columns to float: they may arrive as object dtype, which
    # some pandas versions silently leave out of corr().
    df_to_corr = df[["CONFIG"] + cols_to_corr].astype({col: float for col in gpu_cols})

    # One correlation matrix per CONFIG value.
    correlations_by_config = {
        config: df_config.set_index("CONFIG").corr()
        for config, df_config in df_to_corr.groupby("CONFIG")
    }

    title_stem = os.path.splitext(file_name)[0]

    # Display correlation matrices for each CONFIG
    with output:
        clear_output(wait=True)
        print(f"Correlation Matrices for {title_stem}")
        for config, matrix in correlations_by_config.items():
            print(f"Median Correlation Matrix for CONFIG: {config}")
            # Correlation coefficients lie in [-1, 1]. A lower bound of 0 would
            # paint every negative correlation in the same colour and put the
            # neutral midpoint of the diverging colormap at 0.5 instead of 0.
            vmin, vmax = -1, 1
            if np.isnan(matrix.to_numpy()).all():
                # Nothing to scale when every entry is NaN (e.g. a CONFIG with
                # a single row); let the plotting code choose its own limits.
                vmin, vmax = None, None

            display(matrix.style.background_gradient(cmap='coolwarm', vmin=vmin, vmax=vmax))
            print(f"Correlation matrix shape: {matrix.shape}")

            tick_positions = np.arange(len(matrix.columns))
            plt.figure(figsize=(10, 8))
            plt.imshow(matrix, cmap='coolwarm', interpolation='nearest', vmin=vmin, vmax=vmax)
            plt.colorbar()
            plt.xticks(tick_positions, matrix.columns, rotation=90)
            plt.yticks(tick_positions, matrix.columns)
            plt.title(f'Median Correlation Matrix for CONFIG: {config} in {title_stem}')
            plt.show()

# Create the interactive dropdown widget
file_dropdown = widgets.Dropdown(options=files, description='File:')

# Create an output widget to display the results
output = widgets.Output()

# Function to update the output when a new file is selected
def update_output(*args):
    """Recompute the correlations when the dropdown selection changes.

    When called as a widget observer, the first positional argument is the
    change notification and its ``'new'`` entry is the selected file name.
    That value is used rather than the module-level ``file_dropdown``, so the
    handler keeps working if that name is ever rebound to another widget.
    Called without arguments, the current dropdown value is used.
    """
    file_name = args[0]['new'] if args else file_dropdown.value
    calculate_correlations(file_name)

file_dropdown.observe(update_output, 'value')

# Display the widgets and output
display(file_dropdown)
display(output)

# Initially, display results for the first file (skip when the folder is empty)
if files:
    calculate_correlations(files[0])


Dropdown(description='File:', options=('1Password, agilekeychain.csv', '1Password, cloudkeychain.csv', '3DES(P…

Output()