In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Helper functions

In [None]:
def read_convert_csv(
    csv_path: str,
    feature_names: tuple = ('hog', 'log', 'vgg', 'resnet'),
    n_dims: int = 256,
) -> pd.DataFrame:
    """
    Read a feature CSV and fold per-dimension columns back into one column per feature.

    For every name in ``feature_names`` the CSV is expected to contain the columns
    ``<name>_0`` ... ``<name>_{n_dims - 1}``. Those columns are replaced by a single
    column called ``<name>`` whose cells each hold a 1-D NumPy array of length
    ``n_dims``. All other columns are returned unchanged and keep their order; the
    combined columns are appended at the end in ``feature_names`` order.

    Parameters
    ----------
    csv_path
        Path of the CSV file, passed straight to ``pd.read_csv``.
    feature_names
        Prefixes of the vector-valued features to recombine. The default matches
        the four features this notebook was written for.
    n_dims
        Number of per-dimension columns stored for each feature.

    Returns
    -------
    pd.DataFrame
        The loaded table with one array-valued column per feature.

    Raises
    ------
    KeyError
        If any expected ``<name>_<idx>`` column is absent from the CSV.
    """
    df = pd.read_csv(csv_path)

    # Expected per-dimension column names, grouped by the feature they belong to.
    cols_by_feature = {
        name: [f'{name}_{idx}' for idx in range(n_dims)]
        for name in feature_names
    }

    # Fail early with a readable message instead of a KeyError that lists
    # hundreds of column names from deep inside pandas.
    present = set(df.columns)
    missing = [
        col
        for cols in cols_by_feature.values()
        for col in cols
        if col not in present
    ]
    if missing:
        preview = ', '.join(missing[:5])
        raise KeyError(
            f'{len(missing)} expected feature column(s) missing from {csv_path!r}, '
            f'e.g. {preview}'
        )

    # Iterating a 2-D array yields its rows, so each cell receives one
    # (n_dims,) vector.
    for name, cols in cols_by_feature.items():
        df[name] = list(df[cols].to_numpy())

    # Remove the now-redundant per-dimension columns.
    flat_cols = [col for cols in cols_by_feature.values() for col in cols]
    df = df.drop(columns=flat_cols)

    return df

## Load data

In [None]:
# Location of the combined feature table (resolved relative to the working directory).
csv_path = 'all_features.csv'

# Load the table; read_convert_csv folds the per-dimension columns into array-valued ones.
df = read_convert_csv(csv_path)

# Preview the first two rows as the cell output.
df.head(2)

In [None]:
# Select a feature to visualize. Possible features are:
# color_modes, rank_colors, spectral, sift, pixel_entropy, hog_entropy, log_entropy, vgg_entropy, resnet_entropy
FEATURE = 'spectral'

# Select a hierarchical level for visualization. Possible options are:
# replicate, group, dataset
LEVEL = 'group'

In [None]:
# Distinct group labels that occur within the 'accessions_dataset2' dataset.
df.loc[df['dataset'] == 'accessions_dataset2', 'group'].unique()

In [None]:
# Distinct group labels that occur within the 'accessions' dataset.
df.loc[df['dataset'] == 'accessions', 'group'].unique()

In [None]:
# Feature and hierarchy level shown in this figure; these assignments replace
# whatever FEATURE / LEVEL were set to earlier in the notebook.
FEATURE = 'spectral'
LEVEL = 'replicate'

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 6), dpi=300)

# Draw on the explicit axes. With common_norm=False every LEVEL value is
# normalized to its own density, so differently sized groups stay comparable.
sns.histplot(
    data=df,
    x=FEATURE,
    hue=LEVEL,
    element="step",
    stat="density",
    common_norm=False,
    kde=True,
    ax=ax,
)

ax.set_title(f'Distribution of {FEATURE} by {LEVEL}')
ax.set_xlabel(f'{FEATURE} (Normalized 0-100)')
ax.set_ylabel('Density')

# seaborn has already attached the hue legend to the axes. Calling
# plt.legend() here would rebuild the legend from labelled artists, find none,
# and wipe the entries, so only retitle the existing legend.
hue_legend = ax.get_legend()
if hue_legend is not None:
    hue_legend.set_title(LEVEL)

fig.tight_layout()
plt.show()