# Noisiness analysis

These cells assume that each JSON file is named in the form "seed_iteration" (for example, "23_1.json", "23_2.json", …, "23_10.json") and that each file contains fitness data for a simulation run on a fixed track (the seed). 
This approach lets you compute the variability across simulation runs of the same track, which quantifies how stable or noisy each track’s outcomes are.

In [None]:
import os
import json
import pandas as pd

DATA_DIR = './tests'


def load_fitness_records(data_dir):
    """Collect the ``fitness`` mapping of every ``<seed>_<iteration>.json`` file.

    Args:
        data_dir: Directory to scan (not recursive).

    Returns:
        A list with one dict per usable file: the file's ``fitness`` entries
        plus integer ``seed`` and ``iteration`` keys parsed from the first two
        underscore-separated parts of the file name. Files are visited in
        sorted name order.

    Files whose name contains no underscore, or whose JSON has no (or a null)
    ``fitness`` entry, are skipped silently. Files that cannot be read or
    parsed, or whose name parts are not integers, are reported with ``print``
    and skipped.
    """
    loaded = []
    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order of the resulting DataFrame reproducible between runs and machines.
    for fname in sorted(os.listdir(data_dir)):
        if not fname.endswith('.json'):
            continue
        parts = os.path.splitext(fname)[0].split('_')
        if len(parts) < 2:
            continue
        try:
            with open(os.path.join(data_dir, fname), 'r', encoding='utf-8') as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise TypeError("top-level JSON value is not an object")
            fitness = data.get('fitness')
            if fitness is None:
                continue
            if not isinstance(fitness, dict):
                raise TypeError("'fitness' is not an object")
            # Build a new dict so the parsed JSON is left untouched.
            record = {**fitness, 'seed': int(parts[0]), 'iteration': int(parts[1])}
        except (OSError, ValueError, TypeError) as e:
            # ValueError also covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error reading {fname}: {e}")
            continue
        loaded.append(record)
    return loaded


if os.path.isdir(DATA_DIR):
    records = load_fitness_records(DATA_DIR)
else:
    # Treat a missing directory like an empty one, but say so explicitly.
    print(f"Data directory not found: {DATA_DIR}")
    records = []

df = pd.DataFrame(records)
print("Loaded Data:")
print(df.head())

Compute descriptive statistics grouped by seed. This gives the mean and standard deviation of each feature for each fixed track; the standard deviation quantifies the noisiness.

In [None]:
# Every column except the run identifiers is treated as a feature.
features = [col for col in df.columns if col not in ['seed', 'iteration']]

# Only numeric features can be averaged; values loaded from JSON may also be
# strings or lists, so restrict the aggregation to numeric dtypes.
numeric_cols = set(df.select_dtypes(include='number').columns)
numeric_features = [col for col in features if col in numeric_cols]

# Per-seed mean and standard deviation of each numeric feature.
group_stats = df.groupby('seed')[numeric_features].agg(['mean', 'std']).reset_index()

# Flatten the (feature, statistic) column MultiIndex into "feature_statistic";
# the strip turns the ('seed', '') pair produced by reset_index() into 'seed'.
group_stats.columns = ['_'.join(col).strip('_') for col in group_stats.columns.values]
print("Grouped Statistics:")
print(group_stats.head())

Visualize the noisiness for selected features with error bar plots. Each plot shows the mean and standard deviation for the given feature across simulation runs for each fixed track (seed).

In [None]:
import matplotlib.pyplot as plt

# One error-bar figure per selected metric: the marker is the per-seed mean
# and the bar is the per-seed standard deviation taken from group_stats.
selected_metrics = ("speed_entropy", "gaps_mean", "curvature_entropy")
for metric in selected_metrics:
    plt.figure(figsize=(10, 4))
    plt.errorbar(
        group_stats["seed"],
        group_stats[f"{metric}_mean"],
        yerr=group_stats[f"{metric}_std"],
        fmt="o",
        capsize=3,
    )
    plt.xlabel("Track Seed")
    plt.ylabel(metric)
    plt.title(f"Noisiness Analysis: {metric}")
    plt.show()


Compute the correlation matrix for all numeric features (including your candidate fitness measure) and visualize it with a heatmap.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Pairwise correlation between all float/int columns of df.
# NOTE(review): 'seed' and 'iteration' are stored as ints, so they are part of
# this matrix too -- confirm that is intended.
corr = df.select_dtypes(include=[float, int]).corr()
print("Correlation Matrix:")
print(corr)

plt.figure(figsize=(10, 8))
# Pin the colour scale to the full correlation range so the neutral colour of
# the diverging map always means r = 0 and figures stay comparable.
cax = plt.imshow(corr, cmap='coolwarm', interpolation='nearest', vmin=-1, vmax=1)
plt.title("Correlation Matrix with Fitness Measures")
plt.colorbar(cax, fraction=0.046, pad=0.04)
# Label both axes with the column names, one tick per matrix row/column.
ticks = np.arange(len(corr.columns))
plt.xticks(ticks, corr.columns, rotation=90)
plt.yticks(ticks, corr.columns)
plt.tight_layout()
plt.show()

In [None]:
# List every unordered pair of distinct columns whose absolute correlation
# reaches the threshold (upper triangle of corr, diagonal excluded).
threshold = 0.7
print("\nSignificant Correlations (|r| >= {:.1f}):".format(threshold))
names = list(corr.columns)
for row, left in enumerate(names):
    for col, right in enumerate(names[row + 1:], start=row + 1):
        r = corr.iloc[row, col]
        # A NaN coefficient fails this comparison and is therefore not listed.
        if abs(r) >= threshold:
            print(f"{left} & {right}: {r:.2f}")


# Dimensionality Reduction & Visual Mapping