In [11]:
import pandas as pd
import ast
import re

In [13]:
# Read the collected results file into a DataFrame
results_csv_path = r"C:\Users\taimo\Desktop\FAU-Courses\ADLTS-Seminar\hyperopt_results\all_results.csv"
df = pd.read_csv(results_csv_path)

# Clean 'config' and 'model_params' columns before eval
def safe_eval_dict(s):
    """Parse the string form of a Python literal that may contain NumPy scalar reprs.

    ``ast.literal_eval`` only accepts plain Python literals, so NumPy scalar
    wrappers are rewritten to the bare value first:

    * ``np.float64(0.5)``, ``np.float32(0.5)``, ``np.int64(3)``, ``np.uint8(3)``
      and the other sized float/int/uint types become ``0.5`` / ``3``;
    * ``np.True_`` / ``np.False_`` become ``True`` / ``False``.

    Parameters
    ----------
    s : str
        Text such as ``"{'lr': np.float64(0.001), 'epochs': 30}"``.

    Returns
    -------
    object
        Whatever ``ast.literal_eval`` produces for the cleaned text (a dict for
        dict-shaped input).

    Raises
    ------
    TypeError
        If ``s`` is not a string (for example a missing cell read as NaN).
    ValueError, SyntaxError
        If the cleaned text is still not a valid literal. Non-finite values
        such as ``nan`` or ``inf`` are names, not literals, and end up here.
    """
    # Strip the wrapper from any sized NumPy numeric scalar, keeping the value inside
    s = re.sub(r"np\.(?:float|int|uint)\d+\(([^)]+)\)", r"\1", s)
    # NumPy booleans have no parentheses to strip; map them to the Python literals
    s = re.sub(r"np\.(True|False)_\b", r"\1", s)
    return ast.literal_eval(s)

# Convert both stringified-dict columns into real Python objects, in place
for dict_column in ('config', 'model_params'):
    df[dict_column] = df[dict_column].apply(safe_eval_dict)

In [None]:
# Extract normalized hyperparameter signature
def extract_signature(row):
    """Build a canonical string signature of one run's hyperparameters.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``row['config']`` (the parsed run configuration dict) and
        ``row['model']`` (the key used to select this run's entry inside
        ``config['model_params']``).

    Returns
    -------
    str
        ``str()`` of a dict with a fixed key order. Settings missing from the
        configuration appear as ``None``. ``data_balancing`` is sorted so the
        order in which its entries were listed does not change the signature.

    Raises
    ------
    KeyError
        If the configuration has no ``model_params`` entry for ``row['model']``.
    """
    cfg = row['config']
    model_cfg = cfg['model_params'][row['model']]
    # `.get(key, {})` only falls back when the key is absent; a key that is
    # present but None would break the chained lookups, so use `or {}` (the
    # same guard already applied to `data_balancing` below).
    semi_cfg = cfg.get('semi_supervised') or {}

    return str({
        'batch_size': cfg.get('batch_size'),
        'epochs': cfg.get('epochs'),
        'learning_rate': cfg.get('learning_rate'),
        'strategy': semi_cfg.get('strategy'),
        'labelled_ratio': semi_cfg.get('labelled_ratio'),
        'threshold': semi_cfg.get('threshold'),
        'temperature': semi_cfg.get('temperature'),
        'data_balancing': sorted(cfg.get('data_balancing') or []),  # sort to normalize
        'l1_lambda': cfg.get('l1_lambda'),
        'gradient_clip': cfg.get('gradient_clip'),
        'dropout': model_cfg.get('dropout'),
        'hidden_size': model_cfg.get('hidden_size'),
        'num_layers': model_cfg.get('num_layers'),
        'kernel_size': model_cfg.get('kernel_size'),
        'num_channels': model_cfg.get('num_channels'),
    })

# Compute the signature row by row and store it as a new column
df['signature'] = df.apply(extract_signature, axis=1)

# One view of the results per model type
df_lstm = df.loc[df['model'].eq('lstm')]
df_tcn = df.loc[df['model'].eq('tcn')]

def find_top_configs_per_tool(df_model):
    """Return the rows of the best hyperparameter signature shared by every tool.

    Accuracy is first averaged per (signature, tool) pair. Only signatures
    that occur for every distinct tool in ``df_model`` are kept. Among those,
    the winner is the one with the highest per-tool mean accuracy
    (``max_accuracy``); ties are broken by the mean over tools
    (``avg_accuracy``) so the choice does not depend on sort internals.

    Parameters
    ----------
    df_model : pandas.DataFrame
        Needs the columns ``signature``, ``tool`` and ``accuracy``.

    Returns
    -------
    pandas.DataFrame
        All rows of ``df_model`` whose ``signature`` equals the winning one.

    Raises
    ------
    IndexError
        If ``df_model`` is empty or no signature is present for every tool.
        The type matches what the bare ``.iloc[0]`` lookup raised before, so
        existing handlers keep working; only the message is more specific.
    """
    if df_model.empty:
        raise IndexError("cannot pick a best configuration: df_model has no rows")

    # Group by signature and tool, compute mean accuracy per group
    grouped = df_model.groupby(['signature', 'tool']).agg({'accuracy': 'mean'}).reset_index()

    # Count how many tools share each signature
    tool_counts = grouped.groupby('signature').agg(
        tool_count=('tool', 'nunique'),
        avg_accuracy=('accuracy', 'mean'),
        max_accuracy=('accuracy', 'max')
    ).reset_index()

    # Keep only configs shared across all tools in this model's df
    all_tools = df_model['tool'].nunique()
    common_configs = tool_counts[tool_counts['tool_count'] == all_tools]
    if common_configs.empty:
        raise IndexError(
            f"no hyperparameter signature is shared across all {all_tools} tools"
        )

    # Highest max accuracy first; avg accuracy decides between equal maxima
    ranked = common_configs.sort_values(
        by=['max_accuracy', 'avg_accuracy'], ascending=False
    )
    best_signature = ranked.iloc[0]['signature']

    # Filter original model df by this signature
    return df_model[df_model['signature'] == best_signature]

# Select the winning shared configuration for each model type
best_lstm_rows = find_top_configs_per_tool(df_lstm)
best_tcn_rows = find_top_configs_per_tool(df_tcn)

# Write one CSV per model type, without the row index
for best_rows, csv_name in (
    (best_lstm_rows, "best_lstm_common_configs_across_tools.csv"),
    (best_tcn_rows, "best_tcn_common_configs_across_tools.csv"),
):
    best_rows.to_csv(csv_name, index=False)

# Report where the results were written
print("Saved best common LSTM config across tools to best_lstm_common_configs_across_tools.csv")
print("Saved best common TCN config across tools to best_tcn_common_configs_across_tools.csv")

Best common hyperparameter configuration across all tools:
                     tool  accuracy  \
18   electric_screwdriver    0.6217   
39    pneumatic_rivet_gun    0.7500   
60  pneumatic_screwdriver    0.4155   

                                            signature  
18  {'batch_size': 128, 'epochs': 30, 'learning_ra...  
39  {'batch_size': 128, 'epochs': 30, 'learning_ra...  
60  {'batch_size': 128, 'epochs': 30, 'learning_ra...  
