In [None]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from pathlib import Path
import numpy as np
from sklearn.model_selection import train_test_split
import random

from data_processing import ImageLabelDataset,DualImageDataset,create_dataloaders,create_crossvalidation_loaders

def set_seed(seed):
    """Seed the random number generators this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and PyTorch's
    RNG. When CUDA is available, all CUDA devices are seeded as well and
    cuDNN is switched to deterministic, non-benchmark mode.

    Args:
        seed: Integer seed applied to every generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        # Trade cuDNN autotuning speed for run-to-run reproducibility.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Exported for processes started from here; the interpreter reads
    # PYTHONHASHSEED at startup, so the hashing of the already-running
    # process is not changed by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)


set_seed(42)

In [None]:
# Load the pickled DataFrame that every later cell analyses.
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
raw_data_path = "../data/raw_all_data.pkl"
df = pd.read_pickle(raw_data_path)

In [None]:
import pandas as pd
import scipy.stats as stats
from statsmodels.stats.multitest import multipletests
import itertools
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Composite score: row-wise mean over the first nine columns of df.
# NOTE(review): assumes the first nine columns are the per-task scores - confirm against the pickle's schema.
leading_nine = df.iloc[:, :9]
df['mean_score'] = leading_nine.mean(axis=1)

In [None]:
import scipy.stats as stats
from statsmodels.stats.multitest import multipletests
from statsmodels.formula.api import ols
import statsmodels.api as sm

# Names of the first nine columns of df; the per-task tests below iterate over this list.
task_columns = df.columns[:9].tolist()

def safe_kruskal(*groups):
    """Run a Kruskal-Wallis H-test, returning ``(None, None)`` when it is not computable.

    Each group is converted to a flat float array and NaNs are dropped before
    anything else, so missing values cannot turn the p-value into NaN.

    Args:
        *groups: One array-like of numeric observations per group.

    Returns:
        The ``scipy.stats.kruskal`` result (unpackable as ``(statistic, pvalue)``),
        or ``(None, None)`` when:
          * fewer than two groups have at least two non-NaN observations,
          * every remaining observation across all groups has the same value
            (the test is undefined without any variation),
          * SciPy raises ``ValueError``/``TypeError``, or
          * the resulting p-value is NaN.
    """
    cleaned = []
    for group in groups:
        values = np.asarray(group, dtype=float).ravel()
        values = values[~np.isnan(values)]
        # Groups with fewer than 2 usable observations are dropped.
        if values.size >= 2:
            cleaned.append(values)

    if len(cleaned) < 2:
        return None, None

    # The test is only degenerate when the POOLED sample has no variation.
    # Groups that are each constant but differ from one another (e.g. all 1s
    # vs. all 2s) are a perfectly valid - and maximally separated - input.
    pooled = np.concatenate(cleaned)
    if (pooled == pooled[0]).all():
        return None, None

    try:
        result = stats.kruskal(*cleaned)
    except (ValueError, TypeError):
        return None, None

    # A NaN p-value would silently poison the downstream FDR correction.
    if np.isnan(result.pvalue):
        return None, None
    return result

def _fdr_kruskal_within(data, outer_col, inner_col, value_cols):
    """Kruskal-Wallis across ``inner_col`` levels, separately for each ``outer_col`` level.

    For every unique value of ``outer_col`` the rows with that value are
    selected, and for every column in ``value_cols`` the observations are
    split by the unique values of ``inner_col`` within that selection and
    passed to ``safe_kruskal``. The p-values obtained for one ``outer_col``
    level are then corrected together with Benjamini-Hochberg FDR.

    Args:
        data: DataFrame holding ``outer_col``, ``inner_col`` and ``value_cols``.
        outer_col: Column whose levels define the separate analyses.
        inner_col: Column whose levels are compared against each other.
        value_cols: Names of the columns to test.

    Returns:
        ``{outer_level: {value_col: corrected_p}}``. Columns whose test could
        not be computed are omitted; a level with no computable test maps to
        an empty dict.
    """
    results = {}
    for outer_level in data[outer_col].unique():
        subset = data[data[outer_col] == outer_level]
        inner_levels = subset[inner_col].unique()

        raw_pvals = {}
        for col in value_cols:
            samples = [subset.loc[subset[inner_col] == level, col].values
                       for level in inner_levels]
            _, p_val = safe_kruskal(*samples)
            if p_val is not None:
                raw_pvals[col] = p_val

        if raw_pvals:
            # Index 1 of multipletests' return value is the corrected p-values.
            corrected_pvals = multipletests(list(raw_pvals.values()), method='fdr_bh')[1]
            results[outer_level] = dict(zip(raw_pvals, corrected_pvals))
        else:
            results[outer_level] = {}
    return results


# 1. Test domain differences per robot -> {robot: {task: corrected p-value}}
domain_results = _fdr_kruskal_within(df, 'robot', 'domain', task_columns)

# 2. Test robot differences per domain -> {domain: {task: corrected p-value}}
robot_results = _fdr_kruskal_within(df, 'domain', 'robot', task_columns)

# 3. Test interaction effects
# For each task, fit an OLS model with robot, domain and their interaction,
# then read the interaction term's p-value from a type-II ANOVA table.
# interaction_results maps task -> p-value, or None when the task has fewer
# than two distinct values, the interaction row is missing, or fitting failed.
interaction_results = {}
for task in task_columns:
    # Skip tasks with no variation
    if df[task].nunique() < 2:
        interaction_results[task] = None
        continue

    p_val = None
    try:
        # Q("...") quotes the column name so names that are not valid
        # Python identifiers can still be used in the formula.
        formula = f'Q("{task}") ~ C(robot) + C(domain) + C(robot):C(domain)'
        model = ols(formula, data=df).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)

        if 'C(robot):C(domain)' in anova_table.index:
            p_val = anova_table.loc['C(robot):C(domain)', 'PR(>F)']
    except Exception as exc:
        # Keep the best-effort behaviour (record None and carry on), but make
        # the failure visible instead of silently discarding it.
        print(f"Interaction test failed for task {task!r}: {type(exc).__name__}: {exc}")

    interaction_results[task] = p_val



In [None]:
# Summarise the interaction tests: only tasks with a p-value (not None) count as tested.
tested_interaction_pvals = [p for p in interaction_results.values() if p is not None]
total_tasks = len(tested_interaction_pvals)
sig_interaction_count = sum(1 for p in tested_interaction_pvals if p < 0.05)
print(f"Statistically significant robot x domain interaction in {sig_interaction_count}/{total_tasks} tasks")

In [None]:
# Per-task report of the robot x domain interaction test.
for task, p in interaction_results.items():
    if p is None:
        # No p-value was stored for this task (no variation, missing ANOVA row
        # or failed fit); formatting None with ':.4f' would raise TypeError.
        print(f"Robot x domain interaction could not be tested in task: {task}")
    elif p < 0.05:
        print(f"Significant robot x domain interaction in task: {task} (p={p:.4f})")
    else:
        print(f"NOT Significant robot x domain interaction in task: {task} (p={p:.4f})")

In [None]:
# robot_results maps domain (room) -> {task: p-value}.
# For every domain, record (number of tasks with p < 0.05, number of tasks tested).

significant_robot_counts = {}

for domain, task_pvals in robot_results.items():
    n_tested = len(task_pvals)
    n_significant = len([p for p in task_pvals.values() if p < 0.05])
    significant_robot_counts[domain] = (n_significant, n_tested)
    print(f"For {domain}, statistically significant differences between robots in {n_significant}/{n_tested} tasks")


In [None]:
# domain_results maps robot -> {task: p-value}.
# For every robot, record (number of tasks with p < 0.05, number of tasks tested).

significant_domain_counts = {}

for robot, task_pvals in domain_results.items():
    n_tested = len(task_pvals)
    n_significant = len([p for p in task_pvals.values() if p < 0.05])
    significant_domain_counts[robot] = (n_significant, n_tested)
    print(f"For {robot}, statistically significant differences between rooms in {n_significant}/{n_tested} tasks")


In [None]:
alpha = 0.05  # significance threshold

# Rows of the table are tasks, columns are the domains listed here (in this order).
tasks = task_columns  # or sorted(list of task columns)
domains = ['Home', 'BigOffice-2', 'BigOffice-3', 'Hallway', 'MeetingRoom', 'SmallOffice']

# Build LaTeX table: one p-value per (task, domain), bold when below alpha,
# '--' when no p-value is available.
header = ' & ' + ' & '.join(domains) + ' \\\\ \\midrule\n'
rows = []
for task in tasks:
    row = [task]
    for domain in domains:
        # .get(domain, {}) so a listed domain that is absent from the data
        # renders as '--' instead of raising KeyError.
        p = robot_results.get(domain, {}).get(task, None)
        if p is None:
            cell = '--'
        elif p < alpha:
            cell = '\\textbf{{{:.3f}}}'.format(p)
        else:
            cell = '{:.3f}'.format(p)
        row.append(cell)
    rows.append(' & '.join(row) + ' \\\\')

latex_table = '\\begin{tabular}{l' + 'c' * len(domains) + '}\n'
# Reuse the header built above rather than assembling the same string twice.
latex_table += '\\toprule\nTask' + header
latex_table += '\n'.join(rows)
latex_table += '\n\\bottomrule\n\\end{tabular}'
print(latex_table)

In [None]:
# LaTeX table of the per-robot domain-difference p-values:
# rows are tasks, columns are robots in sorted order; p-values below alpha are
# bold and missing ones are shown as '--'.
robots = sorted(domain_results)
header = ' & ' + ' & '.join(robots) + ' \\\\ \\midrule\n'

rows = []
for task in tasks:
    cells = []
    for robot in robots:
        p = domain_results[robot].get(task)
        if p is None:
            cells.append('--')
            continue
        cells.append('\\textbf{{{:.3f}}}'.format(p) if p < alpha else '{:.3f}'.format(p))
    rows.append(' & '.join([task] + cells) + ' \\\\')

latex_table_domains = ''.join([
    '\\begin{tabular}{l' + 'c' * len(robots) + '}\n',
    '\\toprule\nTask' + header,
    '\n'.join(rows),
    '\n\\bottomrule\n\\end{tabular}',
])
print(latex_table_domains)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("whitegrid")

# NOTE(review): this cell previously read `raw_df`, which no cell in this
# notebook defines (NameError on Restart & Run All). `df` is the frame loaded
# from raw_all_data.pkl - confirm it is the intended source.
# Count rows per (robot, domain, image_ref), then count how many images in
# each domain have each row count.
entries_per_image = (
    df.groupby(['robot', 'domain', 'image_ref'])
    .size()
    .reset_index(name='num_entries')
)
domain_counts = (
    entries_per_image.groupby(['domain', 'num_entries'])
    .size()
    .reset_index(name='count')
)

fig, ax = plt.subplots(figsize=(12, 5))
sns.barplot(
    data=domain_counts,
    x='num_entries',
    y='count',
    hue='domain',
    hue_order=['Home', 'BigOffice-2', 'BigOffice-3', 'Hallway', 'MeetingRoom',
               'SmallOffice'],
    palette='viridis',
    ax=ax,
)

# Annotate every non-empty bar with its integer height.
for patch in ax.patches:
    height = patch.get_height()
    if height > 0:
        ax.text(
            patch.get_x() + patch.get_width() / 2.,
            height + 0.5,  # Slightly above the bar
            int(height),
            ha='center',
            va='bottom',
            fontsize=11
        )

# Upper y-limit is the sum of all bar heights, i.e. the total number of images.
ax.set_ylim(0, domain_counts['count'].sum())

ax.set_xlabel('Number of Entries per Image')
ax.set_ylabel('Number of Images')
ax.set_title('Distribution of Number of Entries per Image by Domain')
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# NOTE(review): this cell previously read `raw_df`, which no cell in this
# notebook defines (NameError on Restart & Run All). `df` is the frame loaded
# from raw_all_data.pkl - confirm it is the intended source.
#
# duplicated() with the default keep='first' flags only the redundant copies
# of a row, so summing per image gives the number of extra copies directly.
# The earlier `duplicated(keep=False).sum() - 1` undercounted the involved rows
# and overcounted the extras whenever an image had more than one set of
# duplicated rows (e.g. 2 + 2 identical rows gave 3).
duplicated_rows = df.duplicated()
duplicates_per_index = (
    duplicated_rows
    .groupby([df['robot'], df['domain'], df['image_ref']])
    .sum()
    .reset_index(name='num_duplicates')
)
# Number of images per (domain, duplicate count).
domain_counts = (
    duplicates_per_index.groupby(['domain', 'num_duplicates'])
    .size()
    .reset_index(name='count')
)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=domain_counts,
    x='num_duplicates',
    y='count',
    hue='domain',
    hue_order=['Home', 'BigOffice-2', 'BigOffice-3', 'Hallway', 'MeetingRoom',
               'SmallOffice'],
    palette='viridis',
    ax=ax,
)

# Annotate every non-empty bar with its integer height.
for patch in ax.patches:
    height = patch.get_height()
    if height > 0:
        ax.text(
            patch.get_x() + patch.get_width() / 2.,
            height + 0.5,
            int(height),
            ha='center',
            va='bottom',
            fontsize=11
        )

# Upper y-limit is the sum of all bar heights, i.e. the total number of images.
ax.set_ylim(0, domain_counts['count'].sum())

ax.set_xlabel('Number of Duplicates per Image')
ax.set_ylabel('Number of Images')
ax.set_title('Distribution of Number of Duplicates per Image by Domain')
fig.tight_layout()
plt.show()