In [17]:
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy.stats import chi2_contingency

# Load data
data = pd.read_csv('sniff_data.txt')

# Define state order
state_order = ['Idle_State', 'Approach_State', 'Conversation', 'Looking_for_Object', 'Navigate_to_Object']

In [20]:
def perform_statistical_analysis(data):
    print("=== STATISTICAL ANALYSIS OF ROBOT EXPERIMENT ===\n")
    
    # 1. Success Rates Analysis
    print("=== Overall Success Rates ===")
    for state in state_order:
        success_rate = data[state].mean() * 100
        ci = stats.norm.interval(0.95, loc=success_rate, scale=stats.sem(data[state]) * 100)
        print(f"{state}: {success_rate:.1f}% (95% CI: {ci[0]:.1f}% - {ci[1]:.1f}%)")
    print()

    # 2. Object Performance Analysis
    print("=== Object Performance Analysis ===")
    for obj in data['Object'].unique():
        obj_data = data[data['Object'] == obj]
        overall_success = obj_data[state_order].mean().mean() * 100
        print(f"{obj}: {overall_success:.1f}% success rate")
    
    # Perform ANOVA for object performance
    f_stat, p_val = stats.f_oneway(*[data[data['Object'] == obj][state_order].mean(axis=1) 
                                    for obj in data['Object'].unique()])
    print(f"\nANOVA test for object performance:")
    print(f"F-statistic: {f_stat:.3f}")
    print(f"p-value: {p_val:.3f}")
    if p_val < 0.05:
        print("Significant differences exist between objects.")
    else:
        print("No significant differences between objects.")
    print()

    # 3. Location Analysis
    print("=== Location Analysis ===")
    for loc in data['Location'].unique():
        loc_data = data[data['Location'] == loc]
        success_rate = loc_data[state_order].mean().mean() * 100
        print(f"Table {loc}: {success_rate:.1f}% success rate")
    
    # Chi-square test for location independence
    chi2, p_val, dof, expected = chi2_contingency(pd.crosstab(data['Location'], 
                                              (data[state_order].mean(axis=1) > 0.5).astype(int)))
    print(f"\nChi-square test for location independence:")
    print(f"Chi-square statistic: {chi2:.3f}")
    print(f"p-value: {p_val:.3f}")
    if p_val < 0.05:
        print("Location has a significant effect on success rates.")
    else:
        print("No significant effect of location on success rates.")
    print()

    # 4. State Transition Analysis
    print("=== State Transition Analysis ===")
    transitions = pd.DataFrame()
    for i in range(len(state_order)-1):
        current = state_order[i]
        next_state = state_order[i+1]
        transition_success = (data[current] & data[next_state]).mean() * 100
        print(f"{current} → {next_state}: {transition_success:.1f}% transition success rate")
    print()

    # 5. Correlation Analysis
    print("=== State Correlation Matrix ===")
    correlation_matrix = data[state_order].corr()
    print(correlation_matrix)
    
    # Visualize correlation matrix (optional)
    # import seaborn as sns
    # import matplotlib.pyplot as plt

    # plt.figure(figsize=(10, 8))
    # sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
    # plt.title('State Correlation Matrix')
    # plt.tight_layout()
    # plt.show()
    
    # 6. Logistic Regression Analysis
    print("\n=== Logistic Regression Analysis ===")
    
    # Predict final success (Navigate_to_Object) based on previous states
    X = data[state_order[:-1]]
    y = data['Navigate_to_Object']
    
    model = sm.Logit(y, sm.add_constant(X))
    
    try:
        results = model.fit()
        print(results.summary())
        
        # Extract odds ratios for interpretation
        odds_ratios = np.exp(results.params)
        print("\nOdds Ratios:")
        print(odds_ratios)
        
        return results
    
    except Exception as e:
        print("Logistic regression failed:", e)

In [21]:
# Run the analysis
results = perform_statistical_analysis(data)

=== STATISTICAL ANALYSIS OF ROBOT EXPERIMENT ===

=== Overall Success Rates ===
Idle_State: 65.0% (95% CI: 43.6% - 86.4%)
Approach_State: 100.0% (95% CI: nan% - nan%)
Conversation: 100.0% (95% CI: nan% - nan%)
Looking_for_Object: 80.0% (95% CI: 62.0% - 98.0%)
Navigate_to_Object: 55.0% (95% CI: 32.6% - 77.4%)

=== Object Performance Analysis ===
 ball: 100.0% success rate
 water_bottle: 56.7% success rate
 phone: 90.0% success rate
 backpack: 96.0% success rate
 bowl: 80.0% success rate

ANOVA test for object performance:
F-statistic: 3.014
p-value: 0.052
No significant differences between objects.

=== Location Analysis ===
Table 4: 80.0% success rate
Table 1: 86.7% success rate
Table 2: 85.0% success rate
Table 3: 68.0% success rate

Chi-square test for location independence:
Chi-square statistic: 0.104
p-value: 0.991
No significant effect of location on success rates.

=== State Transition Analysis ===
Idle_State → Approach_State: 65.0% transition success rate
Approach_State → Conver