In [17]:
# Copyright (C) 2023, Princeton University.
# This source code is licensed under the BSD 3-Clause license found in the LICENSE file in the root directory of this source tree.

# Authors: Karhan Kayan

import pandas as pd
import statsmodels.api as sm


def A_count_proportion(df):
    """Return the share of decided votes that picked Program A, with a 99% CI.

    Every row of ``df`` is one submission. A row is counted for Program A when
    its ``'Answer.category.label'`` value contains ``"Program A"``; otherwise
    it is counted for Program B when the value contains ``"Program B"``. Rows
    matching neither string are left out of both counts.

    Args:
        df: DataFrame with an ``'Answer.category.label'`` column of strings.

    Returns:
        Tuple ``(proportion, ci_low, ci_upp)``. ``proportion`` is
        ``A_count / (A_count + B_count)``; the bounds are whatever
        ``sm.stats.proportion_confint`` returns for ``method='binom_test'``
        at ``alpha = 1 - 0.99``.

    Raises:
        ZeroDivisionError: If no row selects Program A or Program B.
    """
    A_count = 0
    B_count = 0
    # Read the labels straight from the column. Looking rows up again with
    # df.iloc[<index label>] is only correct for a default 0..n-1 index; on a
    # filtered or concatenated frame it raises or counts the wrong rows.
    for label in df['Answer.category.label']:
        if "Program A" in label:
            A_count += 1
        elif "Program B" in label:
            B_count += 1
    number_of_successes = A_count  # number of times program A was chosen as more realistic
    n = A_count + B_count  # total number of decided submissions

    if n == 0:
        # Fail with a clear message instead of handing a zero sample size to
        # the interval routine and dividing by zero afterwards.
        raise ZeroDivisionError("no 'Program A' or 'Program B' answers to compute a proportion from")

    # Confidence level: 99%
    confidence_level = 0.99
    alpha = 1 - confidence_level

    # Calculate the confidence interval
    ci_low, ci_upp = sm.stats.proportion_confint(number_of_successes, n, alpha=alpha, method='binom_test')

    return A_count / n, ci_low, ci_upp

def B_count_proportion(df):
    """Return the share of decided votes that picked Program B, with a 99% CI.

    Every row of ``df`` is one submission. A row is counted for Program A when
    its ``'Answer.category.label'`` value contains ``"Program A"``; otherwise
    it is counted for Program B when the value contains ``"Program B"``. Rows
    matching neither string are left out of both counts.

    Args:
        df: DataFrame with an ``'Answer.category.label'`` column of strings.

    Returns:
        Tuple ``(proportion, ci_low, ci_upp)``. ``proportion`` is
        ``B_count / (A_count + B_count)``; the bounds are whatever
        ``sm.stats.proportion_confint`` returns for ``method='binom_test'``
        at ``alpha = 1 - 0.99``.

    Raises:
        ZeroDivisionError: If no row selects Program A or Program B.
    """
    A_count = 0
    B_count = 0
    # Read the labels straight from the column. Looking rows up again with
    # df.iloc[<index label>] is only correct for a default 0..n-1 index; on a
    # filtered or concatenated frame it raises or counts the wrong rows.
    for label in df['Answer.category.label']:
        if "Program A" in label:
            A_count += 1
        elif "Program B" in label:
            B_count += 1
    number_of_successes = B_count  # number of times program B was chosen as more realistic
    n = A_count + B_count  # total number of decided submissions

    if n == 0:
        # Fail with a clear message instead of handing a zero sample size to
        # the interval routine and dividing by zero afterwards.
        raise ZeroDivisionError("no 'Program A' or 'Program B' answers to compute a proportion from")

    # Confidence level: 99%
    confidence_level = 0.99
    alpha = 1 - confidence_level

    # Calculate the confidence interval
    ci_low, ci_upp = sm.stats.proportion_confint(number_of_successes, n, alpha=alpha, method='binom_test')

    return B_count / n, ci_low, ci_upp

def count_errors(df):
    """Return the fraction of rows answered 'Yes', ignoring 'Not Sure' rows.

    A row counts as an error when its ``'Answer.category.label'`` value
    contains ``'Yes'`` and as undecided when it contains ``'Not Sure'``. The
    two checks are independent, so a label containing both strings is counted
    in both tallies.

    Args:
        df: DataFrame with an ``'Answer.category.label'`` column of strings.

    Returns:
        ``error_count / (number_of_rows - not_sure_count)`` as a float.

    Raises:
        ZeroDivisionError: If the frame is empty or every row is 'Not Sure'.
    """
    error_count = 0
    not_sure_count = 0
    # Read the labels straight from the column. Looking rows up again with
    # df.iloc[<index label>] is only correct for a default 0..n-1 index; on a
    # filtered or concatenated frame it raises or counts the wrong rows.
    for label in df['Answer.category.label']:
        if 'Yes' in label:
            error_count += 1
        if 'Not Sure' in label:
            not_sure_count += 1

    decided = df.shape[0] - not_sure_count
    if decided == 0:
        raise ZeroDivisionError("no decided answers (all rows are 'Not Sure' or the frame is empty)")
    return error_count / decided

In [24]:
# Pairwise realism comparisons: each entry pairs a results CSV with the claim
# its Program-A proportion is printed under. Judging by the printed wording,
# Program A is presumably Infinigen in every file -- confirm against the survey setup.
realism_comparisons = [
    ('./results/infinigen-ATISS-first-person-layout-realism.csv', 'Infinigen first person layout is more realistic than ATISS'),
    ('./results/infinigen-ATISS-first-person-realism.csv', 'Infinigen first person is more realistic than ATISS'),
    ('./results/infinigen-sceneformer-first-person-layout-realism.csv', 'Infinigen first person layout is more realistic than Sceneformer'),
    ('./results/infinigen-sceneformer-first-person-realism.csv', 'Infinigen first person is more realistic than Sceneformer'),
    ('./results/infinigen-fastsynth-first-person-layout-realism.csv', 'Infinigen first person layout is more realistic than FastSynth'),
    ('./results/infinigen-fastsynth-first-person-realism.csv', 'Infinigen first person is more realistic than FastSynth'),
    ('./results/infinigen-procthor-first-person-layout-realism.csv', 'Infinigen first person layout is more realistic than Procthor'),
    ('./results/infinigen-procthor-first-person-realism.csv', 'Infinigen first person is more realistic than Procthor'),
    ('./results/infinigen-ATISS-overhead-layout-realism.csv', 'Infinigen overhead layout is more realistic than ATISS'),
    ('./results/infinigen-ATISS-overhead-realism.csv', 'Infinigen overhead is more realistic than ATISS'),
    ('./results/infinigen-sceneformer-overhead-layout-realism.csv', 'Infinigen overhead layout is more realistic than Sceneformer'),
    ('./results/infinigen-sceneformer-overhead-realism.csv', 'Infinigen overhead is more realistic than Sceneformer'),
    ('./results/infinigen-fastsynth-overhead-layout-realism.csv', 'Infinigen overhead layout is more realistic than FastSynth'),
    ('./results/infinigen-fastsynth-overhead-realism.csv', 'Infinigen overhead is more realistic than FastSynth'),
]

for src, claim in realism_comparisons:
    df = pd.read_csv(src)
    # Evaluate once per file and reuse all three returned values, so the
    # confidence interval is not recomputed for every printed field.
    proportion, ci_low, ci_upp = A_count_proportion(df)
    print(f'{claim} {proportion:.3f} of the time. 99% confidence interval: {ci_low:.3f} - {ci_upp:.3f}')



Infinigen first person layout is more realistic than ATISS 0.693 of the time. 99% confidence interval: 0.590 - 0.783
Infinigen first person is more realistic than ATISS 0.713 of the time. 99% confidence interval: 0.611 - 0.802
Infinigen first person layout is more realistic than Sceneformer 0.560 of the time. 99% confidence interval: 0.453 - 0.661
Infinigen first person is more realistic than Sceneformer 0.667 of the time. 99% confidence interval: 0.561 - 0.759
Infinigen first person layout is more realistic than FastSynth 0.853 of the time. 99% confidence interval: 0.766 - 0.917
Infinigen first person is more realistic than FastSynth 0.907 of the time. 99% confidence interval: 0.829 - 0.954
Infinigen first person layout is more realistic than Procthor 0.944 of the time. 99% confidence interval: 0.873 - 0.979
Infinigen first person is more realistic than Procthor 0.893 of the time. 99% confidence interval: 0.813 - 0.946
Infinigen overhead layout is more realistic than ATISS 0.393 of th

In [20]:
# Report the count_errors() ratio for the Infinigen first-person error survey.
src = './results/infinigen-first-person-errors.csv'
df = pd.read_csv(src)
error_rate = count_errors(df)
print(f'Infinigen first person has {error_rate:.3f} errors')

Infinigen first person has 0.175 errors


In [21]:
# Same result files as the Program-A report, read from the other side: each
# entry pairs a results CSV with the claim its Program-B proportion is printed
# under. Program B is presumably the baseline method in every file -- confirm
# against the survey setup. Claim strings are kept exactly as printed,
# including the lowercase method names in the overhead rows.
baseline_comparisons = [
    ('./results/infinigen-ATISS-first-person-layout-realism.csv', 'ATISS first person layout is more realistic than Infinigen'),
    ('./results/infinigen-ATISS-first-person-realism.csv', 'ATISS first person is more realistic than Infinigen'),
    ('./results/infinigen-sceneformer-first-person-layout-realism.csv', 'Sceneformer first person layout is more realistic than Infinigen'),
    ('./results/infinigen-sceneformer-first-person-realism.csv', 'Sceneformer first person is more realistic than Infinigen'),
    ('./results/infinigen-fastsynth-first-person-layout-realism.csv', 'FastSynth first person layout is more realistic than Infinigen'),
    ('./results/infinigen-fastsynth-first-person-realism.csv', 'FastSynth first person is more realistic than Infinigen'),
    ('./results/infinigen-procthor-first-person-layout-realism.csv', 'Procthor first person layout is more realistic than Infinigen'),
    ('./results/infinigen-procthor-first-person-realism.csv', 'Procthor first person is more realistic than Infinigen'),
    ('./results/infinigen-ATISS-overhead-layout-realism.csv', 'ATISS overhead layout is more realistic than Infinigen'),
    ('./results/infinigen-ATISS-overhead-realism.csv', 'ATISS overhead is more realistic than Infinigen'),
    ('./results/infinigen-sceneformer-overhead-layout-realism.csv', 'sceneformer overhead layout is more realistic than Infinigen'),
    ('./results/infinigen-sceneformer-overhead-realism.csv', 'sceneformer overhead is more realistic than Infinigen'),
    ('./results/infinigen-fastsynth-overhead-layout-realism.csv', 'fastsynth overhead layout is more realistic than Infinigen'),
    ('./results/infinigen-fastsynth-overhead-realism.csv', 'fastsynth overhead is more realistic than Infinigen'),
]

for src, claim in baseline_comparisons:
    df = pd.read_csv(src)
    # Evaluate once per file and reuse all three returned values, so the
    # confidence interval is not recomputed for every printed field.
    proportion, ci_low, ci_upp = B_count_proportion(df)
    print(f'{claim} {proportion:.3f} of the time. 99% confidence interval: {ci_low:.3f} - {ci_upp:.3f}')



ATISS first person layout is more realistic than Infinigen 0.307 of the time. 99% confidence interval: 0.217 - 0.410
ATISS first person is more realistic than Infinigen 0.287 of the time. 99% confidence interval: 0.198 - 0.389
Sceneformer first person layout is more realistic than Infinigen 0.440 of the time. 99% confidence interval: 0.339 - 0.547
Sceneformer first person is more realistic than Infinigen 0.333 of the time. 99% confidence interval: 0.241 - 0.439
FastSynth first person layout is more realistic than Infinigen 0.147 of the time. 99% confidence interval: 0.083 - 0.234
Procthor first person layout is more realistic than Infinigen 0.056 of the time. 99% confidence interval: 0.021 - 0.127
Procthor first person is more realistic than Infinigen 0.107 of the time. 99% confidence interval: 0.054 - 0.187
FastSynth first person is more realistic than Infinigen 0.093 of the time. 99% confidence interval: 0.046 - 0.171
ATISS overhead layout is more realistic than Infinigen 0.607 of th

In [22]:
# Report the count_errors() ratio for the Procthor first-person error survey.
src = './results/procthor-first-person-errors.csv'
df = pd.read_csv(src)
error_rate = count_errors(df)
print(f'procthor first person has {error_rate:.3f} errors')

procthor first person has 0.252 errors


In [27]:

def A_count_proportion(dataframes):
    """Pool several result frames and return Program A's overall share with a 99% CI.

    NOTE: this definition replaces the earlier single-DataFrame function of the
    same name once the cell has run; it expects an iterable of DataFrames.

    A row counts for Program A when its ``'Answer.category.label'`` value
    contains ``"Program A"``. Every other row, whatever its label, is counted
    on the opposing side.

    Args:
        dataframes: Iterable of DataFrames, each with an
            ``'Answer.category.label'`` column of strings.

    Returns:
        Tuple ``(proportion, ci_low, ci_upp)`` where ``proportion`` is the
        Program A count divided by the total number of rows across all frames,
        and the bounds come from ``sm.stats.proportion_confint`` with
        ``method='binom_test'``.
    """
    label_column = 'Answer.category.label'
    wins = 0
    losses = 0
    for frame in dataframes:
        # One boolean per row: did this submission pick Program A?
        picked_a = ["Program A" in record[label_column] for _, record in frame.iterrows()]
        frame_wins = sum(picked_a)
        wins += frame_wins
        # Any row that is not a Program A pick goes to the other side.
        losses += len(picked_a) - frame_wins

    total = wins + losses  # all submissions across the pooled frames

    # 99% confidence level -> alpha for the interval routine
    alpha = 1 - 0.99
    lower, upper = sm.stats.proportion_confint(wins, total, alpha=alpha, method='binom_test')

    return wins / total, lower, upper

# Pool the four first-person realism comparisons and show the overall
# Program A share with its confidence interval as the cell output.
frames = []
for src in (
    './results/infinigen-ATISS-first-person-realism.csv',
    './results/infinigen-sceneformer-first-person-realism.csv',
    './results/infinigen-fastsynth-first-person-realism.csv',
    './results/infinigen-procthor-first-person-realism.csv',
):
    frames.append(pd.read_csv(src))
df1, df2, df3, df4 = frames

A_count_proportion(frames)

(0.795, 0.7496838235735617, 0.8348538077362546)

In [28]:
# Pool the four first-person layout-realism comparisons and show the overall
# Program A share with its confidence interval as the cell output.
frames = []
for src in (
    './results/infinigen-ATISS-first-person-layout-realism.csv',
    './results/infinigen-sceneformer-first-person-layout-realism.csv',
    './results/infinigen-fastsynth-first-person-layout-realism.csv',
    './results/infinigen-procthor-first-person-layout-realism.csv',
):
    frames.append(pd.read_csv(src))
df1, df2, df3, df4 = frames

A_count_proportion(frames)

(0.7601351351351351, 0.7124366959489067, 0.8029941663408575)