In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# ANOVA of MRR over evaluation setup, model and dataset (main effects plus all
# two-way interactions). Each `dataN` dict is a hard-coded result table with one
# row per dataset and one column per model.

# Data for Setup 1
data1 = {
    'Dataset': ['wikipedia', 'review', 'coin', 'comment', 'flight'],
    'TGN': [0.4854, 0.4029, 0.4315, 0.3901, 0.1797],
    'DyRep': [0.1238, 0.4012, 0.3057, 0.1317, 0.1451],
    'DyGFormer': [0.8162, 0.6495, 0.6752, 0.4919, 0.2709],
    'GraphMixer': [0.4624, 0.5120, 0.4725, 0.4489, 0.1715],
    'TCL': [0.5069, 0.3866, 0.4626, 0.4183, 0.1110],
    'TGAT': [0.4927, 0.3678, 0.3930, 0.4098, 0.1233],
    'CAWN': [0.7353, 0.4580, 0.3774, 0.2535, 0.2497]
}

# Data for Setup 2
data2 = {
    'Dataset': ['wikipedia', 'review', 'coin', 'comment', 'flight'],
    'TGN': [0.7172, 0.3934, 0.4396, 0.3419, 0.1914],
    'DyRep': [0.5390, 0.3837, 0.3480, 0.1264, 0.1693],
    'DyGFormer': [0.8164, 0.4615, 0.6207, 0.3202, 0.2738],
    'GraphMixer': [0.6645, 0.4330, 0.4719, 0.4011, 0.1861],
    'TCL': [0.7338, 0.3647, 0.4628, 0.2521, 0.1441],
    'TGAT': [0.6808, 0.3372, 0.4109, 0.2496, 0.1526],
    'CAWN': [0.7588, 0.4318, 0.3798, 0.1978, 0.2553]
}

# Data for Setup 3
# TGN and DyRep have no 'review' result here (None -> NaN in the DataFrame).
data3 = {
    'Dataset': ['wikipedia', 'review', 'coin', 'comment', 'flight'],
    'TGN': [0.8930, None, 0.4603, 0.3714, 0.2461],
    'DyRep': [0.8372, None, 0.4224, 0.0474, 0.2300],
    'DyGFormer': [0.9059, 0.8262, 0.6422, 0.4981, 0.3190],
    'GraphMixer': [0.9184, 0.5005, 0.5075, 0.4483, 0.2608],
    'TCL': [0.9160, 0.3994, 0.4894, 0.4176, 0.2403],
    'TGAT': [0.9159, 0.3633, 0.4629, 0.4095, 0.2447],
    'CAWN': [0.9037, 0.4458, 0.5400, 0.3498, 0.3181]
}


def _build_setup_frame(scores: dict, setup_label: str) -> pd.DataFrame:
    """Return the wide result table for one setup, tagged with a 'Setup' column.

    `scores` maps 'Dataset' and each model name to equally long lists;
    `setup_label` is written into every row of the new 'Setup' column.
    """
    return pd.DataFrame(scores).assign(Setup=setup_label)


def _melt_setup_frame(wide: pd.DataFrame) -> pd.DataFrame:
    """Return `wide` in long format: one row per (Dataset, Setup, Model) with its MRR."""
    return wide.melt(id_vars=['Dataset', 'Setup'], var_name='Model', value_name='MRR')


# Wide tables, each carrying its setup label
setup1 = _build_setup_frame(data1, 'Setup1')
setup2 = _build_setup_frame(data2, 'Setup2')
setup3 = _build_setup_frame(data3, 'Setup3')

# Long-format tables
setup1_melted = _melt_setup_frame(setup1)
setup2_melted = _melt_setup_frame(setup2)
setup3_melted = _melt_setup_frame(setup3)

melted_by_setup = {
    'Setup1': setup1_melted,
    'Setup2': setup2_melted,
    'Setup3': setup3_melted,
}

# Setups that enter the ANOVA.
# NOTE(review): Setup2 is prepared above but was not part of the original
# comparison; it is kept out here to preserve the reported result. Add 'Setup2'
# to this list if a three-way comparison was intended -- confirm with the author.
COMPARED_SETUPS = ['Setup1', 'Setup3']

# ignore_index avoids duplicate row labels in the stacked frame.
combined_data = pd.concat(
    [melted_by_setup[name] for name in COMPARED_SETUPS],
    ignore_index=True,
)

# The formula interface would drop rows with a missing MRR on its own; doing it
# here keeps the effective sample size visible instead of implicit.
anova_data = combined_data.dropna(subset=['MRR'])
n_missing = len(combined_data) - len(anova_data)
if n_missing:
    print(f"Excluding {n_missing} row(s) with missing MRR from the ANOVA")

# Perform ANOVA (Type II sums of squares)
anova_formula = 'MRR ~ C(Setup) + C(Model) + C(Dataset) + C(Setup):C(Model) + C(Model):C(Dataset) + C(Setup):C(Dataset)'
model = ols(anova_formula, data=anova_data).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)


In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Additive ANOVA (setup + dataset) on one MRR value per dataset and setup.
# NOTE(review): the second dataset is labelled 'reddit' here, while the result
# tables in the first cell use 'review' -- confirm which label is intended.
dataset_names = ['wikipedia', 'reddit', 'coin', 'comment', 'flight']
mrr_by_setup = {
    'Setup1': [0.9184, 0.8262, 0.6422, 0.4981, 0.3190],
    'Setup2': [0.8095, 0.7596, 0.6208, 0.4124, 0.3112],
}

# Long-format columns: the datasets repeat once per setup, in setup order.
data = {
    'Dataset': dataset_names * len(mrr_by_setup),
    'MRR': [value for values in mrr_by_setup.values() for value in values],
    'Setup': [label for label in mrr_by_setup for _ in dataset_names],
}

# Both factors are stored as pandas categoricals.
df = pd.DataFrame(data).astype({'Setup': 'category', 'Dataset': 'category'})

# Fit the linear model and print the sequential (Type I) ANOVA table.
anova_formula = 'MRR ~ C(Setup) + C(Dataset)'
model = ols(anova_formula, data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=1)
print(anova_table)
