## Obtain text format ANOVA results


0. adjust if necessary

In [44]:
alpha_level = 0.05

1. export all SPSS ANOVA result tables "Tests der Innersubjekteffekte" as .txt files  
(right-click on table and select "Exportieren...")
2. Select the file by running the cell below:

In [2]:
import tkinter as tk
from tkinter import filedialog
from pathlib import Path

# hiding the root tkinter window (not needed)
root = tk.Tk()
root.withdraw()
# open the file selector
filename = filedialog.askopenfilename(initialdir = "R:/AG-Beste-Studien/",
                                      title = "Select file",
                                      filetypes = (('Text files','*.txt'),("all files","*.*")))
filename

3. load in the data by running the cell below

In [4]:
import pandas as pd

ANOVA_df = pd.read_csv(filename, skiprows=3, skipfooter=3, delimiter='|', encoding='utf8', decimal=',', engine='python')
# get rid of spaces in column names
ANOVA_df.columns = [c.rstrip() for c in ANOVA_df.columns.tolist()]
# remove the lines in between
ANOVA_df = ANOVA_df[ANOVA_df["Quelle"] != "----------------------"]

4. name the factors as appearing in the article, by running the cell below:

In [5]:
# get the factor names as list, store as dict
factors = set([n.rstrip() for n in ANOVA_df["Unnamed: 0"].tolist()])
factors_text = {}
for f in factors:
    if len(f) > 0 and " * " not in f and not f.startswith("Fehler("):
        factors_text[f] = input(f"Enter a name for the factor '{f}':")
factors_text

Enter a name for the factor 'pre_post':Time Interval
Enter a name for the factor 'NGfreq_hi_lo':Nogo Frequency
Enter a name for the factor 'NG_G':Trial Type


5. run the cell below and copy the output as necessary in your manuscript

In [69]:
# display factor names for all rows
sources = [s.rstrip() for s in ANOVA_df["Unnamed: 0"].tolist()]
for i, source in enumerate(sources[1:]):
    sources[i+1] = sources[i] if len(sources[i+1]) == 0 else sources[i+1]
ANOVA_df["Unnamed: 0"] = sources

# limit table to Greenhouse-Geisser correction
ANOVA_df["Quelle"] = [e.rstrip() for e in ANOVA_df["Quelle"].tolist()]
ANOVA_GG_df = ANOVA_df[ANOVA_df["Quelle"] == "Greenhouse-Geisser"]

# split the error table
ANOVA_GG_df["Sig."] = [e.rstrip() for e in ANOVA_GG_df["Sig."].tolist()]
ANOVA_GG_effects_df = ANOVA_GG_df[ANOVA_GG_df["Sig."] != ""]
ANOVA_GG_errors_df = ANOVA_GG_df[ANOVA_GG_df["Sig."] == ""]
ANOVA_GG_errors_df.set_index("Unnamed: 0", inplace=True)

# sort non-significant table to p values
ANOVA_GG_effects_df["Sig."] = [float(v) for v in ANOVA_GG_effects_df["Sig."].tolist()]
ANOVA_GG_sig_df = ANOVA_GG_effects_df[ANOVA_GG_effects_df["Sig."] <= 0.05]
ANOVA_GG_sig_df.index = list(range(len(ANOVA_GG_sig_df)))
ANOVA_GG_nonsig_df = ANOVA_GG_effects_df[ANOVA_GG_effects_df["Sig."] > 0.05]
ANOVA_GG_nonsig_df.sort_values("Sig.", inplace=True)
ANOVA_GG_nonsig_df.index = list(range(len(ANOVA_GG_nonsig_df)))

# setup the html writer function
from IPython.display import HTML
def write_html_line(effect_name, df1, df2, F_val, p_val, eta_val):
    # prepare standard strings
    all_other = ""
    eta_part = "; &#951;<sub>p</sub><sup>2</sup> = " + eta_val
    p_sign = "="
    F_sign = "="
    # special cases
    if p_val < 0.001:
        p_sign = "&#60;"
        p_val = ".001"
    elif p_val > alpha_level:
        # non significant:
        p_sign = "&#8805;"
        F_sign = "&#8804;"
        all_other = "All other "
        eta_part = ""
    # formatting
    df1 = '%g'%(df1)
    df2 = '%g'%(df2)
    html_text = """
    <b>{effect_name}</b> <br>
    {all_other}F({df1}, {df2}) {F_sign} {F_val}; p {p_sign} {p_val}{eta_part} <br>
    """
    # include the variables
    return html_text.format(effect_name=effect_name, all_other=all_other, df1=df1, df2=df2, F_sign=F_sign, F_val=F_val, p_sign=p_sign, p_val=p_val, eta_part=eta_part)

# gather the important infos

display(HTML('<b>SIGNIFICANT RESULTS</b>'))

for i in range(len(ANOVA_GG_sig_df)):
    factor_name = " &#215 ".join([factors_text[p] for p in ANOVA_GG_sig_df["Unnamed: 0"][i].split(" * ")])
    html_sig = write_html_line(factor_name, 
        float(ANOVA_GG_sig_df["df"][i]), 
        float(ANOVA_GG_errors_df["df"]["Fehler(" + "*".join(ANOVA_GG_sig_df["Unnamed: 0"][i].split(" * ")) + ")"]),
        float(ANOVA_GG_sig_df["F"][i]), 
        ANOVA_GG_sig_df["Sig."][i], 
        ANOVA_GG_sig_df["Partielles Eta-Quadrat"][i]
       )
    display(HTML(html_sig))

display(HTML('<b>NON-SIGNIFICANT RESULTS</b>'))

factor_name = " &#215 ".join([factors_text[p] for p in ANOVA_GG_nonsig_df["Unnamed: 0"][0].split(" * ")])
html_nonsig = write_html_line(factor_name, 
    float(ANOVA_GG_nonsig_df["df"][0]), 
    float(ANOVA_GG_errors_df["df"]["Fehler(" + "*".join(ANOVA_GG_nonsig_df["Unnamed: 0"][0].split(" * ")) + ")"]),
    float(ANOVA_GG_nonsig_df["F"][0]), 
    ANOVA_GG_nonsig_df["Sig."][0], 
    ANOVA_GG_nonsig_df["Partielles Eta-Quadrat"][0]
   )
display(HTML(html_nonsig))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ANOVA_GG_df["Sig."] = [e.rstrip() for e in ANOVA_GG_df["Sig."].tolist()]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ANOVA_GG_effects_df["Sig."] = [float(v) for v in ANOVA_GG_effects_df["Sig."].tolist()]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ANOVA_GG_nonsig_df.sort_values("Sig.", inplace=True)
