In [11]:
import pandas as pd

In [12]:
def read_excel_multisheet(filename: str) -> pd.DataFrame:
    """Read every sheet of an Excel workbook into one DataFrame.

    Each sheet is read with its ``"Unnamed: 0"`` column as the index, sorted
    by that index and tagged with a ``source`` column holding the sheet name.
    The per-sheet frames are then concatenated in workbook order.

    Parameters
    ----------
    filename : str
        Path to the Excel workbook.

    Returns
    -------
    pd.DataFrame
        All sheets stacked vertically, with an extra ``source`` column.
    """
    # The context manager closes the workbook's file handle even when reading
    # one of the sheets raises.
    with pd.ExcelFile(filename) as workbook:
        frames = [
            pd.read_excel(workbook, sheet_name=sheet, index_col="Unnamed: 0")
            .sort_index()
            .assign(source=sheet)
            for sheet in workbook.sheet_names
        ]
    return pd.concat(frames)

В data_dict были дубликаты. Чтобы ничего не сломать, сам data_dict мы трогать не будем, а удалим дубликаты из датасетов, сформированных на его основе:

In [None]:
# Load the two score tables that are merged further down.
# fact_scores is read without index_col, so its saved index stays an ordinary column.
fact_scores = pd.read_excel(
    "../data_output_table/distractor_metrics/DistractorFactScores1.xlsx",
    engine="openpyxl",
)
# gemini_scores uses the saved "Unnamed: 0" column as its index.
gemini_scores = pd.read_excel(
    "../data_output_table/auto_assess_distractors/gemini_results.xlsx",
    engine="openpyxl",
    index_col="Unnamed: 0",
)

In [14]:
# Shape and per-source row counts of fact_scores before de-duplication
fact_scores.shape, fact_scores["source"].value_counts()

((1561, 14),
 source
 BartDG_PM           167
 BartDG_ANPM         167
 BartDG              166
 ChatGPT4o           166
 true_distractors    166
 Deepseek            165
 RuRace_T5           160
 MuSeRC_GPT3         159
 RuRace_GPT3         156
 MuSeRC_T5            89
 Name: count, dtype: int64)

In [15]:
# Shape and per-source row counts of gemini_scores before de-duplication
gemini_scores.shape, gemini_scores["source"].value_counts()

((1597, 5),
 source
 BartDG_PM           167
 BartDG_ANPM         167
 ChatGPT4o           167
 BartDG              166
 true_distractors    166
 RuRace_T5           165
 Deepseek            165
 MuSeRC_GPT3         159
 RuRace_GPT3         156
 MuSeRC_T5           119
 Name: count, dtype: int64)

In [16]:
# Keep the first occurrence of every repeated row.
# fact_scores is keyed without "question"; gemini_scores is keyed with it.
fact_scores = fact_scores.drop_duplicates(
    subset=["source", "reading_text", "distractor"],
    keep="first",
)
gemini_scores = gemini_scores.drop_duplicates(
    subset=["source", "reading_text", "question", "distractor"],
    keep="first",
)

In [17]:
# Shape and per-source row counts of fact_scores after de-duplication
fact_scores.shape, fact_scores["source"].value_counts()

((1561, 14),
 source
 BartDG_PM           167
 BartDG_ANPM         167
 BartDG              166
 ChatGPT4o           166
 true_distractors    166
 Deepseek            165
 RuRace_T5           160
 MuSeRC_GPT3         159
 RuRace_GPT3         156
 MuSeRC_T5            89
 Name: count, dtype: int64)

In [18]:
# Shape and per-source row counts of gemini_scores after de-duplication
gemini_scores.shape, gemini_scores["source"].value_counts()

((1561, 5),
 source
 BartDG_PM           167
 BartDG_ANPM         167
 BartDG              166
 ChatGPT4o           166
 true_distractors    166
 Deepseek            165
 RuRace_T5           160
 MuSeRC_GPT3         159
 RuRace_GPT3         156
 MuSeRC_T5            89
 Name: count, dtype: int64)

In [19]:
# Preview the first rows of fact_scores
fact_scores.head()

Unnamed: 0.1,Unnamed: 0,source,vso_intersec_ind,vs_intersec_ind,vs_passivized_intersec_ind,vso_intersec,vs_intersec,vs_passivized_intersec,noun_intersec,propn_intersec,noun_intersec_ind,propn_intersec_ind,reading_text,distractor
0,0,BartDG,0,0,0,set(),set(),set(),{'родина'},set(),1,0,На задворках нашего села стояло на сваях длин...,Автор очень любил свою родину.
1,1,BartDG,0,0,0,set(),set(),set(),"{'скрипка', 'родина'}",set(),1,0,На задворках нашего села стояло на сваях длин...,Автор часто играл на скрипке на своей родине.
2,2,BartDG,0,0,0,set(),set(),set(),"{'война', 'жизнь'}",set(),1,0,На задворках нашего села стояло на сваях длин...,Соотечественник автора спас жизнь своему сыну ...
3,3,BartDG,0,0,0,set(),set(),set(),set(),set(),0,0,"Мы ехали берегом Лены на юг, а зима догоняла ...",Оба козла были горными козлами.
4,4,BartDG,0,0,0,set(),set(),set(),{'коза'},set(),1,0,"Мы ехали берегом Лены на юг, а зима догоняла ...","Старая коза, маленькая, была готова снова прыг..."


In [20]:
# Preview the first rows of gemini_scores
gemini_scores.head()

Unnamed: 0,source,reading_text,question,distractor,gemini_guess
0,BartDG,На задворках нашего села стояло на сваях длин...,Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?,Автор очень любил свою родину.,1
1,BartDG,На задворках нашего села стояло на сваях длин...,Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?,Автор часто играл на скрипке на своей родине.,0
2,BartDG,На задворках нашего села стояло на сваях длин...,Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?,Соотечественник автора спас жизнь своему сыну ...,0
3,BartDG,"Мы ехали берегом Лены на юг, а зима догоняла ...",Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?,Оба козла были горными козлами.,0
4,BartDG,"Мы ехали берегом Лены на юг, а зима догоняла ...",Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?,"Старая коза, маленькая, была готова снова прыг...",0


In [21]:
# Attach the fact-overlap indicator columns to every Gemini-scored distractor.
# This is an inner join (pandas default), so rows without a partner in the
# other table are dropped.
scores_combined = pd.merge(
    gemini_scores[["source", "reading_text", "question", "distractor", "gemini_guess"]],
    fact_scores[
        [
            "source", "reading_text", "distractor",
            "vso_intersec_ind", "vs_intersec_ind", "vs_passivized_intersec_ind",
            "noun_intersec_ind", "propn_intersec_ind",
        ]
    ],
    on=["source", "reading_text", "distractor"],
    # Fail loudly if fact_scores is not unique on the join keys: duplicate keys
    # would silently multiply rows and skew every mean computed downstream.
    validate="many_to_one",
)

In [None]:
# Save the merged table; to_excel writes the index as the first column by default
scores_combined.to_excel("../data_for_comparison/corr/scores_combined.xlsx")

In [23]:
# Score columns that get averaged per source / question group
q_cols = [
    "gemini_guess",
    "vso_intersec_ind",
    "vs_intersec_ind",
    "vs_passivized_intersec_ind",
    "noun_intersec_ind",
    "propn_intersec_ind",
]

In [24]:
# Divisors that turn row counts into the "D/Q" column.
# NOTE(review): presumably the number of questions of each type, which would
# make D/Q "distractors per question" — confirm against the dataset.
QUESTIONS_PER_TYPE = {
    "Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?": 35,
    "Какое высказывание СООТВЕТСТВУЕТ тексту?": 20,
}
QUESTIONS_TOTAL = sum(QUESTIONS_PER_TYPE.values())  # 35 + 20 = 55

by_source = scores_combined.groupby("source")
by_question = scores_combined.groupby(["source", "question"])

# Mean of every score column, per source and per (source, question)
source_totals = by_source[q_cols].mean()
per_question = by_question[q_cols].mean()

# Every source gets a row for each known question type, even when it has no
# rows of that type (the scores are NaN and D/Q is 0 in that case).
expected_rows = pd.MultiIndex.from_product(
    [source_totals.index, list(QUESTIONS_PER_TYPE)],
    names=["source", "question"],
)
per_question = per_question.reindex(per_question.index.union(expected_rows))

# D/Q per (source, question): group size divided by that question type's divisor.
# Question texts missing from QUESTIONS_PER_TYPE get NaN.
group_sizes = by_question.size().reindex(per_question.index, fill_value=0)
divisors = per_question.index.get_level_values("question").map(QUESTIONS_PER_TYPE)
per_question["D/Q"] = group_sizes.to_numpy() / divisors.to_numpy()

# One "total" row per source, built from a copy so source_totals stays unchanged
total_rows = source_totals.assign(**{"D/Q": by_source.size() / QUESTIONS_TOTAL})
total_rows.index = pd.MultiIndex.from_product(
    [source_totals.index, ["total"]],
    names=["source", "question"],
)

table2 = pd.concat([per_question, total_rows]).sort_index()

table2

Unnamed: 0_level_0,Unnamed: 1_level_0,gemini_guess,vso_intersec_ind,vs_intersec_ind,vs_passivized_intersec_ind,noun_intersec_ind,propn_intersec_ind,D/Q
source,question,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BartDG,total,0.5,0.024096,0.060241,0.006024,0.831325,0.228916,3.018182
BartDG,Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?,0.254717,0.037736,0.084906,0.009434,0.867925,0.150943,3.028571
BartDG,Какое высказывание СООТВЕТСТВУЕТ тексту?,0.933333,0.0,0.016667,0.0,0.766667,0.366667,3.0
BartDG_ANPM,total,0.443114,0.011976,0.041916,0.0,0.862275,0.143713,3.036364
BartDG_ANPM,Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?,0.188679,0.009434,0.04717,0.0,0.90566,0.084906,3.028571
BartDG_ANPM,Какое высказывание СООТВЕТСТВУЕТ тексту?,0.885246,0.016393,0.032787,0.0,0.786885,0.245902,3.05
BartDG_PM,total,0.461078,0.0,0.011976,0.005988,0.808383,0.137725,3.036364
BartDG_PM,Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?,0.186916,0.0,0.018692,0.009346,0.906542,0.093458,3.057143
BartDG_PM,Какое высказывание СООТВЕТСТВУЕТ тексту?,0.95,0.0,0.0,0.0,0.633333,0.216667,3.0
ChatGPT4o,total,0.536145,0.006024,0.042169,0.0,0.873494,0.331325,3.018182


In [25]:
def map_question_texts(s: str) -> str:
    """Return the short label for a known question text.

    The two known question texts map to ``"СООТВ"`` and ``"НЕ СООТВ"``.
    Any other value (for example ``"total"``) is returned unchanged.
    """
    if s == "Какое высказывание СООТВЕТСТВУЕТ тексту?":
        return "СООТВ"
    if s == "Какое высказывание НЕ СООТВЕТСТВУЕТ тексту?":
        return "НЕ СООТВ"
    return s

# Replace the full question texts in the index with their short labels
table2 = (
    table2
    .reset_index()
    .assign(question=lambda frame: frame["question"].map(map_question_texts))
    .set_index(["source", "question"])
)

In [26]:
# Show the table with the shortened question labels
table2

Unnamed: 0_level_0,Unnamed: 1_level_0,gemini_guess,vso_intersec_ind,vs_intersec_ind,vs_passivized_intersec_ind,noun_intersec_ind,propn_intersec_ind,D/Q
source,question,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BartDG,total,0.5,0.024096,0.060241,0.006024,0.831325,0.228916,3.018182
BartDG,НЕ СООТВ,0.254717,0.037736,0.084906,0.009434,0.867925,0.150943,3.028571
BartDG,СООТВ,0.933333,0.0,0.016667,0.0,0.766667,0.366667,3.0
BartDG_ANPM,total,0.443114,0.011976,0.041916,0.0,0.862275,0.143713,3.036364
BartDG_ANPM,НЕ СООТВ,0.188679,0.009434,0.04717,0.0,0.90566,0.084906,3.028571
BartDG_ANPM,СООТВ,0.885246,0.016393,0.032787,0.0,0.786885,0.245902,3.05
BartDG_PM,total,0.461078,0.0,0.011976,0.005988,0.808383,0.137725,3.036364
BartDG_PM,НЕ СООТВ,0.186916,0.0,0.018692,0.009346,0.906542,0.093458,3.057143
BartDG_PM,СООТВ,0.95,0.0,0.0,0.0,0.633333,0.216667,3.0
ChatGPT4o,total,0.536145,0.006024,0.042169,0.0,0.873494,0.331325,3.018182


In [27]:
# Remove the passivized-match indicator column from the table
table2 = table2.drop(columns=["vs_passivized_intersec_ind"])

In [28]:
# Give the indicator columns their presentation names
table2 = table2.rename(
    columns={
        "vso_intersec_ind": "FullNPMatch",
        "vs_intersec_ind": "SubjNPMatch",
        "noun_intersec_ind": "NounMatch",
        "propn_intersec_ind": "PropnMatch",
    }
)
table2

Unnamed: 0_level_0,Unnamed: 1_level_0,gemini_guess,FullNPMatch,SubjNPMatch,NounMatch,PropnMatch,D/Q
source,question,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BartDG,total,0.5,0.024096,0.060241,0.831325,0.228916,3.018182
BartDG,НЕ СООТВ,0.254717,0.037736,0.084906,0.867925,0.150943,3.028571
BartDG,СООТВ,0.933333,0.0,0.016667,0.766667,0.366667,3.0
BartDG_ANPM,total,0.443114,0.011976,0.041916,0.862275,0.143713,3.036364
BartDG_ANPM,НЕ СООТВ,0.188679,0.009434,0.04717,0.90566,0.084906,3.028571
BartDG_ANPM,СООТВ,0.885246,0.016393,0.032787,0.786885,0.245902,3.05
BartDG_PM,total,0.461078,0.0,0.011976,0.808383,0.137725,3.036364
BartDG_PM,НЕ СООТВ,0.186916,0.0,0.018692,0.906542,0.093458,3.057143
BartDG_PM,СООТВ,0.95,0.0,0.0,0.633333,0.216667,3.0
ChatGPT4o,total,0.536145,0.006024,0.042169,0.873494,0.331325,3.018182


In [29]:
# Scale the share columns to percentages; "D/Q" is left untouched.
# Re-running this cell multiplies the values by 100 again.
for share_col in ["gemini_guess", "FullNPMatch", "SubjNPMatch", "NounMatch", "PropnMatch"]:
    table2[share_col] = table2[share_col] * 100

In [None]:
# Export the final table; floats are written with two decimal places
table2.to_excel("../data_output_table/tables_for_manuscript/Table2-1.xlsx", float_format="%.2f")