# Stats for the paper

```TODOs:```
- [x] For each system: average faithful, average gramm, average coverage
- [x] For each system: distribution over the merging categories (we’re mostly interested in the reference system)
- [x] For each MERGE CATEGORY: average faithful, average gramm (over all systems except reference)
  - Imbalanced concat: 90% faithful, 90% gramm
  - Replacement: 60% faithful, 95% gramm
  - Balanced concat: …
  - Other: …
- [ ] Distribution over the merging categories (only on uncoverage summ sents -- consider a summ sent to be uncoverage if >= 51% of the responses were NO for coverage)
- [ ] For each system: 3 summ sents that are unfaithful, 3 summ sents that are ungramm, 3 summ sents that are uncoverage


In [1]:
"""Gathers all statistics for paper and makes plots."""
import itertools

import pandas as pd

processed = pd.read_csv("processed.csv")
processed = processed[processed["Approve"] == "x"]
processed.set_index(["Answer.article_hash", "WorkerId"], inplace=True)

systems = ["bottom-up", "reference", "dca", "pg", "novel", "abs-rl-rerank"]
n_sys = len(systems)

merge_d = {
    "Bal. Concat.": "bc",
    "Imbal. Concat.": "ic",
    "Replacement": "re",
    "Other": "ot",
}

n_systems = [f"n_{sys}" for sys in systems]
sys_gramm = [f"gramm_{sys}" for sys in systems]
sys_faith = [f"faith_{sys}" for sys in systems]
sys_cover = [f"cover_{sys}" for sys in systems]
sys_merge = [f"merge_{sys}_{opt}" for sys, opt in itertools.product(systems, merge_d.values())]

---
## Averages

### Tabular output

In [2]:
averages = pd.DataFrame(columns=["system", "gramm", "faith", "cover"])
averages["system"] = systems
for idx, row in averages.iterrows():
    sys = row["system"]
    for col in ["gramm", "faith", "cover"]:
        averages.iloc[idx][col] = processed[f"{col}_{sys}"].sum() / processed[f"n_{sys}"].sum()
averages

Unnamed: 0,system,gramm,faith,cover
0,bottom-up,0.788809,0.568592,0.785199
1,reference,0.915567,0.883905,0.74934
2,dca,0.723577,0.469512,0.626016
3,pg,0.846154,0.769231,0.895105
4,novel,0.785494,0.584877,0.753086
5,abs-rl-rerank,0.775862,0.689655,0.827586


### Graphical output

---
## Distribution over Merging

### Tabular output

In [3]:
merge_opts = list(merge_d.values())
merge_dist = pd.DataFrame(columns=["system"] + merge_opts)
merge_dist["system"] = systems
for idx, row in merge_dist.iterrows():
    sys = row["system"]
    for col in merge_opts:
        merge_dist.iloc[idx][col] = processed[f"merge_{sys}_{col}"].sum() / processed[f"n_{sys}"].sum()
    total = row[merge_opts].sum()
    assert total >= 1 - 1e-16, total
merge_dist

Unnamed: 0,system,bc,ic,re,ot
0,bottom-up,0.315884,0.379061,0.111913,0.193141
1,reference,0.313984,0.422164,0.0738786,0.189974
2,dca,0.229675,0.365854,0.0934959,0.310976
3,pg,0.398601,0.405594,0.111888,0.0839161
4,novel,0.274691,0.382716,0.132716,0.209877
5,abs-rl-rerank,0.396552,0.344828,0.12069,0.137931


### Graphical output

---
## Averages over Merging

### Tabular output

In [7]:
averages_over_merge = pd.DataFrame(columns=["system", "merge", "gramm", "faith", "cover"])
averages_over_merge["system"] = [t[0] for t in itertools.product(systems, merge_opts)]
averages_over_merge["merge" ] = [t[1] for t in itertools.product(systems, merge_opts)]

for idx, row in averages_over_merge.iterrows():
    sys, opt = row[["system", "merge"]]
    for col in ["gramm", "faith", "cover"]:
        mask = (processed[f"merge_{sys}_{opt}"] > 0)
        averages_over_merge.iloc[idx][col] = processed[mask][f"{col}_{sys}"].sum() / processed[f"n_{sys}"].sum()

averages_over_merge

Unnamed: 0,system,merge,gramm,faith,cover
0,bottom-up,bc,0.380866,0.33213,0.406137
1,bottom-up,ic,0.442238,0.34657,0.453069
2,bottom-up,re,0.16065,0.0830325,0.146209
3,bottom-up,ot,0.216606,0.0956679,0.162455
4,reference,bc,0.300792,0.311346,0.295515
5,reference,ic,0.385224,0.379947,0.345646
6,reference,re,0.0580475,0.060686,0.0474934
7,reference,ot,0.171504,0.131926,0.060686
8,dca,bc,0.270325,0.23374,0.276423
9,dca,ic,0.420732,0.300813,0.380081


### Graphical output

---
## "Uncoverage" Distribution over Merging

### Tabular output

### Graphical output

---
## 3 Examples (per system) of `[unfaithful, ungrammatical, uncoverage]`

In [8]:
# TODO: rework to be able to select out exact examples
from IPython.display import HTML
for sys in systems:
    unfaithful = processed.groupby(f"gramm_{sys}")
    print(unfaithful.groups)
    display(HTML())

{0: MultiIndex(levels=[['010d1a29e1b462d4ba059f17faa95db8bc826b7166a92e04c68bfb64d6e6657f', '05ab66adb741a3e1e394ac4b60a50d79aae1a71ca28641c7d143cb7d16a080ae', '05b13df5c83e7d9d29621e9b72c5ac3f6d9c214a4747085defd0eb7ccded1043', '0797a62afcb35c7d266972271c076b47007299e14a14cf81655ef8980ec85275', '087674feadd6d9a73358fba161ac3b2fdb0ca1c898fe4c4e461634f8852c585a', '098607059b15cea5f6323b08c0178fcc360670cc0004fa2670a9da678575d5f7', '0d03720aa3cab6c9d10371db976b201782b409c0a896aeb102084a455a134aed', '12c7fab063dd6c58d625905f4b4b208a3e91cf914a71e341642b739fea33216f', '1396397c1af88c7f6d6b4041cdcf462445a66fb9298857f3f48843cf2737a173', '148dbf0c56e396d343cb8c6fbc0f4ff5395ed24790e706baffb3fa57ee84dda3', '1500e78639872af23f8198a8eb2a4360cbbe23098d351688eb0f5af84e36b2b8', '1c6e3f52ecd23767f46b0aed0cb598bedb0acaf1169beef14f9c90f556324177', '1cbf392c745b5c51e585c11444b50f8af9e0d133e3fd57f4b851efb21ef58dd6', '1e04f3784e515a20f0eaf93ba514998c3c9562e13c3457d489d0b8d2981ca9c8', '22fb196e001be5ce965dd94

<IPython.core.display.HTML object>

{0: MultiIndex(levels=[['010d1a29e1b462d4ba059f17faa95db8bc826b7166a92e04c68bfb64d6e6657f', '05ab66adb741a3e1e394ac4b60a50d79aae1a71ca28641c7d143cb7d16a080ae', '05b13df5c83e7d9d29621e9b72c5ac3f6d9c214a4747085defd0eb7ccded1043', '0797a62afcb35c7d266972271c076b47007299e14a14cf81655ef8980ec85275', '087674feadd6d9a73358fba161ac3b2fdb0ca1c898fe4c4e461634f8852c585a', '098607059b15cea5f6323b08c0178fcc360670cc0004fa2670a9da678575d5f7', '0d03720aa3cab6c9d10371db976b201782b409c0a896aeb102084a455a134aed', '12c7fab063dd6c58d625905f4b4b208a3e91cf914a71e341642b739fea33216f', '1396397c1af88c7f6d6b4041cdcf462445a66fb9298857f3f48843cf2737a173', '148dbf0c56e396d343cb8c6fbc0f4ff5395ed24790e706baffb3fa57ee84dda3', '1500e78639872af23f8198a8eb2a4360cbbe23098d351688eb0f5af84e36b2b8', '1c6e3f52ecd23767f46b0aed0cb598bedb0acaf1169beef14f9c90f556324177', '1cbf392c745b5c51e585c11444b50f8af9e0d133e3fd57f4b851efb21ef58dd6', '1e04f3784e515a20f0eaf93ba514998c3c9562e13c3457d489d0b8d2981ca9c8', '22fb196e001be5ce965dd94

<IPython.core.display.HTML object>

{0: MultiIndex(levels=[['010d1a29e1b462d4ba059f17faa95db8bc826b7166a92e04c68bfb64d6e6657f', '05ab66adb741a3e1e394ac4b60a50d79aae1a71ca28641c7d143cb7d16a080ae', '05b13df5c83e7d9d29621e9b72c5ac3f6d9c214a4747085defd0eb7ccded1043', '0797a62afcb35c7d266972271c076b47007299e14a14cf81655ef8980ec85275', '087674feadd6d9a73358fba161ac3b2fdb0ca1c898fe4c4e461634f8852c585a', '098607059b15cea5f6323b08c0178fcc360670cc0004fa2670a9da678575d5f7', '0d03720aa3cab6c9d10371db976b201782b409c0a896aeb102084a455a134aed', '12c7fab063dd6c58d625905f4b4b208a3e91cf914a71e341642b739fea33216f', '1396397c1af88c7f6d6b4041cdcf462445a66fb9298857f3f48843cf2737a173', '148dbf0c56e396d343cb8c6fbc0f4ff5395ed24790e706baffb3fa57ee84dda3', '1500e78639872af23f8198a8eb2a4360cbbe23098d351688eb0f5af84e36b2b8', '1c6e3f52ecd23767f46b0aed0cb598bedb0acaf1169beef14f9c90f556324177', '1cbf392c745b5c51e585c11444b50f8af9e0d133e3fd57f4b851efb21ef58dd6', '1e04f3784e515a20f0eaf93ba514998c3c9562e13c3457d489d0b8d2981ca9c8', '22fb196e001be5ce965dd94

<IPython.core.display.HTML object>

{0: MultiIndex(levels=[['010d1a29e1b462d4ba059f17faa95db8bc826b7166a92e04c68bfb64d6e6657f', '05ab66adb741a3e1e394ac4b60a50d79aae1a71ca28641c7d143cb7d16a080ae', '05b13df5c83e7d9d29621e9b72c5ac3f6d9c214a4747085defd0eb7ccded1043', '0797a62afcb35c7d266972271c076b47007299e14a14cf81655ef8980ec85275', '087674feadd6d9a73358fba161ac3b2fdb0ca1c898fe4c4e461634f8852c585a', '098607059b15cea5f6323b08c0178fcc360670cc0004fa2670a9da678575d5f7', '0d03720aa3cab6c9d10371db976b201782b409c0a896aeb102084a455a134aed', '12c7fab063dd6c58d625905f4b4b208a3e91cf914a71e341642b739fea33216f', '1396397c1af88c7f6d6b4041cdcf462445a66fb9298857f3f48843cf2737a173', '148dbf0c56e396d343cb8c6fbc0f4ff5395ed24790e706baffb3fa57ee84dda3', '1500e78639872af23f8198a8eb2a4360cbbe23098d351688eb0f5af84e36b2b8', '1c6e3f52ecd23767f46b0aed0cb598bedb0acaf1169beef14f9c90f556324177', '1cbf392c745b5c51e585c11444b50f8af9e0d133e3fd57f4b851efb21ef58dd6', '1e04f3784e515a20f0eaf93ba514998c3c9562e13c3457d489d0b8d2981ca9c8', '22fb196e001be5ce965dd94

<IPython.core.display.HTML object>

{0: MultiIndex(levels=[['010d1a29e1b462d4ba059f17faa95db8bc826b7166a92e04c68bfb64d6e6657f', '05ab66adb741a3e1e394ac4b60a50d79aae1a71ca28641c7d143cb7d16a080ae', '05b13df5c83e7d9d29621e9b72c5ac3f6d9c214a4747085defd0eb7ccded1043', '0797a62afcb35c7d266972271c076b47007299e14a14cf81655ef8980ec85275', '087674feadd6d9a73358fba161ac3b2fdb0ca1c898fe4c4e461634f8852c585a', '098607059b15cea5f6323b08c0178fcc360670cc0004fa2670a9da678575d5f7', '0d03720aa3cab6c9d10371db976b201782b409c0a896aeb102084a455a134aed', '12c7fab063dd6c58d625905f4b4b208a3e91cf914a71e341642b739fea33216f', '1396397c1af88c7f6d6b4041cdcf462445a66fb9298857f3f48843cf2737a173', '148dbf0c56e396d343cb8c6fbc0f4ff5395ed24790e706baffb3fa57ee84dda3', '1500e78639872af23f8198a8eb2a4360cbbe23098d351688eb0f5af84e36b2b8', '1c6e3f52ecd23767f46b0aed0cb598bedb0acaf1169beef14f9c90f556324177', '1cbf392c745b5c51e585c11444b50f8af9e0d133e3fd57f4b851efb21ef58dd6', '1e04f3784e515a20f0eaf93ba514998c3c9562e13c3457d489d0b8d2981ca9c8', '22fb196e001be5ce965dd94

<IPython.core.display.HTML object>

{0: MultiIndex(levels=[['010d1a29e1b462d4ba059f17faa95db8bc826b7166a92e04c68bfb64d6e6657f', '05ab66adb741a3e1e394ac4b60a50d79aae1a71ca28641c7d143cb7d16a080ae', '05b13df5c83e7d9d29621e9b72c5ac3f6d9c214a4747085defd0eb7ccded1043', '0797a62afcb35c7d266972271c076b47007299e14a14cf81655ef8980ec85275', '087674feadd6d9a73358fba161ac3b2fdb0ca1c898fe4c4e461634f8852c585a', '098607059b15cea5f6323b08c0178fcc360670cc0004fa2670a9da678575d5f7', '0d03720aa3cab6c9d10371db976b201782b409c0a896aeb102084a455a134aed', '12c7fab063dd6c58d625905f4b4b208a3e91cf914a71e341642b739fea33216f', '1396397c1af88c7f6d6b4041cdcf462445a66fb9298857f3f48843cf2737a173', '148dbf0c56e396d343cb8c6fbc0f4ff5395ed24790e706baffb3fa57ee84dda3', '1500e78639872af23f8198a8eb2a4360cbbe23098d351688eb0f5af84e36b2b8', '1c6e3f52ecd23767f46b0aed0cb598bedb0acaf1169beef14f9c90f556324177', '1cbf392c745b5c51e585c11444b50f8af9e0d133e3fd57f4b851efb21ef58dd6', '1e04f3784e515a20f0eaf93ba514998c3c9562e13c3457d489d0b8d2981ca9c8', '22fb196e001be5ce965dd94

<IPython.core.display.HTML object>