In [None]:
import numpy as np
import pandas as pd
from sqlalchemy.orm import Query

import src.db.connect
import src.db.load
import src.db.models.bert_data as bm

In [None]:
# Build the database engine through the project's connection helper.
# NOTE(review): "DB" is presumably the name of a connection/config entry
# resolved inside make_engine — confirm against src.db.connect.
engine = src.db.connect.make_engine("DB")

In [None]:
# Only labels entered by these coders are loaded.
valid_coders = ["riedel", "coudry", "richter"]

# Columns of the result: sample id and text, the coder's username, and the two
# ratings aliased to the short names ("elite", "centr") used further down.
selected_columns = (
    bm.Label.sample_id.label("id"),
    bm.Sample.text,
    bm.Label.username,
    bm.Label.pop_antielite.label("elite"),
    bm.Label.pop_pplcentr.label("centr"),
)

# One row per (sample, coder) pair, restricted to the coders listed above.
query = (
    Query(bm.Label)
    .join(bm.Sample)
    .with_entities(*selected_columns)
    .filter(bm.Label.username.in_(valid_coders))
)


with engine.connect() as conn:
    df = pd.read_sql(query.statement, conn)

# Both rating columns are cast to plain integers; this raises if either column
# contains missing values.
df = df.astype({"elite": int, "centr": int})

# Disagreements among three coders (single variable)

In [None]:
# Wide table of the "elite" rating: one row per sample, one column per coder.
# Samples that are missing a rating from any coder are dropped.
df_pivot = (
    df.pivot(index=["id", "text"], columns="username", values="elite")
    .dropna()
    .reset_index()
    .set_index("id")
)

# Keep every sample on which the three coders are not unanimous.
df_unequal = df_pivot.loc[
    ~((df_pivot["riedel"] == df_pivot["coudry"]) & (df_pivot["coudry"] == df_pivot["richter"])),
    ["coudry", "riedel", "richter", "text"],
]

# NOTE(review): .xls output relies on the xlwt writer, which pandas 2.0 no
# longer provides — confirm the pandas version this notebook runs on.
df_unequal.to_excel(src.PATH / "tmp/unequal_elite_three_coders.xls")

In [None]:
# Wide table of the "centr" rating: one row per sample, one column per coder.
# Samples that are missing a rating from any coder are dropped.
df_pivot = (
    df.pivot(index=["id", "text"], columns="username", values="centr")
    .dropna()
    .reset_index()
    .set_index("id")
)

# Keep every sample on which the three coders are not unanimous.
df_unequal = df_pivot.loc[
    ~((df_pivot["riedel"] == df_pivot["coudry"]) & (df_pivot["coudry"] == df_pivot["richter"])),
    ["coudry", "riedel", "richter", "text"],
]

# NOTE(review): .xls output relies on the xlwt writer, which pandas 2.0 no
# longer provides — confirm the pandas version this notebook runs on.
df_unequal.to_excel(src.PATH / "tmp/unequal_centr_three_coders.xls")

# Disagreements between two coders (single variable)

In [None]:
# Wide table of the "elite" rating for riedel and coudry only. The richter
# column is removed before dropna() so that a missing richter rating does not
# exclude the sample.
df_pivot = (
    df.pivot(index=["id", "text"], columns="username", values="elite")
    .drop(columns="richter")
    .dropna()
    .reset_index()
    .set_index("id")
)

# Keep the samples on which the two coders gave different ratings.
df_unequal = df_pivot.loc[
    df_pivot["riedel"] != df_pivot["coudry"],
    ["coudry", "riedel", "text"],
]

# NOTE(review): .xls output relies on the xlwt writer, which pandas 2.0 no
# longer provides — confirm the pandas version this notebook runs on.
df_unequal.to_excel(src.PATH / "tmp/unequal_elite_two_coders.xls")

In [None]:
# Wide table of the "centr" rating for riedel and coudry only. The richter
# column is removed before dropna() so that a missing richter rating does not
# exclude the sample.
df_pivot = (
    df.pivot(index=["id", "text"], columns="username", values="centr")
    .drop(columns="richter")
    .dropna()
    .reset_index()
    .set_index("id")
)

# Keep the samples on which the two coders gave different ratings.
df_unequal = df_pivot.loc[
    df_pivot["riedel"] != df_pivot["coudry"],
    ["coudry", "riedel", "text"],
]

# NOTE(review): .xls output relies on the xlwt writer, which pandas 2.0 no
# longer provides — confirm the pandas version this notebook runs on.
df_unequal.to_excel(src.PATH / "tmp/unequal_centr_two_coders.xls")

# Disagreements between two coders (both variables combined)

In [None]:
def set_label(row):
    """Collapse the two binary ratings of one row into a single code.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys ``"elite"`` and ``"centr"``.

    Returns
    -------
    int
        ``-9`` if either rating is missing (NaN, ``None`` or ``pd.NA``),
        ``12`` if both ratings equal 1,
        ``1``  if only ``"elite"`` equals 1,
        ``2``  if only ``"centr"`` equals 1,
        ``0``  otherwise.
    """
    elite, centr = row["elite"], row["centr"]

    # Missing values are tested first and with pd.isna: np.isnan raises
    # TypeError on None/pd.NA, and comparing pd.NA with 1 cannot be used in a
    # boolean context. For non-missing input the outcome does not depend on
    # where this check sits, because a missing value never equals 1.
    if pd.isna(elite) or pd.isna(centr):
        return -9

    if elite == 1 and centr == 1:
        return 12
    if elite == 1:
        return 1
    if centr == 1:
        return 2
    return 0


# Attach the combined code from set_label to a copy of the long table, then
# spread it into one column per coder. The richter column is removed before
# dropna() so that a missing richter code does not exclude the sample.
df_pivot = (
    df.assign(label=df.apply(set_label, axis=1))
    .pivot(index=["id", "text"], columns="username", values="label")
    .drop(columns="richter")
    .dropna()
    .reset_index()
    .set_index("id")
)

# Keep the samples on which the two coders' combined codes differ.
df_unequal = df_pivot.loc[
    df_pivot["riedel"] != df_pivot["coudry"],
    ["coudry", "riedel", "text"],
]

# NOTE(review): .xls output relies on the xlwt writer, which pandas 2.0 no
# longer provides — confirm the pandas version this notebook runs on.
df_unequal.to_excel(src.PATH / "tmp/unequal_both_vars_two_coders.xls")