In [None]:
import pandas as pd
from sqlalchemy.orm import Query

import src
from src.db.connect import make_engine
from src.db.models import bert_data as bm
from src.db.models import open_discourse as od

In [None]:
# Identifier handed to the project's engine factory.
# NOTE(review): presumably selects a connection configuration — confirm in src.db.connect.
DB_IDENTIFIER = "DB"

engine = make_engine(DB_IDENTIFIER)

In [None]:
# Row restrictions: only electoral terms 18 and 19, and no rows whose faction
# or politician id is -1.
# NOTE(review): -1 looks like a placeholder for "unknown" — confirm against the schema.
row_filters = (
    od.Speech.electoral_term.in_([18, 19]),
    od.Faction.id != -1,
    od.Speech.politician_id != -1,
)

# Columns of the exported table, relabelled to flat, self-describing names:
# speech metadata, speaker identity, faction, the sentence text and the four
# prediction fields.
output_columns = (
    od.Speech.electoral_term.label("speech_term"),
    od.Speech.date.label("speech_date"),
    bm.Sample.speeches_id.label("speech_id"),
    bm.Sample.sentence_no.label("sentence_no"),
    od.Politician.id.label("politician_id"),
    od.Politician.first_name.label("politician_first_name"),
    od.Politician.last_name.label("politician_last_name"),
    od.Faction.abbreviation.label("faction_abbrv"),
    bm.Sample.text.label("sentence"),
    bm.Prediction.elite.label("pred_antielite"),
    bm.Prediction.pplcentr.label("pred_peoplecentrism"),
    bm.Prediction.left.label("pred_left"),
    bm.Prediction.right.label("pred_right"),
)

# Sort by term, then date, then speech, then sentence position within the speech.
sort_keys = (
    od.Speech.electoral_term.asc(),
    od.Speech.date.asc(),
    bm.Sample.speeches_id.asc(),
    bm.Sample.sentence_no.asc(),
)

# Session-less ORM query, built only so that its `.statement` can be executed
# later. No ON clauses are given, so the join conditions are left to SQLAlchemy
# to derive from the mapped models.
query = (
    Query(od.Speech)
    .join(od.Politician)
    .join(bm.Sample)
    .join(od.Faction)
    .join(bm.Prediction)
    .filter(*row_filters)
    .with_entities(*output_columns)
    .order_by(*sort_keys)
    .distinct()  # collapse duplicate rows produced by the joins
)

In [None]:
# Turn the ORM query into its underlying SQL statement, then let pandas execute
# it on a short-lived connection; the `with` block closes the connection once
# the result has been loaded into `df`.
statement = query.statement
with engine.connect() as connection:
    df = pd.read_sql(sql=statement, con=connection)

In [None]:
# Persist the prediction table as a compressed CSV under the project path.
#
# The target name ends in ".zip", so a genuine ZIP archive is written. Storing
# a gzip stream under this name (as was done before) yields a file that unzip
# tools reject and that pandas cannot read back with its extension-based
# compression inference.
df.to_csv(
    src.PATH / "tmp/predictions.csv.zip",
    compression={
        "method": "zip",
        # Name of the CSV member inside the archive.
        "archive_name": "predictions.csv",
        # Forwarded to zipfile.ZipFile; 9 is the strongest DEFLATE level.
        "compresslevel": 9,
    },
    index=False,  # the default RangeIndex carries no information
)