In [None]:
import pandas as pd
from sqlalchemy.orm import Query
from sqlalchemy.orm import Session

import src
import src.db.models.bert_data as bm
import src.db.models.open_discourse as od
from src.db.connect import make_engine

In [None]:
# Engine returned by the project's make_engine helper for the "DB" target;
# the export cells below open their connections from it.
engine = make_engine("DB")
# ORM session bound to the same engine.
s = Session(bind=engine)

In [None]:
# Directory that receives the exported CSV archives.
out_path = src.PATH / "tmp"
# Create the directory up front: DataFrame.to_csv does not create missing
# parent directories, so on a fresh checkout the exports would otherwise fail.
# NOTE(review): assumes src.PATH is a pathlib.Path -- confirm.
out_path.mkdir(parents=True, exist_ok=True)

# Predictions


In [None]:
# Output columns, in the order they appear in the CSV.
prediction_columns = [
    bm.Sample.id.label("sample_id"),
    bm.Sample.speeches_id.label("speech_id"),
    od.Politician.id.label("speaker_id"),
    bm.Sample.sentence_no,
    bm.Sample.sentence_length,
    bm.Prediction.elite,
    bm.Prediction.pplcentr,
    bm.Prediction.left,
    bm.Prediction.right,
    bm.Sample.text,
]

# Samples joined with their predictions, speeches and politicians.
# The ON clauses are left to SQLAlchemy to derive from the mapped models.
sample_joins = (
    Query(bm.Sample)
    .join(bm.Prediction)
    .join(od.Speech)
    .join(od.Politician)
)

# Keep only the export columns and drop duplicate rows.
query = sample_joins.with_entities(*prediction_columns).distinct()

# Run the SELECT on a short-lived connection and load the result into a frame.
with engine.connect() as conn:
    df = pd.read_sql(sql=query.statement, con=conn)

# The ".zip" suffix lets pandas infer zip compression for the written file.
predictions_file = out_path / "samples_predictions.csv.zip"
df.to_csv(predictions_file, index=False)

# Politicians

In [None]:
# Output columns, in the order they appear in the CSV.
speaker_columns = [
    od.Politician.id.label("speaker_id"),
    od.Politician.first_name,
    od.Politician.last_name,
    od.Politician.gender,
    od.Politician.birth_place,
    od.Politician.birth_date,
    od.Politician.death_date,
    od.Politician.academic_title,
    od.Politician.aristocracy,
    od.Politician.profession,
]

# Same join chain as the predictions export, so only politicians that are
# reachable from a sample with a prediction are selected.
sample_joins = (
    Query(bm.Sample)
    .join(bm.Prediction)
    .join(od.Speech)
    .join(od.Politician)
)

# DISTINCT collapses the many sample rows per politician into one row each.
query = sample_joins.with_entities(*speaker_columns).distinct()

# Run the SELECT on a short-lived connection and load the result into a frame.
with engine.connect() as conn:
    df = pd.read_sql(sql=query.statement, con=conn)

# The ".zip" suffix lets pandas infer zip compression for the written file.
speakers_file = out_path / "speakers.csv.zip"
df.to_csv(speakers_file, index=False)

# Speeches

In [None]:
# Output columns, in the order they appear in the CSV.
speech_columns = [
    od.Speech.id.label("speech_id"),
    od.Speech.electoral_term,
    od.Speech.session,
    od.Speech.date,
    od.Speech.politician_id.label("speaker_id"),
    od.Faction.abbreviation.label("faction"),
    od.Speech.document_url,
]

# Samples joined with predictions, speeches, politicians and factions.
# NOTE(review): which entity Faction is joined against is resolved by
# SQLAlchemy from the mapped models -- confirm it is the intended one.
sample_joins = (
    Query(bm.Sample)
    .join(bm.Prediction)
    .join(od.Speech)
    .join(od.Politician)
    .join(od.Faction)
)

# DISTINCT collapses the many sample rows per speech into one row each.
query = sample_joins.with_entities(*speech_columns).distinct()

# Run the SELECT on a short-lived connection and load the result into a frame.
with engine.connect() as conn:
    df = pd.read_sql(sql=query.statement, con=conn)

# The ".zip" suffix lets pandas infer zip compression for the written file.
speeches_file = out_path / "speeches.csv.zip"
df.to_csv(speeches_file, index=False)