In [None]:
import matplotlib as mlp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from sqlalchemy import case
from sqlalchemy import literal
from sqlalchemy.orm import Query

import src
import src.db.models.bert_data as bm
import src.db.models.open_discourse as od

In [None]:
# Raise pandas' display limits so long text columns and long tables are not truncated.
pd.options.display.max_colwidth = 2048
pd.options.display.max_rows = 256

# Database engine; "DB" is handed to the project's make_engine helper (its meaning is defined there).
engine = src.db.connect.make_engine("DB")

# Load data


In [None]:
# Cut-off per score: the query turns a score into 1 when it is >= its cut-off
# and into 0 when it is below.
thresh = {
    "elite": 0.5013018,
    "centr": 0.5017193,
    "left": 0.42243505,
    "right": 0.38281676,
}

In [None]:
# Hex colour per faction label; the keys use the same spelling as the values of
# the `abbreviation` column (e.g. "DIE LINKE.", "CDU/CSU").
# NOTE(review): presumably the palette for party plots — it is not used in the cells shown here.
colormap = {
    "CDU/CSU": "#000000",
    "Grüne": "#1AA037",
    "DIE LINKE.": "#8B008B",
    "FDP": "#FFEF00",
    "AfD": "#0489DB",
    "SPD": "#E3000F",
}

In [None]:
def _binarize(score, cutoff, name):
    """Return a labelled CASE expression: 1 if ``score >= cutoff``, 0 if ``score < cutoff``.

    No ELSE branch is supplied, so a score that satisfies neither comparison is
    not mapped to 0 or 1.
    """
    return case(
        (score >= cutoff, literal(1)),
        (score < cutoff, literal(0)),
    ).label(name)


# Columns that identify a sentence: its speech, position, speaker and faction.
sentence_columns = [
    od.Speech.id.label("speech_id"),
    od.Speech.electoral_term,
    od.Speech.date,
    bm.Sample.sentence_no,
    od.Politician.id.label("pol_id"),
    od.Politician.first_name,
    od.Politician.last_name,
    od.Faction.abbreviation,
]

# One 0/1 flag per prediction score, cut at the matching entry of `thresh`.
flag_columns = [
    _binarize(bm.Prediction.elite, thresh["elite"], "antielite"),
    _binarize(bm.Prediction.pplcentr, thresh["centr"], "pplcentr"),
    _binarize(bm.Prediction.left, thresh["left"], "left"),
    _binarize(bm.Prediction.right, thresh["right"], "right"),
]

# Samples joined with their prediction, speech, faction and politician;
# samples with sentence_length <= 2 and the "Fraktionslos" faction are excluded.
query = (
    Query(bm.Sample)
    .join(bm.Prediction)
    .join(od.Speech)
    .join(od.Faction)
    .join(od.Politician)
    .filter(
        bm.Sample.sentence_length > 2,
        od.Faction.abbreviation != "Fraktionslos",
    )
    .with_entities(*sentence_columns, *flag_columns)
)

In [None]:
# Execute the query and load the complete result set into a DataFrame.
with engine.connect() as conn:
    df_raw = pd.read_sql(query.statement, conn)

# Convert the speech dates to pandas datetimes.
df_raw["date"] = pd.to_datetime(df_raw["date"])

In [None]:
def _max_sentence_no_above_three(speech_rows):
    """True when the highest sentence_no among a speech's rows is greater than 3."""
    return speech_rows["sentence_no"].max() > 3


# Keep only speeches whose highest sentence_no exceeds 3.
# NOTE(review): this step was previously described as dropping speeches with
# fewer than 3 sentences, which does not obviously match the `> 3` test on the
# maximum sentence number — confirm the intended cut-off.
df_raw = df_raw.groupby("speech_id").filter(_max_sentence_no_above_three)

In [None]:
# Sanity check: the number of (speech_id, sentence_no) groups must equal the row count.
sentence_groups = df_raw.groupby(["speech_id", "sentence_no"])
assert sentence_groups.ngroups == len(df_raw)

# Basic sentence level stats


In [None]:
# Number of rows left in df_raw after the speech filter.
df_raw.shape[0]

1258876

In [None]:
# Percentage of rows flagged (value 1) for each label.
# The label columns are selected by name rather than by position (`iloc[:, 8:]`)
# so the result does not silently change if the query's column order changes.
label_cols = ["antielite", "pplcentr", "left", "right"]
(df_raw[label_cols].sum() / len(df_raw) * 100).round(1)

antielite    6.7
pplcentr     2.2
left         1.1
right        0.6
dtype: float64

In [None]:
# A sentence counts as populist only when it is flagged both anti-elite and
# people-centric. Comparing each flag with 1 is vectorised and keeps rows with a
# missing flag out of the populist class; mapping "product != 0" to 1 would
# count a NaN product as populist, because NaN != 0.
pop_total = (df_raw["antielite"].eq(1) & df_raw["pplcentr"].eq(1)).astype("int64")
print(pop_total.value_counts(normalize=False))
pop_total.value_counts(normalize=True).round(3)

0    1255987
1       2889
dtype: int64


0    0.998
1    0.002
dtype: float64

# Populism by Politician


## define funcs


In [None]:
def show_output(df, var, n=5):
    """Return the ``n`` highest-ranked rows per electoral term.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``electoral_term`` column and the column named by ``var``.
    var : str
        Column to rank by, highest value first.
    n : int, default 5
        Number of rows to keep per electoral term.

    Returns
    -------
    pandas.DataFrame
        The kept rows, one block per electoral term in ascending term order,
        with an added 1-based ``rank`` column. The original index labels are
        preserved. An input without any rows yields an empty frame that still
        has the ``rank`` column.
    """
    top_per_term = []
    for _, term_rows in df.groupby("electoral_term"):
        ranked = term_rows.sort_values(var, ascending=False)
        ranked["rank"] = list(range(1, len(ranked) + 1))
        top_per_term.append(ranked.head(n))

    if not top_per_term:
        # pd.concat([]) raises "No objects to concatenate"; return an empty,
        # correctly shaped frame instead.
        return df.head(0).assign(rank=pd.Series(dtype="int64"))
    return pd.concat(top_per_term)

In [None]:
def format_latex(tex):
    """Print a restyled version of a Styler-generated LaTeX table.

    The input is expected to be a ``tabular`` whose lines include ``\\toprule``,
    ``\\midrule`` and ``\\bottomrule`` (as produced by ``Styler.to_latex`` with
    ``hrules=True``). The function:

    * replaces every header line between ``\\toprule`` and the first
      ``\\midrule`` with one fixed five-column ``\\headrow`` line,
    * turns a leading ``\\multirow[..]{k}{*}{term}`` cell into ``\\textbf{term}``,
    * inserts an empty spacer row before every group except the first
      (a group start is recognised by its ``\\multirow`` cell),
    * appends ``\\midrule`` to every body row except the last.

    Groups may have any size and any term label.

    Parameters
    ----------
    tex : str
        LaTeX source of the table.

    Returns
    -------
    None
        The result is printed, not returned.

    Raises
    ------
    ValueError
        If one of the three rule lines cannot be found.
    """
    import re  # local import: `re` is not part of the notebook's import cell

    header_row = r"\headrow Term & Rank & First Name & Last Name & Party \\"
    spacer_row = r"& & & & \\\midrule"
    multirow_cell = re.compile(r"\\multirow\[\w\]\{\d+\}\{\*\}\{([^}]*)\}")

    lines = tex.splitlines()
    stripped = [line.strip() for line in lines]
    try:
        top = stripped.index(r"\toprule")
        mid = stripped.index(r"\midrule", top)
        bottom = stripped.index(r"\bottomrule", mid)
    except ValueError:
        raise ValueError(
            r"expected a tabular containing \toprule, \midrule and \bottomrule lines"
        ) from None

    # Everything up to \toprule is kept, the header rows collapse to one line.
    new = lines[: top + 1] + [header_row, lines[mid]]

    body = lines[mid + 1 : bottom]
    last = len(body) - 1
    for pos, row in enumerate(body):
        group_start = multirow_cell.match(row)
        if group_start:
            if pos > 0:
                new.append(spacer_row)
            row = r"\textbf{" + group_start.group(1) + "}" + row[group_start.end():]
        if pos < last:
            # The final body row is followed by \bottomrule, so it gets no \midrule.
            row = row + r"\midrule"
        new.append(row)

    new.extend(lines[bottom:])
    print("\n".join(new))

## Populism by electoral_term and politician


In [None]:
# Mean of the sentence-level 0/1 flags per politician and electoral term.
# Built as one chain: assigning columns onto the `.loc[...]`-filtered frame in
# separate statements triggers pandas' SettingWithCopyWarning.
pol_keys = ["pol_id", "first_name", "last_name", "abbreviation", "electoral_term"]
df_pol = (
    df_raw.drop(columns=["speech_id", "sentence_no", "date"])
    .groupby(pol_keys)
    .mean()
    .reset_index()
    # Rows with pol_id == -1 are removed (presumably a placeholder id — confirm).
    .loc[lambda d: d["pol_id"] != -1]
    # Combined populism score and its left/right weighted variants.
    .assign(pop=lambda d: d["antielite"] * d["pplcentr"])
    .assign(
        pop_left=lambda d: d["pop"] * d["left"],
        pop_right=lambda d: d["pop"] * d["right"],
    )
)

### general populism

-> use this for table in paper

In [None]:
# Highest-ranked politicians per electoral term by the combined score `pop`.
show_output(df=df_pol, var="pop")

Unnamed: 0,pol_id,first_name,last_name,abbreviation,electoral_term,antielite,pplcentr,left,right,pop,pop_left,pop_right,rank
646,11004183,Sahra,Wagenknecht,DIE LINKE.,18,0.309859,0.048709,0.119718,0.023474,0.015093,0.001807,0.000354,1
340,11003786,Katja,Kipping,DIE LINKE.,18,0.118351,0.085106,0.064495,0.007314,0.010072,0.00065,7.4e-05,2
410,11003869,Sabine,Zimmermann,DIE LINKE.,18,0.121333,0.073886,0.055053,0.002898,0.008965,0.000494,2.6e-05,3
261,11003625,Andreas,Scheuer,CDU/CSU,18,0.114094,0.073826,0.0,0.033557,0.008423,0.0,0.000283,4
523,11004080,Jutta,Krellmann,DIE LINKE.,18,0.124219,0.059681,0.042332,0.0,0.007414,0.000314,0.0,5
1278,11004892,Martin,Sichert,AfD,19,0.31541,0.130492,0.050492,0.112131,0.041158,0.002078,0.004615,1
1309,11004930,Alice,Weidel,AfD,19,0.38974,0.073507,0.035988,0.079632,0.028649,0.001031,0.002281,2
1250,11004859,Martin,Reichardt,AfD,19,0.267475,0.093438,0.027104,0.086305,0.024992,0.000677,0.002157,3
1254,11004865,Bernd,Riexinger,DIE LINKE.,19,0.189542,0.114379,0.124183,0.003268,0.02168,0.002692,7.1e-05,4
1104,11004698,Gottfried,Curio,AfD,19,0.283436,0.075617,0.006173,0.15535,0.021433,0.000132,0.00333,5


In [None]:
# Ranking by `pop`, reduced to the columns of the paper table and indexed by
# (electoral_term, rank).
top_pop = show_output(df_pol, "pop").rename(columns={"abbreviation": "party"})
table_cols = ["electoral_term", "rank", "first_name", "last_name", "party"]
df = top_pop[table_cols].set_index(["electoral_term", "rank"])

# Export through the Styler, then print the restyled LaTeX source.
tex = df.style.to_latex(hrules=True)
format_latex(tex)

\begin{tabular}{lllll}
\toprule
\headrow Term & Rank & First Name & Last Name & Party \\
\midrule
\textbf{18} & 1 & Sahra & Wagenknecht & DIE LINKE. \\\midrule
 & 2 & Katja & Kipping & DIE LINKE. \\\midrule
 & 3 & Sabine & Zimmermann & DIE LINKE. \\\midrule
 & 4 & Andreas & Scheuer & CDU/CSU \\\midrule
 & 5 & Jutta & Krellmann & DIE LINKE. \\\midrule
& & & & \\\midrule
\textbf{19} & 1 & Martin & Sichert & AfD \\\midrule
 & 2 & Alice & Weidel & AfD \\\midrule
 & 3 & Martin & Reichardt & AfD \\\midrule
 & 4 & Bernd & Riexinger & DIE LINKE. \\\midrule
 & 5 & Gottfried & Curio & AfD \\
\bottomrule
\end{tabular}


### left-wing populism


In [None]:
# Highest-ranked politicians per electoral term by `pop_left` (pop weighted by the left share).
show_output(df=df_pol, var="pop_left")

Unnamed: 0,pol_id,first_name,last_name,abbreviation,electoral_term,antielite,pplcentr,left,right,pop,pop_left,pop_right,rank
646,11004183,Sahra,Wagenknecht,DIE LINKE.,18,0.309859,0.048709,0.119718,0.023474,0.015093,0.001807,0.000354,1
340,11003786,Katja,Kipping,DIE LINKE.,18,0.118351,0.085106,0.064495,0.007314,0.010072,0.00065,7.4e-05,2
10,11000365,Jörg Diether,Dehm-Desoi,DIE LINKE.,18,0.232682,0.026643,0.095915,0.021314,0.006199,0.000595,0.000132,3
410,11003869,Sabine,Zimmermann,DIE LINKE.,18,0.121333,0.073886,0.055053,0.002898,0.008965,0.000494,2.6e-05,4
588,11004129,Richard,Pitterle,DIE LINKE.,18,0.156664,0.025721,0.080281,0.0,0.00403,0.000323,0.0,5
1254,11004865,Bernd,Riexinger,DIE LINKE.,19,0.189542,0.114379,0.124183,0.003268,0.02168,0.002692,7.1e-05,1
1278,11004892,Martin,Sichert,AfD,19,0.31541,0.130492,0.050492,0.112131,0.041158,0.002078,0.004615,2
647,11004183,Sahra,Wagenknecht,DIE LINKE.,19,0.303333,0.056667,0.103333,0.008333,0.017189,0.001776,0.000143,3
1231,11004837,Zaklin,Nastic,DIE LINKE.,19,0.25734,0.063903,0.107081,0.02418,0.016445,0.001761,0.000398,4
341,11003786,Katja,Kipping,DIE LINKE.,19,0.13285,0.130435,0.088164,0.003623,0.017328,0.001528,6.3e-05,5


### right-wing populism


In [None]:
# Highest-ranked politicians per electoral term by `pop_right` (pop weighted by the right share).
show_output(df=df_pol, var="pop_right")

Unnamed: 0,pol_id,first_name,last_name,abbreviation,electoral_term,antielite,pplcentr,left,right,pop,pop_left,pop_right,rank
646,11004183,Sahra,Wagenknecht,DIE LINKE.,18,0.309859,0.048709,0.119718,0.023474,0.015093,0.001807,0.000354,1
261,11003625,Andreas,Scheuer,CDU/CSU,18,0.114094,0.073826,0.0,0.033557,0.008423,0.0,0.000283,2
309,11003746,Sevim,Dağdelen,DIE LINKE.,18,0.243608,0.028359,0.02278,0.034868,0.006908,0.000157,0.000241,3
10,11000365,Jörg Diether,Dehm-Desoi,DIE LINKE.,18,0.232682,0.026643,0.095915,0.021314,0.006199,0.000595,0.000132,4
340,11003786,Katja,Kipping,DIE LINKE.,18,0.118351,0.085106,0.064495,0.007314,0.010072,0.00065,7.4e-05,5
1278,11004892,Martin,Sichert,AfD,19,0.31541,0.130492,0.050492,0.112131,0.041158,0.002078,0.004615,1
1104,11004698,Gottfried,Curio,AfD,19,0.283436,0.075617,0.006173,0.15535,0.021433,0.000132,0.00333,2
1309,11004930,Alice,Weidel,AfD,19,0.38974,0.073507,0.035988,0.079632,0.028649,0.001031,0.002281,3
1250,11004859,Martin,Reichardt,AfD,19,0.267475,0.093438,0.027104,0.086305,0.024992,0.000677,0.002157,4
1153,11004749,Martin,Hess,AfD,19,0.293129,0.061404,0.005117,0.112573,0.017999,9.2e-05,0.002026,5


## Populism by Politician + speech + electoral_term


In [None]:
# Step 1: mean of the sentence-level 0/1 flags per speech, with the populism
# scores computed per speech. Built as one chain: assigning columns onto the
# `.loc[...]`-filtered frame in separate statements triggers pandas'
# SettingWithCopyWarning.
speech_keys = [
    "pol_id",
    "first_name",
    "last_name",
    "abbreviation",
    "electoral_term",
    "speech_id",
]
df_speeches = (
    df_raw.drop(columns=["sentence_no", "date"])
    .groupby(speech_keys)
    .mean()
    .reset_index()
    # Rows with pol_id == -1 are removed (presumably a placeholder id — confirm).
    .loc[lambda d: d["pol_id"] != -1]
    .assign(pop=lambda d: d["antielite"] * d["pplcentr"])
    .assign(
        pop_left=lambda d: d["pop"] * d["left"],
        pop_right=lambda d: d["pop"] * d["right"],
    )
)

# Step 2: average the per-speech values per politician and electoral term.
# NOTE: this overwrites the `df_pol` computed directly from sentences earlier in
# the notebook; every speech now carries equal weight regardless of its length.
df_pol = (
    df_speeches.drop(columns="speech_id")
    .groupby(["pol_id", "first_name", "last_name", "abbreviation", "electoral_term"])
    .mean()
    .reset_index()
)

### general populism


In [None]:
# Highest-ranked politicians per electoral term by `pop`, using the speech-averaged df_pol.
show_output(df=df_pol, var="pop")

Unnamed: 0,pol_id,first_name,last_name,abbreviation,electoral_term,antielite,pplcentr,left,right,pop,pop_left,pop_right,rank
641,11004183,Sahra,Wagenknecht,DIE LINKE.,18,0.274289,0.039201,0.103937,0.025621,0.01249,0.001786,0.000593,1
256,11003625,Andreas,Scheuer,CDU/CSU,18,0.112495,0.076862,0.0,0.034879,0.008239,0.0,0.000436,2
405,11003869,Sabine,Zimmermann,DIE LINKE.,18,0.104987,0.073413,0.054851,0.001975,0.007884,0.000604,1.3e-05,3
16,11001023,Ulla,Jelpke,DIE LINKE.,18,0.143658,0.042774,0.038165,0.013965,0.007573,0.000738,8.7e-05,4
5,11000365,Jörg Diether,Dehm-Desoi,DIE LINKE.,18,0.20388,0.020538,0.07436,0.021962,0.007298,0.001571,0.000161,5
1273,11004892,Martin,Sichert,AfD,19,0.270665,0.114663,0.044342,0.103656,0.034284,0.002769,0.005539,1
1304,11004930,Alice,Weidel,AfD,19,0.379145,0.072685,0.032144,0.073528,0.030785,0.001175,0.002545,2
1135,11004736,Mariana Iris,Harder-Kühnel,AfD,19,0.198616,0.102246,0.029713,0.100301,0.026049,0.003297,0.004667,3
1245,11004859,Martin,Reichardt,AfD,19,0.259349,0.086581,0.023207,0.081402,0.023748,0.00104,0.00314,4
1249,11004865,Bernd,Riexinger,DIE LINKE.,19,0.176643,0.109654,0.115164,0.003367,0.023342,0.004271,0.000291,5


### left-wing populism


In [None]:
# Highest-ranked politicians per electoral term by `pop_left`, using the speech-averaged df_pol.
show_output(df=df_pol, var="pop_left")

Unnamed: 0,pol_id,first_name,last_name,abbreviation,electoral_term,antielite,pplcentr,left,right,pop,pop_left,pop_right,rank
323,11003773,Inge,Höger-Neuling,DIE LINKE.,18,0.091668,0.013452,0.025414,0.003997,0.00517,0.002857,2.9e-05,1
641,11004183,Sahra,Wagenknecht,DIE LINKE.,18,0.274289,0.039201,0.103937,0.025621,0.01249,0.001786,0.000593,2
5,11000365,Jörg Diether,Dehm-Desoi,DIE LINKE.,18,0.20388,0.020538,0.07436,0.021962,0.007298,0.001571,0.000161,3
395,11003858,Alexander,Ulrich,DIE LINKE.,18,0.212245,0.036868,0.044595,0.014548,0.007054,0.00111,0.000144,4
207,11003556,Gabriele,Hiller-Ohm,SPD,18,0.038301,0.041432,0.026086,0.0,0.004176,0.000833,0.0,5
380,11003837,Gerhard,Schick,Grüne,19,0.163133,0.042694,0.106275,0.0,0.01336,0.004554,0.0,1
336,11003786,Katja,Kipping,DIE LINKE.,19,0.114209,0.122394,0.079609,0.002787,0.020963,0.004305,0.000177,2
1249,11004865,Bernd,Riexinger,DIE LINKE.,19,0.176643,0.109654,0.115164,0.003367,0.023342,0.004271,0.000291,3
1135,11004736,Mariana Iris,Harder-Kühnel,AfD,19,0.198616,0.102246,0.029713,0.100301,0.026049,0.003297,0.004667,4
6,11000365,Jörg Diether,Dehm-Desoi,DIE LINKE.,19,0.169962,0.0424,0.048584,0.014086,0.012899,0.003209,0.000285,5


### right-wing populism


In [None]:
# Highest-ranked politicians per electoral term by `pop_right`, using the speech-averaged df_pol.
show_output(df=df_pol, var="pop_right")

Unnamed: 0,pol_id,first_name,last_name,abbreviation,electoral_term,antielite,pplcentr,left,right,pop,pop_left,pop_right,rank
213,11003572,Michael,Kretschmer,CDU/CSU,18,0.043029,0.0444,0.004167,0.014113,0.007278,5.8e-05,0.001563,1
641,11004183,Sahra,Wagenknecht,DIE LINKE.,18,0.274289,0.039201,0.103937,0.025621,0.01249,0.001786,0.000593,2
838,11004346,Yvonne,Magwas,CDU/CSU,18,0.059188,0.032314,0.003559,0.02135,0.004102,1e-06,0.000453,3
256,11003625,Andreas,Scheuer,CDU/CSU,18,0.112495,0.076862,0.0,0.034879,0.008239,0.0,0.000436,4
304,11003746,Sevim,Dağdelen,DIE LINKE.,18,0.162156,0.022724,0.016529,0.024319,0.004294,0.000433,0.000223,5
1107,11004707,Thomas,Ehrhorn,AfD,19,0.24671,0.050332,0.012318,0.13078,0.020296,0.000206,0.00928,1
1099,11004698,Gottfried,Curio,AfD,19,0.269928,0.061637,0.003636,0.151902,0.022402,0.000102,0.006492,2
1273,11004892,Martin,Sichert,AfD,19,0.270665,0.114663,0.044342,0.103656,0.034284,0.002769,0.005539,3
1135,11004736,Mariana Iris,Harder-Kühnel,AfD,19,0.198616,0.102246,0.029713,0.100301,0.026049,0.003297,0.004667,4
1245,11004859,Martin,Reichardt,AfD,19,0.259349,0.086581,0.023207,0.081402,0.023748,0.00104,0.00314,5
