In [1]:
import pandas as pd

import src
import src.pop_aggregate as pop_metrics

# Load data


In [2]:
# Read the per-sentence predictions and attach them to the raw sentence table.
# Rows are matched on the shared "sample_id" column (default inner join).
preds = pd.read_parquet(src.PATH / "data/interim/sentence_predictions.parquet.gzip")
df = pd.read_parquet(src.PATH / "data/raw/sentences.parquet.gzip").merge(
    preds, on="sample_id"
)

In [3]:
# Cutoffs used to binarise the four score columns (value > cutoff -> 1).
# NOTE(review): the provenance of these numbers is not recorded in this notebook.
thresh = {
    "elite": 0.415961,
    "pplcentr": 0.295400,
    "left": 0.429109,
    "right": 0.302714,
}

In [4]:
# Binarise each score column against its cutoff: 1 when strictly above, else 0.
# A vectorised comparison replaces the per-element Python lambda; missing values
# compare as False and therefore still map to 0.
for key, val in thresh.items():
    df[key] = df[key].gt(val).astype(int)

In [5]:
# Drop short speeches: keep only rows belonging to a speech whose highest
# sentence_no is greater than 3.
# NOTE(review): confirm the intended minimum length; it depends on whether
# sentence_no starts at 0 or 1.
max_sentence_no = df.groupby("speech_id")["sentence_no"].transform("max")
df = df[max_sentence_no > 3]

# Basic sentence-level stats


In [6]:
# Percentage of all sentences flagged for each label, rounded to one decimal.
label_cols = ["elite", "pplcentr", "left", "right"]
df[label_cols].sum().div(len(df)).mul(100).round(1)

elite       7.4
pplcentr    2.1
left        1.2
right       0.5
dtype: float64

In [7]:
# A sentence is counted as populist only when it is flagged on both the
# "elite" and the "pplcentr" label: any non-zero product becomes 1, zero stays 0.
# Vectorised instead of a per-element lambda; results are unchanged.
pop_total = (df["elite"] * df["pplcentr"]).ne(0).astype(int)
print(pop_total.value_counts(normalize=False))
pop_total.value_counts(normalize=True).round(3)

0    1256473
1       2403
dtype: int64


0    0.998
1    0.002
dtype: float64

# Populism by Politician


## Define helper functions


In [8]:
def show_output(df, var, n=5):
    """Return the top-``n`` rows per electoral term, ranked by ``var``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an "electoral_term" column and the column named by ``var``.
    var : str
        Column to sort by, highest value first.
    n : int, default 5
        Number of rows to keep per electoral term.

    Returns
    -------
    pandas.DataFrame
        The kept rows of every term stacked in term order, with an added
        "rank" column (1 = highest ``var`` within the term). The original
        index labels are preserved. An empty input yields an empty frame
        that still carries the "rank" column.
    """
    top_rows = []
    for _, group in df.groupby("electoral_term"):
        ranked = group.sort_values(var, ascending=False)
        # Rank is the position within the term after sorting.
        ranked["rank"] = list(range(1, len(ranked) + 1))
        top_rows.append(ranked.head(n))
    if not top_rows:
        # pd.concat raises on an empty list, so return an empty result instead.
        return df.iloc[0:0].assign(rank=pd.Series(dtype="int64"))
    return pd.concat(top_rows)

In [9]:
def format_latex(tex):
    """Rewrite a Styler-generated LaTeX table into the layout used for the paper.

    The rewrite is purely positional (0-based line numbers of ``tex``), so it
    only produces the intended table for the two-term, five-rows-per-term
    layout: line 2 becomes the custom header row, line 3 is dropped, the
    multirow labels on lines 5 and 10 become bold term labels, an empty spacer
    row is inserted before line 10, and lines 5 through 13 get a trailing
    ``\\midrule``. All other lines pass through untouched.
    """
    header = r"\headrow Term & Rank & First Name & Last Name & Party \\"
    spacer = r"& & & & \\\midrule"
    # line number -> (multirow cell emitted by pandas, bold replacement)
    term_cells = {
        5: (r"\multirow[c]{5}{*}{18}", r"\textbf{18}"),
        10: (r"\multirow[c]{5}{*}{19}", r"\textbf{19}"),
    }

    formatted = []
    for idx, row in enumerate(tex.splitlines()):
        if idx == 3:
            # second header line (index names) is not wanted
            continue
        if idx == 2:
            formatted.append(header)
            continue
        if idx in term_cells:
            old_cell, new_cell = term_cells[idx]
            row = row.replace(old_cell, new_cell)
        if idx == 10:
            # blank separator row between the two electoral terms
            formatted.append(spacer)
        if 5 <= idx <= 13:
            row += r"\midrule"
        formatted.append(row)

    return "\n".join(formatted)


## Populism by electoral_term and politician


In [10]:
# Average every numeric column per politician and electoral term. The label
# columns are 0/1 after thresholding, so their means are shares of flagged sentences.
df_pol = (
    df.drop(columns=["speech_id", "sentence_no", "date"])
    .groupby(["politician_id", "first_name", "last_name", "abbreviation", "electoral_term"])
    .mean(numeric_only=True)
    .reset_index()
)

# Exclude politician_id == -1 (presumably speakers without a known id -- confirm).
# Explicit copy so the column assignments below write to an independent frame.
df_pol = df_pol.loc[df_pol.politician_id != -1, :].copy()

# Multiplicative scores built from the per-politician shares.
df_pol["pop"] = df_pol.elite * df_pol.pplcentr
df_pol["pop_left"] = df_pol["pop"] * df_pol.left
df_pol["pop_right"] = df_pol["pop"] * df_pol.right

# Module-level settings on src.pop_aggregate naming the two dimension columns.
pop_metrics.ELITE = "elite"
pop_metrics.CENTR = "pplcentr"

# Row-wise aggregation indices. Judging by the displayed results, bollen looks
# like the mean of the two shares and goertz like their minimum -- confirm in
# src.pop_aggregate.
df_pol["pop_bollen"] = df_pol.apply(pop_metrics.bollen_index, axis=1)
df_pol["pop_goertz"] = df_pol.apply(pop_metrics.goertz_index, axis=1)

# NOTE: this re-binds `thresh`, which earlier held the sentence-level cutoffs.
# From here on it holds the upper-quartile values of the politician-level shares.
thresh = {
    "elite": df_pol.elite.quantile(0.75),
    "pplcentr": df_pol.pplcentr.quantile(0.75),
}
# Extra keyword arguments to DataFrame.apply are forwarded to the function.
df_pol["pop_sartori"] = df_pol.apply(pop_metrics.sartori_index, axis=1, threshold=thresh)

### General populism

Use this ranking for the table in the paper.

In [11]:
# Top politicians per electoral term by the multiplicative "pop" score.
show_output(df_pol, var="pop")

Unnamed: 0,politician_id,first_name,last_name,abbreviation,electoral_term,sample_id,session,sentence_length,elite,pplcentr,left,right,pop,pop_left,pop_right,pop_bollen,pop_goertz,pop_sartori,rank
646,11004183,Sahra,Wagenknecht,DIE LINKE,18,639690.893192,134.489437,17.965376,0.324531,0.039319,0.137911,0.013498,0.01276,0.00176,0.000172,0.181925,0.039319,1,1
261,11003625,Andreas,Scheuer,CDU/CSU,18,550801.208054,97.228188,15.167785,0.114094,0.067114,0.006711,0.013423,0.007657,5.1e-05,0.000103,0.090604,0.067114,1,2
340,11003786,Katja,Kipping,DIE LINKE,18,618833.52859,126.170878,15.108378,0.113697,0.054521,0.041888,0.003324,0.006199,0.00026,2.1e-05,0.084109,0.054521,1,3
410,11003869,Sabine,Zimmermann,DIE LINKE,18,615838.009417,124.787396,13.299529,0.120971,0.051068,0.047447,0.001811,0.006178,0.000293,1.1e-05,0.08602,0.051068,1,4
115,11003132,Katrin Dagmar,Göring-Eckardt,Grüne,18,617595.916288,125.554715,14.662128,0.13414,0.043369,0.026475,0.001765,0.005817,0.000154,1e-05,0.088754,0.043369,1,5
1278,11004892,Martin,Sichert,AfD,19,685621.783607,127.916066,17.36459,0.308852,0.113443,0.051803,0.080656,0.035037,0.001815,0.002826,0.211148,0.113443,1,1
1309,11004930,Alice,Weidel,AfD,19,660940.638591,117.51072,16.506891,0.375191,0.061256,0.054364,0.061256,0.022983,0.001249,0.001408,0.218224,0.061256,1,2
1332,11004962,Janosch,Dahmen,Grüne,19,297482.333333,232.142857,20.678571,0.321429,0.071429,0.0,0.0,0.022959,0.0,0.0,0.196429,0.071429,1,3
1250,11004859,Martin,Reichardt,AfD,19,606634.831669,130.936519,15.715407,0.259629,0.078459,0.019971,0.07418,0.02037,0.000407,0.001511,0.169044,0.078459,1,4
1153,11004749,Martin,Hess,AfD,19,604936.716374,133.256579,16.232456,0.318713,0.059211,0.003655,0.097953,0.018871,6.9e-05,0.001848,0.188962,0.059211,1,5


In [12]:
# Build the paper table from the "pop" ranking. A dedicated name is used so the
# sentence-level `df` that earlier cells read is not overwritten by this
# small table, which keeps those cells re-runnable.
top_pop = (
    show_output(df_pol, "pop")
    .rename({"abbreviation": "party"}, axis=1)
    .loc[:, ["electoral_term", "rank", "first_name", "last_name", "party"]]
    .set_index(["electoral_term", "rank"])
)

tex = top_pop.style.to_latex(hrules=True)
tex = format_latex(tex)
out = src.PATH / "results/tables/populist_politicians.tex"
# Make sure the target folder exists on a fresh checkout.
out.parent.mkdir(parents=True, exist_ok=True)
# Explicit UTF-8: the names contain non-ASCII characters, and the default
# encoding of write_text depends on the platform locale.
out.write_text(tex, encoding="utf-8")

609

### Goertz Index


In [13]:
# Top politicians per electoral term by the Goertz index column.
show_output(df_pol, var="pop_goertz")

Unnamed: 0,politician_id,first_name,last_name,abbreviation,electoral_term,sample_id,session,sentence_length,elite,pplcentr,left,right,pop,pop_left,pop_right,pop_bollen,pop_goertz,pop_sartori,rank
261,11003625,Andreas,Scheuer,CDU/CSU,18,550801.208054,97.228188,15.167785,0.114094,0.067114,0.006711,0.013423,0.007657,5.1e-05,0.000103,0.090604,0.067114,1,1
340,11003786,Katja,Kipping,DIE LINKE,18,618833.52859,126.170878,15.108378,0.113697,0.054521,0.041888,0.003324,0.006199,0.00026,2.1e-05,0.084109,0.054521,1,2
410,11003869,Sabine,Zimmermann,DIE LINKE,18,615838.009417,124.787396,13.299529,0.120971,0.051068,0.047447,0.001811,0.006178,0.000293,1.1e-05,0.08602,0.051068,1,3
921,11004390,Corinna,Rüffer,Grüne,18,653618.127771,140.663625,14.969361,0.062581,0.050847,0.02086,0.0,0.003182,6.6e-05,0.0,0.056714,0.050847,0,4
699,11004256,Karl Heinz,Brunner,SPD,18,636995.518316,133.176929,18.069369,0.052221,0.048324,0.003118,0.003118,0.002524,8e-06,8e-06,0.050273,0.048324,0,5
1278,11004892,Martin,Sichert,AfD,19,685621.783607,127.916066,17.36459,0.308852,0.113443,0.051803,0.080656,0.035037,0.001815,0.002826,0.211148,0.113443,1,1
1029,11004454,Pia,Zimmermann,DIE LINKE,19,804853.309717,97.388664,14.769231,0.125506,0.097166,0.066802,0.0,0.012195,0.000815,0.0,0.111336,0.097166,1,2
341,11003786,Katja,Kipping,DIE LINKE,19,778622.859903,104.400966,15.568841,0.130435,0.092995,0.072464,0.002415,0.01213,0.000879,2.9e-05,0.111715,0.092995,1,3
295,11003715,Lars,Klingbeil,SPD,19,906582.461165,133.645631,16.606796,0.11165,0.087379,0.014563,0.0,0.009756,0.000142,0.0,0.099515,0.087379,1,4
1250,11004859,Martin,Reichardt,AfD,19,606634.831669,130.936519,15.715407,0.259629,0.078459,0.019971,0.07418,0.02037,0.000407,0.001511,0.169044,0.078459,1,5


In [14]:
# LaTeX version of the Goertz ranking. A dedicated name is used so the
# sentence-level `df` that earlier cells read is not overwritten.
top_goertz = (
    show_output(df_pol, "pop_goertz")
    .rename({"abbreviation": "party"}, axis=1)
    .loc[:, ["electoral_term", "rank", "first_name", "last_name", "party"]]
    .set_index(["electoral_term", "rank"])
)

tex = top_goertz.style.to_latex(hrules=True)

print(format_latex(tex))

\begin{tabular}{lllll}
\toprule
\headrow Term & Rank & First Name & Last Name & Party \\
\midrule
\textbf{18} & 1 & Andreas & Scheuer & CDU/CSU \\\midrule
 & 2 & Katja & Kipping & DIE LINKE \\\midrule
 & 3 & Sabine & Zimmermann & DIE LINKE \\\midrule
 & 4 & Corinna & Rüffer & Grüne \\\midrule
 & 5 & Karl Heinz & Brunner & SPD \\\midrule
& & & & \\\midrule
\textbf{19} & 1 & Martin & Sichert & AfD \\\midrule
 & 2 & Pia & Zimmermann & DIE LINKE \\\midrule
 & 3 & Katja & Kipping & DIE LINKE \\\midrule
 & 4 & Lars & Klingbeil & SPD \\\midrule
 & 5 & Martin & Reichardt & AfD \\
\bottomrule
\end{tabular}


### Bollen Index


In [15]:
# Top politicians per electoral term by the Bollen index column.
show_output(df_pol, var="pop_bollen")

Unnamed: 0,politician_id,first_name,last_name,abbreviation,electoral_term,sample_id,session,sentence_length,elite,pplcentr,left,right,pop,pop_left,pop_right,pop_bollen,pop_goertz,pop_sartori,rank
5,11000198,Peter,Bleser,CDU/CSU,18,621641.5,127.0,15.5,0.375,0.0,0.0,0.0,0.0,0.0,0.0,0.1875,0.0,0,1
646,11004183,Sahra,Wagenknecht,DIE LINKE,18,639690.893192,134.489437,17.965376,0.324531,0.039319,0.137911,0.013498,0.01276,0.00176,0.000172,0.181925,0.039319,1,2
10,11000365,Jörg Diether,Dehm-Desoi,DIE LINKE,18,547057.008881,96.218472,17.309059,0.273535,0.019538,0.115453,0.010657,0.005344,0.000617,5.7e-05,0.146536,0.019538,0,3
309,11003746,Sevim,Dağdelen,DIE LINKE,18,672681.095769,148.034868,16.69642,0.2543,0.019991,0.015342,0.018131,0.005084,7.8e-05,9.2e-05,0.137146,0.019991,0,4
400,11003858,Alexander,Ulrich,DIE LINKE,18,610361.75627,122.660658,14.666928,0.210031,0.015674,0.047022,0.001567,0.003292,0.000155,5e-06,0.112853,0.015674,0,5
1309,11004930,Alice,Weidel,AfD,19,660940.638591,117.51072,16.506891,0.375191,0.061256,0.054364,0.061256,0.022983,0.001249,0.001408,0.218224,0.061256,1,1
1278,11004892,Martin,Sichert,AfD,19,685621.783607,127.916066,17.36459,0.308852,0.113443,0.051803,0.080656,0.035037,0.001815,0.002826,0.211148,0.113443,1,2
1251,11004862,Martin Erwin,Renner,AfD,19,757995.405063,121.220253,17.286076,0.387342,0.010127,0.012658,0.136709,0.003922,5e-05,0.000536,0.198734,0.010127,0,3
1332,11004962,Janosch,Dahmen,Grüne,19,297482.333333,232.142857,20.678571,0.321429,0.071429,0.0,0.0,0.022959,0.0,0.0,0.196429,0.071429,1,4
1153,11004749,Martin,Hess,AfD,19,604936.716374,133.256579,16.232456,0.318713,0.059211,0.003655,0.097953,0.018871,6.9e-05,0.001848,0.188962,0.059211,1,5


In [16]:
# LaTeX version of the Bollen ranking. A dedicated name is used so the
# sentence-level `df` that earlier cells read is not overwritten.
top_bollen = (
    show_output(df_pol, "pop_bollen")
    .rename({"abbreviation": "party"}, axis=1)
    .loc[:, ["electoral_term", "rank", "first_name", "last_name", "party"]]
    .set_index(["electoral_term", "rank"])
)

tex = top_bollen.style.to_latex(hrules=True)

print(format_latex(tex))

\begin{tabular}{lllll}
\toprule
\headrow Term & Rank & First Name & Last Name & Party \\
\midrule
\textbf{18} & 1 & Peter & Bleser & CDU/CSU \\\midrule
 & 2 & Sahra & Wagenknecht & DIE LINKE \\\midrule
 & 3 & Jörg Diether & Dehm-Desoi & DIE LINKE \\\midrule
 & 4 & Sevim & Dağdelen & DIE LINKE \\\midrule
 & 5 & Alexander & Ulrich & DIE LINKE \\\midrule
& & & & \\\midrule
\textbf{19} & 1 & Alice & Weidel & AfD \\\midrule
 & 2 & Martin & Sichert & AfD \\\midrule
 & 3 & Martin Erwin & Renner & AfD \\\midrule
 & 4 & Janosch & Dahmen & Grüne \\\midrule
 & 5 & Martin & Hess & AfD \\
\bottomrule
\end{tabular}
