In [None]:
%load_ext rpy2.ipython

In [None]:
import matplotlib as mlp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from sqlalchemy import case
from sqlalchemy import literal
from sqlalchemy.orm import Query

import src
import src.db.models.bert_data as bm
import src.db.models.open_discourse as od

In [None]:
# Build the database engine through the project's own helper.
# NOTE(review): "DB" is presumably the name of a connection/config entry — confirm against src.db.connect.
engine = src.db.connect.make_engine("DB")

# Load data

In [None]:
# Per-dimension cut-offs used by the query cell to binarize the prediction scores
# (score >= threshold -> 1, score < threshold -> 0).
# The "centr" entry is the one applied to the `pplcentr` prediction column.
# NOTE(review): the provenance of these values is not recorded in this notebook —
# presumably they were tuned on held-out data; confirm and link the source.
thresh = {"elite": 0.5013018, "centr": 0.5017193, "left": 0.42243505, "right": 0.38281676}

In [None]:
# Hex fill colour per faction abbreviation. The keys and values are later split
# into two lists and handed to the R cell, where they become the named colour
# vector given to scale_fill_manual().
colormap = {
    "CDU/CSU": "#000000",
    "Grüne": "#1AA037",
    "DIE LINKE.": "#8B008B",  # SPD complementary for visual disambiguation
    "FDP": "#FFEF00",
    "AfD": "#0489DB",
    "SPD": "#E3000F",
}

In [None]:
def _binarize(score_column, threshold, label):
    """Return a labeled SQL CASE turning a prediction score into a 0/1 flag.

    Scores at or above ``threshold`` become 1 and scores below it become 0.
    There is deliberately no ELSE branch, so a row matching neither comparison
    (e.g. a NULL score) yields NULL.
    """
    is_positive = (score_column >= threshold, literal(1))
    is_negative = (score_column < threshold, literal(0))
    return case(is_positive, is_negative).label(label)


# One row per sample: sentence text, speech metadata, dictionary score and the
# four binarized model predictions. Restricted to samples with sentence_length
# above 2, electoral terms 18 and 19, and factions other than "Fraktionslos".
query = (
    Query(bm.Sample)
    .join(bm.Prediction)
    .join(od.Speech)
    .join(od.Faction)
    .filter(
        bm.Sample.sentence_length > 2,
        od.Speech.electoral_term.in_([18, 19]),
        od.Faction.abbreviation != "Fraktionslos",
    )
    .with_entities(
        bm.Sample.text,
        od.Speech.date,
        od.Speech.electoral_term,
        od.Faction.abbreviation,
        bm.Sample.pop_dict_score,
        _binarize(bm.Prediction.elite, thresh["elite"], "antielite"),
        _binarize(bm.Prediction.pplcentr, thresh["centr"], "pplcentr"),
        _binarize(bm.Prediction.left, thresh["left"], "left"),
        _binarize(bm.Prediction.right, thresh["right"], "right"),
    )
)

In [None]:
# Run the query on a short-lived connection and materialize the result.
with engine.connect() as connection:
    df = pd.read_sql(query.statement, connection)

# Parse the speech date column into pandas datetimes.
df["date"] = pd.to_datetime(df["date"])

In [None]:
# Column totals as a quick sanity check. For the four 0/1 label columns the sum
# is the number of samples flagged positive; the electoral_term total is just a
# by-product of summing every numeric column and carries no meaning.
df.sum(numeric_only=True)

electoral_term    23420419
pop_dict_score       15136
antielite            84514
pplcentr             27406
left                 13971
right                 7872
dtype: int64

# Average over the full time period per faction

# Means per electoral term per faction

In [None]:
# Numeric working copy: drop the sentence text, discard rows with any missing
# value, and store the faction abbreviation as a categorical column.
plot = (
    df.drop(columns="text")
    .dropna()
    .assign(abbreviation=lambda frame: frame["abbreviation"].astype("category"))
)

In [None]:
# Mean of every numeric column per (electoral term, faction); for the 0/1 label
# columns this is the share of samples flagged positive.
# NOTE(review): this aggregates the raw `df`, not the NaN-free `plot` frame
# built in the previous cell — confirm that is intended.
term_faction_means = df.groupby(["electoral_term", "abbreviation"]).mean(numeric_only=True)

# Bring the group keys back as columns and switch to the display names used
# as legend and panel titles in the figure.
plot_df = term_faction_means.reset_index().rename(
    columns={
        "abbreviation": "Party",
        "antielite": "(a) Anti-Elitism",
        "pplcentr": "(b) People-Centrism",
        "left": "(c) Left-Wing Host-Ideology",
        "right": "(d) Right-Wing Host-Ideology",
    }
)

# Display names of the four dimension columns, in panel order.
cols = [
    "(a) Anti-Elitism",
    "(b) People-Centrism",
    "(c) Left-Wing Host-Ideology",
    "(d) Right-Wing Host-Ideology",
]

# Normalized copy: each dimension column is rescaled on its own by
# maxabs_scale; all other columns are carried over unchanged.
plot_df_norm = plot_df.assign(
    **{name: preprocessing.maxabs_scale(plot_df[name]) for name in cols}
)


def reshape_df(df, value_vars=None):
    """Melt a wide per-party frame into the long format used for plotting.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame holding "Party" and "electoral_term" columns plus one
        column for every entry of ``value_vars``. It is not modified.
    value_vars : list of str, optional
        Columns to unpivot. When omitted, the notebook-level ``cols`` list is
        used, which keeps existing ``reshape_df(frame)`` calls working.

    Returns
    -------
    pandas.DataFrame
        Long frame with columns "Party" (categorical), "electoral_term",
        "variable" (the former column name) and "value".
    """
    if value_vars is None:
        # Fall back to the notebook-wide dimension list only when the caller
        # does not say which columns to melt.
        value_vars = cols
    new = pd.melt(
        df,
        id_vars=["Party", "electoral_term"],
        value_vars=value_vars,
        var_name="variable",
    )
    new["Party"] = new["Party"].astype("category")
    return new


# Convert both the raw and the normalized means to long format for the R cell.
plot_df, plot_df_norm = map(reshape_df, (plot_df, plot_df_norm))

In [None]:
# Persist the long-format, unnormalized means behind the figure as CSV under
# the project path (row index omitted).
plot_df.to_csv(src.PATH / "data/figure_1_numbers.csv", index=False)

In [None]:
# Split the colour mapping into two parallel lists (same order) so they can be
# passed to the R cell with `-i` and recombined there via setNames().
color_names = [*colormap]
color_vals = [*colormap.values()]

In [None]:
%%R -i plot_df -i plot_df_norm -i color_names -i color_vals

# Draws the four-panel bar chart (one panel per dimension, one bar per party
# and electoral term) for the raw and the normalized means and saves each as PDF.
library(tidyverse)
library(ggplot2)
library(ggpattern)
theme_set(theme_minimal())

# Named vector party -> hex colour, consumed by scale_fill_manual().
colors <- setNames(color_vals, color_names)

# Directory the PDFs are written to.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
figure_dir <- "/home/lukas/overleaf/bert_populism/Figures"

# Build the faceted bar chart for one long-format frame with the columns
# Party, electoral_term, variable and value.
create_plot <- function(df) {
    df$electoral_term <- as_factor(df$electoral_term)
    # Fix the panel order (a) to (d).
    df$variable <- fct_relevel(df$variable, c("(a) Anti-Elitism", "(b) People-Centrism", "(c) Left-Wing Host-Ideology", "(d) Right-Wing Host-Ideology"))
    # Add a zero-valued row for every Party x term x variable combination that
    # is absent, so each party gets a bar slot for both terms in every panel.
    df <- complete(df, Party = unique(df$Party), electoral_term = unique(df$electoral_term), variable = unique(df$variable), fill=list(value=0))
    # aes() with bare column names replaces aes_string(), which has been
    # deprecated since ggplot2 3.0.0.
    ggplot(df, aes(x=Party, y=value, fill=Party, pattern=electoral_term)) +
        geom_bar_pattern(
            position=position_dodge(preserve="single"),
            stat="identity",
            color="grey",
            pattern_fill="grey",
            pattern_angle=45,
            pattern_density=0.1,
            pattern_spacing=0.025,
            pattern_key_scale_factor=0.6
            ) +
        scale_fill_manual(values=colors) +
        # Term 18 is drawn striped, term 19 plain.
        scale_pattern_manual(values=c("18"="stripe", "19"="none")) +
        theme(
            text=element_text(size=18),
            axis.text=element_text(size=14),
            # Party names are conveyed by the fill legend, so x tick labels are hidden.
            axis.text.x=element_blank(),
            axis.title.y=element_blank(),
            axis.title.x=element_blank(),
            strip.text=element_text(size=20)
        ) +
        facet_wrap(~variable, scales="free") +
        # Keep the two legends independent: the pattern legend on white keys,
        # the fill legend without any pattern.
        guides(pattern = guide_legend(title="Term", override.aes = list(fill = "white")),
            fill = guide_legend(override.aes = list(pattern = "none")))
}

# Raw means. The plot object is passed to ggsave() explicitly instead of
# relying on ggplot2's implicit last_plot().
plot <- create_plot(plot_df)
plot
ggsave(file.path(figure_dir, "all_dimensions_orig.pdf"), plot=plot, width=16, height=9)

# Max-abs normalized means.
plot <- create_plot(plot_df_norm)
plot
ggsave(file.path(figure_dir, "all_dimensions_normalized.pdf"), plot=plot, width=16, height=9)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


1: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages
2: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
generated. 
