In [None]:
import pandas as pd
from scipy.stats.contingency import association
from sqlalchemy import case
from sqlalchemy import literal
from sqlalchemy.orm import Query

import src
import src.db.models.bert_data as bm
import src.db.models.open_discourse as od

In [None]:
# Raise pandas' display limits so wide text columns and long tables print in full.
pd.options.display.max_colwidth = 2048
pd.options.display.max_rows = 256

# Engine from the project helper; "DB" is passed straight through as its only
# argument (presumably a configuration key - confirm against src.db.connect).
engine = src.db.connect.make_engine("DB")

# Load data


In [None]:
# Cut-offs used by the query cell to binarize the Prediction scores: a score at
# or above its cut-off is coded 1, a score below it is coded 0.
# NOTE(review): the origin of these values is not recorded in this notebook.
thresh = dict(
    elite=0.415961,
    centr=0.295400,
    left=0.429109,
    right=0.302714,
)

In [None]:
# Hex colour per faction label. "DIE LINKE" is spelled without the trailing dot,
# matching the label the query cell emits for that faction.
# Not referenced by any of the cells visible in this part of the notebook.
colormap = dict(
    [
        ("CDU/CSU", "#000000"),
        ("Grüne", "#1AA037"),
        ("DIE LINKE", "#8B008B"),
        ("FDP", "#FFEF00"),
        ("AfD", "#0489DB"),
        ("SPD", "#E3000F"),
    ]
)

In [None]:
def _binarize(score, cutoff, name):
    """Return a labelled SQL CASE coding ``score`` as 1 or 0 around ``cutoff``.

    Args:
        score: column expression holding the score to threshold.
        cutoff: numeric cut-off; ``score >= cutoff`` yields 1, ``score < cutoff`` yields 0.
        name: label given to the resulting column.

    No ``else_`` is supplied, so a NULL score matches neither branch and the
    CASE evaluates to NULL.
    """
    return case(
        (score >= cutoff, literal(1)),
        (score < cutoff, literal(0)),
    ).label(name)


# One row per sample, joined to its prediction, speech, faction and politician.
# The join conditions are left for SQLAlchemy to infer from the mapped models.
query = (
    Query(bm.Sample)
    .join(bm.Prediction)
    .join(od.Speech)
    .join(od.Faction)
    .join(od.Politician)
    .filter(
        # Keep samples whose sentence_length exceeds 2 and drop the
        # "Fraktionslos" faction.
        bm.Sample.sentence_length > 2,
        od.Faction.abbreviation != "Fraktionslos",
    )
    .with_entities(
        od.Speech.id.label("speech_id"),
        od.Speech.electoral_term,
        od.Speech.date,
        bm.Sample.sentence_no,
        bm.Sample.pop_dict_score.label("gruendl"),
        od.Politician.id.label("pol_id"),
        od.Politician.first_name,
        od.Politician.last_name,
        # Normalize the one abbreviation stored with a trailing dot.
        case(
            (od.Faction.abbreviation == "DIE LINKE.", literal("DIE LINKE")),
            else_=od.Faction.abbreviation,
        ).label("abbreviation"),
        # Binary indicators derived from the prediction scores and `thresh`.
        _binarize(bm.Prediction.elite, thresh["elite"], "antielite"),
        _binarize(bm.Prediction.pplcentr, thresh["centr"], "pplcentr"),
        _binarize(bm.Prediction.left, thresh["left"], "left"),
        _binarize(bm.Prediction.right, thresh["right"], "right"),
    )
)

In [None]:
# Execute the query and pull the whole result set into a DataFrame.
with engine.connect() as conn:
    df = pd.read_sql(query.statement, conn)

# Fix dtypes after loading: integer `gruendl` flag and datetime `date`.
df["gruendl"] = df["gruendl"].astype(int)
df["date"] = pd.to_datetime(df["date"])

In [None]:
def show_stats(col1, col2):
    """Print contingency tables and Cramér's V for two categorical series.

    Three cross-tabulations of ``col1`` (rows) against ``col2`` (columns) are
    printed, each with margins and followed by a blank line:

    1. raw counts,
    2. proportions of the grand total (``normalize=True``),
    3. proportions within each column (``normalize="columns"``).

    The final line reports Cramér's V, computed by
    ``scipy.stats.contingency.association`` on the count table without margins.

    Args:
        col1: values used as the rows of the cross-tabulation.
        col2: values used as the columns of the cross-tabulation.

    Returns:
        None; everything is written to stdout.
    """
    # normalize=False is the pandas default, i.e. plain counts.
    for mode in (False, True, "columns"):
        print(pd.crosstab(col1, col2, normalize=mode, margins=True))
        print()

    counts = pd.crosstab(col1, col2).to_numpy()
    strength = association(counts, method="cramer")
    print(f"Cramers V: {strength}")

# Anti-Elite

In [None]:
# Anti-elite indicator (rows) against the `gruendl` flag (columns).
show_stats(df["antielite"], df["gruendl"])

gruendl          0      1      All
antielite                         
0          1160037  10746  1170783
1            88681   4390    93071
All        1248718  15136  1263854

gruendl           0         1       All
antielite                              
0          0.917857  0.008503  0.926359
1          0.070167  0.003474  0.073641
All        0.988024  0.011976  1.000000

gruendl           0         1       All
antielite                              
0          0.928982  0.709963  0.926359
1          0.071018  0.290037  0.073641

Cramers V: 0.09121680607124179


# People-Centrism

In [None]:
# People-centrism indicator (rows) against the `gruendl` flag (columns).
show_stats(df["pplcentr"], df["gruendl"])

gruendl         0      1      All
pplcentr                         
0         1223149  14077  1237226
1           25569   1059    26628
All       1248718  15136  1263854

gruendl          0         1       All
pplcentr                              
0         0.967793  0.011138  0.978931
1         0.020231  0.000838  0.021069
All       0.988024  0.011976  1.000000

gruendl          0         1       All
pplcentr                              
0         0.979524  0.930034  0.978931
1         0.020476  0.069966  0.021069

Cramers V: 0.03748491503741777


# Either Anti-Elite or People-Centrism (or both)

In [None]:
# Element-wise OR of the two 0/1 indicators: 1 when at least one of them is set.
# The combined Series carries no name, so crosstab labels the row axis "row_0".
show_stats(df["pplcentr"] | df["antielite"], df["gruendl"])

gruendl        0      1      All
row_0                           
0        1136656   9911  1146567
1         112062   5225   117287
All      1248718  15136  1263854

gruendl         0         1       All
row_0                                
0        0.899357  0.007842  0.907199
1        0.088667  0.004134  0.092801
All      0.988024  0.011976  1.000000

gruendl         0         1       All
row_0                                
0        0.910258  0.654797  0.907199
1        0.089742  0.345203  0.092801

Cramers V: 0.09577212846870437


# Both Anti-Elite and People-Centrism

In [None]:
# Element-wise AND of the two 0/1 indicators: 1 only when both are set.
# The combined Series carries no name, so crosstab labels the row axis "row_0".
show_stats(df["pplcentr"] & df["antielite"], df["gruendl"])

gruendl        0      1      All
row_0                           
0        1246530  14912  1261442
1           2188    224     2412
All      1248718  15136  1263854

gruendl         0         1       All
row_0                                
0        0.986293  0.011799  0.998092
1        0.001731  0.000177  0.001908
All      0.988024  0.011976  1.000000

gruendl         0         1       All
row_0                                
0        0.998248  0.985201  0.998092
1        0.001752  0.014799  0.001908

Cramers V: 0.032518064696773284
