In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix, precision_score
from scipy.stats import chi2_contingency
import matplotlib.pyplot as plt
import seaborn as sns

### Analysis of the Left Model

RILE:
neutral = 0
left = 1
right = 2

Parties:
    center = 0
    left = 1
    right = 2

In [None]:
# All artefacts of the left-trained model live in one directory; keeping the
# prefix in a single constant lets the split be swapped in one place.
LEFT_MODEL_DIR = "data/model_splits/left_right_split/left_as_train"

# Predictions on the held-out test set.
df_test = pd.read_csv(f"{LEFT_MODEL_DIR}/left_test_predictions.csv")
# Predictions of the same model on the right-party inference set.
df_inference = pd.read_csv(f"{LEFT_MODEL_DIR}/left_inference_right_predictions.csv")
# Predictions of the same model on the center-party inference set.
df_inference_center = pd.read_csv(f"{LEFT_MODEL_DIR}/left_inference_center_predictions.csv")
# Training split of the model.
df_train = pd.read_csv(f"{LEFT_MODEL_DIR}/train-00000-of-00001.csv")

# Fail fast with a readable message if a file lacks a column that the analysis
# cells further down read, instead of hitting a KeyError in the middle of a run.
_required_columns = {
    "df_test": (df_test, {"label", "preds", "main_codes"}),
    "df_inference": (df_inference, {"label", "preds", "main_codes"}),
    "df_inference_center": (df_inference_center, {"label", "preds", "main_codes"}),
    "df_train": (df_train, {"RILE", "main_codes"}),
}
for _frame_name, (_frame, _needed) in _required_columns.items():
    _missing = _needed - set(_frame.columns)
    assert not _missing, f"{_frame_name} is missing columns: {sorted(_missing)}"

In [None]:
# (n_rows, n_columns) of the test-set predictions frame.
df_test.shape

In [None]:
# (n_rows, n_columns) of the right-party inference predictions frame.
df_inference.shape

In [None]:
# (n_rows, n_columns) of the center-party inference predictions frame.
df_inference_center.shape

In [None]:
# (n_rows, n_columns) of the training frame.
df_train.shape

In [None]:
# Show the first row to see which columns the prediction files carry.
df_test.head(1)

## Test predictions: How well does the model perform? Are the predictions significantly different from the actual codes regarding RILE?

In [None]:
# Training curve of the left model: one validation macro-F1 value and one
# validation-loss value per epoch, for 20 epochs.
# NOTE(review): the numbers are hard-coded, presumably copied from the
# training log — confirm the source.
val_f1s = [
    0.7050, 0.7093, 0.7035, 0.7061, 0.7004, 0.6936, 0.7046, 0.6979, 0.7019, 0.7046,
    0.7030, 0.7085, 0.6997, 0.7046, 0.7032, 0.7036, 0.7038, 0.7072, 0.7051, 0.7066,
]
val_loss = [
    0.6205, 0.7307, 0.8032, 1.0851, 1.3747, 1.7668, 1.7892, 2.0678, 2.2576, 2.2779,
    2.4922, 2.6629, 2.7028, 2.7458, 2.7886, 2.9100, 2.8942, 2.9562, 3.0273, 3.0530,
]
epochs = range(1, 21)

fig, ax1 = plt.subplots()

# Left y-axis: F1 score in green.
ax1.plot(epochs, val_f1s, 'g-')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('F1 Score (macro)', color='g')

# Right y-axis on the same x-axis: validation loss in blue.
ax2 = ax1.twinx()
ax2.plot(epochs, val_loss, 'b-')
ax2.set_ylabel('Validation Loss', color='b')

# One x tick every second epoch (0, 2, ..., 20), applied to both axes.
even_epochs = range(0, len(epochs) + 1, 2)
for axis in (ax1, ax2):
    axis.set_xticks(even_epochs)

plt.title('Validation F1-Score and Validation Loss\nfor Left Party Model training')
plt.show()

In [None]:
# Headline metrics on the test set. Precision, recall and F1 are
# macro-averaged, so each class contributes equally regardless of its size.
y_true, y_pred = df_test["label"], df_test["preds"]
print("Test set accuracy:", accuracy_score(y_true, y_pred))
print("Test set precision:", precision_score(y_true, y_pred, average="macro"))
print("Test set recall:", recall_score(y_true, y_pred, average="macro"))
print("Test set F1-score:", f1_score(y_true, y_pred, average="macro"))

In [None]:
# Share of each RILE class among all rows of the training data.
print("RILE distribution in training data:")
df_train["RILE"].value_counts().div(len(df_train))

In [None]:
# Share of each predicted class among all test rows.
print("RILE distribution in test predictions:")
df_test["preds"].value_counts().div(len(df_test))

In [None]:
# Share of each coder-assigned class among all test rows.
print("RILE distribution in real test labels:")
df_test["label"].value_counts().div(len(df_test))

In [None]:
# Confusion matrix of coder labels (rows) against model predictions (columns)
# on the test set, shown as a labelled table so that every count sits next to
# its caption.
# `labels` pins the class order to 0 = neutral, 1 = left, 2 = right; without
# it the matrix shrinks and the captions shift whenever a class is absent.
rile_names = ["Neutral", "Left", "Right"]
cm_test = pd.DataFrame(
    confusion_matrix(df_test["label"], df_test["preds"], labels=[0, 1, 2]),
    index=[f"Real {name}:" for name in rile_names],
    columns=[f"Pred. {name}" for name in rile_names],
)
print("\nAbsolut confusion matrix")
cm_test

So the model is more likely to mistakenly predict Neutral than to confuse left with right or right with left (makes sense!)

In [None]:
# False "left" predictions on the test set: rows predicted left (1) whose
# coder label is neutral (0) or right (2).
predicted_left = df_test["preds"] == 1
labelled_other = df_test["label"] != 1
df_false_pos = df_test[predicted_left & labelled_other]

# Share of each real code among those errors; value_counts() already sorts
# from most to least frequent.
codes_distributions = df_false_pos["main_codes"].value_counts() / len(df_false_pos)
# .iloc makes the "first ten rows" intent explicit: a plain [0:10] slice is
# interpreted by label on a float-typed index in pandas 2.x, which would
# select codes between 0 and 10 instead of the top ten.
print(codes_distributions.iloc[:10])

# Bar chart of the five most frequent codes.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.iloc[:5].plot(kind='bar', ax=ax)
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false left predictions in Left Model test set')

plt.show()

--> 305 is a RIGHT code (Political Authority: Positive)

In [None]:
# Widen text columns so the example rows are not cut off.
pd.set_option('display.max_colwidth', 200)
# Last five wrongly predicted rows that carry the code under inspection.
target_code = 305
df_false_pos.loc[df_false_pos["main_codes"] == target_code].tail(5)

In [None]:
# False "right" predictions on the test set: rows predicted right (2) whose
# coder label is neutral (0) or left (1).
predicted_right = df_test["preds"] == 2
labelled_other = df_test["label"] != 2
df_false_pos = df_test[predicted_right & labelled_other]

# Share of each real code among those errors; value_counts() already sorts
# from most to least frequent.
codes_distributions = df_false_pos["main_codes"].value_counts() / len(df_false_pos)
# .iloc makes the "first ten rows" intent explicit: a plain [0:10] slice is
# interpreted by label on a float-typed index in pandas 2.x, which would
# select codes between 0 and 10 instead of the top ten.
print(codes_distributions.iloc[:10])

# Bar chart of the five most frequent codes.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.iloc[:5].plot(kind='bar', ax=ax)
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false right predictions in Left Model test set')

plt.show()

--> 504 (Welfare State Expansion), 202 (Democracy), 403 (Market Regulation) are LEFT categories

In [None]:
# Widen text columns so the example rows are not cut off.
pd.set_option('display.max_colwidth', 200)
# Last five wrongly predicted rows that carry the code under inspection.
target_code = 202
df_false_pos.loc[df_false_pos["main_codes"] == target_code].tail(5)

In [None]:
# Input for the significance test: per class (rows 0, 1, 2) the number of
# test rows the model predicted ("Model") and the number the coders
# assigned ("Coders").
contingency_table = pd.DataFrame({
    "Model": [int((df_test["preds"] == rile_class).sum()) for rile_class in (0, 1, 2)],
    "Coders": [int((df_test["label"] == rile_class).sum()) for rile_class in (0, 1, 2)],
})

contingency_table

In [None]:
# Chi-square test on the 3x2 table of class counts (model vs. coders).
# chi2_contingency returns the test statistic, the p-value, the degrees of
# freedom and the expected frequencies.
# NOTE(review): both columns were counted over the same rows, so the two
# samples are paired rather than independent — confirm that the test's
# independence assumption is acceptable here.
chi2_contingency(contingency_table)

Model predictions on test set are significantly different to real labels (more left/right, less neutral)

### Inference Predictions (Right)

In [None]:
# Headline metrics on the right-party inference set. Precision, recall and F1
# are macro-averaged, so each class contributes equally regardless of size.
y_true, y_pred = df_inference["label"], df_inference["preds"]
print("Inf set accuracy:", accuracy_score(y_true, y_pred))
print("Inf set precision:", precision_score(y_true, y_pred, average="macro"))
print("Inf set recall:", recall_score(y_true, y_pred, average="macro"))
print("Inf set F1-score:", f1_score(y_true, y_pred, average="macro"))

In [None]:
# Share of each RILE class among all rows of the training data (reference
# for the inference distributions shown next).
print("RILE distribution in training data:")
df_train["RILE"].value_counts().div(len(df_train))

In [None]:
# Share of each predicted class among all right-party inference rows.
print("RILE distribution in inference (Right) predictions:")
df_inference["preds"].value_counts().div(len(df_inference))

In [None]:
# Share of each coder-assigned class among all right-party inference rows.
print("RILE distribution in inference (Right) real labels:")
df_inference["label"].value_counts().div(len(df_inference))

Occurrence of right is stable, but there are more left and fewer neutral predictions

In [None]:
# Confusion matrix of coder labels (rows) against model predictions (columns)
# on the right-party inference set, shown as a labelled table so that every
# count sits next to its caption.
# `labels` pins the class order to 0 = neutral, 1 = left, 2 = right; without
# it the matrix shrinks and the captions shift whenever a class is absent.
rile_names = ["Neutral", "Left", "Right"]
cm_inference_right = pd.DataFrame(
    confusion_matrix(df_inference["label"], df_inference["preds"], labels=[0, 1, 2]),
    index=[f"Real {name}:" for name in rile_names],
    columns=[f"Pred. {name}" for name in rile_names],
)
print("\nAbsolut confusion matrix")
cm_inference_right

In [None]:
# False "left" predictions on the right-party inference set: rows predicted
# left (1) whose coder label is neutral (0) or right (2).
predicted_left = df_inference["preds"] == 1
labelled_other = df_inference["label"] != 1
df_false_pos = df_inference[predicted_left & labelled_other]

# Share of each real code among those errors; value_counts() already sorts
# from most to least frequent.
codes_distributions = df_false_pos["main_codes"].value_counts() / len(df_false_pos)
# .iloc makes the "first ten rows" intent explicit: a plain [0:10] slice is
# interpreted by label on a float-typed index in pandas 2.x, which would
# select codes between 0 and 10 instead of the top ten.
print(codes_distributions.iloc[:10])

# Bar chart of the five most frequent codes.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.iloc[:5].plot(kind='bar', ax=ax)
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false left predictions in Left Model Inference-Right set')

plt.show()

In [None]:
# Widen text columns so the example rows are not cut off.
pd.set_option('display.max_colwidth', 200)
# Last five wrongly predicted rows that carry the code under inspection.
target_code = 505
df_false_pos.loc[df_false_pos["main_codes"] == target_code].tail(5)

In [None]:
# False "right" predictions on the right-party inference set: rows predicted
# right (2) whose coder label is neutral (0) or left (1).
predicted_right = df_inference["preds"] == 2
labelled_other = df_inference["label"] != 2
df_false_pos = df_inference[predicted_right & labelled_other]

# Share of each real code among those errors; value_counts() already sorts
# from most to least frequent.
codes_distributions = df_false_pos["main_codes"].value_counts() / len(df_false_pos)
# .iloc makes the "first ten rows" intent explicit: a plain [0:10] slice is
# interpreted by label on a float-typed index in pandas 2.x, which would
# select codes between 0 and 10 instead of the top ten.
print(codes_distributions.iloc[:10])

# Bar chart of the five most frequent codes.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.iloc[:5].plot(kind='bar', ax=ax)
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false right predictions in Left Model Inference-Right set')

plt.show()

Except for 403, rather big changes here... (maybe make graph showing risers/fallers in false positive distributions between test and inference sets?)

In [None]:
# Widen text columns so the example rows are not cut off.
pd.set_option('display.max_colwidth', 200)
# Last five wrongly predicted rows that carry the code under inspection.
target_code = 504
df_false_pos.loc[df_false_pos["main_codes"] == target_code].tail(5)

In [None]:
# Input for the significance test: per class (rows 0, 1, 2) the number of
# right-party inference rows the model predicted ("Model") and the number the
# coders assigned ("Coders").
contingency_table = pd.DataFrame({
    "Model": [int((df_inference["preds"] == rile_class).sum()) for rile_class in (0, 1, 2)],
    "Coders": [int((df_inference["label"] == rile_class).sum()) for rile_class in (0, 1, 2)],
})

contingency_table

Very close in number of right predictions! But large differences between left and neutral (Maybe count the concrete number going from which to what and compare these between test/inf sets)

In [None]:
# Chi-square test on the 3x2 table of class counts (model vs. coders).
# chi2_contingency returns the test statistic, the p-value, the degrees of
# freedom and the expected frequencies.
# NOTE(review): both columns were counted over the same rows, so the two
# samples are paired rather than independent — confirm that the test's
# independence assumption is acceptable here.
chi2_contingency(contingency_table)

So the model predictions are very clearly significantly different from the real labels!

### Inference Predictions (Center)

In [None]:
# Headline metrics on the center-party inference set. Precision, recall and
# F1 are macro-averaged, so each class contributes equally regardless of size.
y_true, y_pred = df_inference_center["label"], df_inference_center["preds"]
print("Inf set accuracy:", accuracy_score(y_true, y_pred))
print("Inf set precision:", precision_score(y_true, y_pred, average="macro"))
print("Inf set recall:", recall_score(y_true, y_pred, average="macro"))
print("Inf set F1-score:", f1_score(y_true, y_pred, average="macro"))

In [None]:
# Share of each RILE class among all rows of the training data (reference
# for the inference distributions shown next).
print("RILE distribution in training data:")
df_train["RILE"].value_counts().div(len(df_train))

In [None]:
# Share of each predicted class among all center-party inference rows.
print("RILE distribution in inference (Center) predictions:")
df_inference_center["preds"].value_counts().div(len(df_inference_center))

In [None]:
# Share of each coder-assigned class among all center-party inference rows.
print("RILE distribution in inference (Center) real labels:")
df_inference_center["label"].value_counts().div(len(df_inference_center))

More left predictions, less neutral predictions

In [None]:
# Confusion matrix of coder labels (rows) against model predictions (columns)
# on the center-party inference set, shown as a labelled table so that every
# count sits next to its caption.
# `labels` pins the class order to 0 = neutral, 1 = left, 2 = right; without
# it the matrix shrinks and the captions shift whenever a class is absent.
rile_names = ["Neutral", "Left", "Right"]
cm_inference_center = pd.DataFrame(
    confusion_matrix(df_inference_center["label"], df_inference_center["preds"], labels=[0, 1, 2]),
    index=[f"Real {name}:" for name in rile_names],
    columns=[f"Pred. {name}" for name in rile_names],
)
print("\nAbsolut confusion matrix")
cm_inference_center


In [None]:
# False "left" predictions on the center-party inference set: rows predicted
# left (1) whose coder label is neutral (0) or right (2).
predicted_left = df_inference_center["preds"] == 1
labelled_other = df_inference_center["label"] != 1
df_false_pos = df_inference_center[predicted_left & labelled_other]

# Share of each real code among those errors; value_counts() already sorts
# from most to least frequent.
codes_distributions = df_false_pos["main_codes"].value_counts() / len(df_false_pos)
# .iloc makes the "first ten rows" intent explicit: a plain [0:10] slice is
# interpreted by label on a float-typed index in pandas 2.x, which would
# select codes between 0 and 10 instead of the top ten.
print(codes_distributions.iloc[:10])

# Bar chart of the five most frequent codes.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.iloc[:5].plot(kind='bar', ax=ax)
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false left predictions in Left Model Inference-Center set')

plt.show()


In [None]:
# Widen text columns so the example rows are not cut off.
pd.set_option('display.max_colwidth', 200)
# Last five wrongly predicted rows that carry the code under inspection.
target_code = 505
df_false_pos.loc[df_false_pos["main_codes"] == target_code].tail(5)


In [None]:
# False "right" predictions on the center-party inference set: rows predicted
# right (2) whose coder label is neutral (0) or left (1).
predicted_right = df_inference_center["preds"] == 2
labelled_other = df_inference_center["label"] != 2
df_false_pos = df_inference_center[predicted_right & labelled_other]

# Share of each real code among those errors; value_counts() already sorts
# from most to least frequent.
codes_distributions = df_false_pos["main_codes"].value_counts() / len(df_false_pos)
# .iloc makes the "first ten rows" intent explicit: a plain [0:10] slice is
# interpreted by label on a float-typed index in pandas 2.x, which would
# select codes between 0 and 10 instead of the top ten.
print(codes_distributions.iloc[:10])

# Bar chart of the five most frequent codes.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.iloc[:5].plot(kind='bar', ax=ax)
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false right predictions in Left Model Inference-Center set')

plt.show()


In [None]:
# Widen text columns so the example rows are not cut off.
pd.set_option('display.max_colwidth', 200)
# Last five wrongly predicted rows that carry the code under inspection.
target_code = 504
df_false_pos.loc[df_false_pos["main_codes"] == target_code].tail(5)


In [None]:
# Input for the significance test: per class (rows 0, 1, 2) the number of
# center-party inference rows the model predicted ("Model") and the number
# the coders assigned ("Coders").
contingency_table = pd.DataFrame({
    "Model": [int((df_inference_center["preds"] == rile_class).sum()) for rile_class in (0, 1, 2)],
    "Coders": [int((df_inference_center["label"] == rile_class).sum()) for rile_class in (0, 1, 2)],
})

contingency_table


Quite close in number of right predictions! But large differences between left and neutral (Maybe count the concrete number going from which to what and compare these between test/inf sets)


In [None]:
# Chi-square test on the 3x2 table of class counts (model vs. coders).
# chi2_contingency returns the test statistic, the p-value, the degrees of
# freedom and the expected frequencies.
# NOTE(review): both columns were counted over the same rows, so the two
# samples are paired rather than independent — confirm that the test's
# independence assumption is acceptable here.
chi2_contingency(contingency_table)

## Differences in the false-positive distributions from test to center to right (inference)

## False left predictions:

In [None]:
# Every code seen in the training data; used to put all splits on one index.
all_codes = set(df_train["main_codes"].unique())

# For each split: share of every real code among the rows predicted left (1)
# whose coder label is not left.
splits = {"test": df_test, "center": df_inference_center, "right": df_inference}
false_left = {}
for split_name, split_df in splits.items():
    wrong_left = split_df[(split_df["preds"] == 1) & (split_df["label"] != 1)]
    shares = wrong_left["main_codes"].value_counts() / wrong_left.shape[0]
    # Codes that never occur among the errors get an explicit 0 so that the
    # three series can be subtracted from each other code by code.
    absent = pd.Series(0, index=all_codes - set(shares.index))
    false_left[split_name] = pd.concat([shares, absent]).sort_index()

false_left_test = false_left["test"]
false_left_center = false_left["center"]
false_left_right = false_left["right"]

In [None]:
# Change in each code's share relative to the test set (inference minus
# test), ordered from the largest increase to the largest decrease.
test_to_center = false_left_center.sub(false_left_test).sort_values(ascending=False)
test_to_right = false_left_right.sub(false_left_test).sort_values(ascending=False)

In [None]:
# Per-code change in share from the test set to the center inference set,
# largest increase first.
test_to_center

In [None]:
# Per-code change in share from the test set to the right inference set,
# largest increase first.
test_to_right

In [None]:
# Hand-picked codes to compare across the three data sets.
interesting_codes = [505, 401, 501, 706]

# Pull the selected codes, in the order given above, out of each share series.
d_test_selection, d_inf_center_selection, d_inf_right_selection = (
    code_shares.loc[interesting_codes]
    for code_shares in (false_left_test, false_left_center, false_left_right)
)

# One column per data set; shares are scaled from fractions to percentages.
df_tmp = pd.DataFrame({
    "Test set (Left manifestos)": d_test_selection * 100,
    "Inference set (Center manifestos)": d_inf_center_selection * 100,
    "Inference set (Right manifestos)": d_inf_right_selection * 100,
})
df_tmp

In [None]:
# Grouped bar chart: one group per selected code, one bar per data set.
ax = df_tmp.plot(kind='bar', color=['lightgrey', 'grey', 'black'], figsize=(10, 6))

ax.set_xlabel('Code')
ax.set_ylabel('Frequency')
ax.set_title('Left model: Frequency of select codes in the false left predictions')

# df_tmp already holds percentages, so only a "%" suffix is appended.
# A formatter is used instead of fixed labels built with int(): it keeps the
# labels tied to the real tick positions and shows fractional ticks such as
# 2.5 correctly instead of truncating them to "2%".
# Passing a format string here requires Matplotlib >= 3.3.
ax.yaxis.set_major_formatter('{x:g}%')

# Replace the bare code numbers by "<code>\n<category name>"; the order must
# follow the rows of df_tmp.
new_labels = ['505\nWelfare State Limitation', '401\nFree Market Economy:\nPositive',
              '501\nEnvironmental Protection:\nPositive', '706\nNon-Economic\nDemographic Groups:\nPositive']
ax.set_xticklabels(new_labels, rotation=0)

plt.show()

## False Right predictions:

In [None]:
# Every code seen in the training data; used to put all splits on one index.
all_codes = set(df_train["main_codes"].unique())

# For each split: share of every real code among the rows predicted right (2)
# whose coder label is not right.
splits = {"test": df_test, "center": df_inference_center, "right": df_inference}
false_right = {}
for split_name, split_df in splits.items():
    wrong_right = split_df[(split_df["preds"] == 2) & (split_df["label"] != 2)]
    shares = wrong_right["main_codes"].value_counts() / wrong_right.shape[0]
    # Codes that never occur among the errors get an explicit 0 so that the
    # three series can be subtracted from each other code by code.
    absent = pd.Series(0, index=all_codes - set(shares.index))
    false_right[split_name] = pd.concat([shares, absent]).sort_index()

false_right_test = false_right["test"]
false_right_center = false_right["center"]
false_right_right = false_right["right"]

# Backward-compatible aliases: this cell used to store its false-RIGHT results
# under the false_left_* names, and the selection cell further down still
# reads those names. Note that this overwrites the false-left series computed
# in the previous section.
# TODO: switch the downstream cell to false_right_* and drop these aliases.
false_left_test = false_right_test
false_left_center = false_right_center
false_left_right = false_right_right

# Change in each code's share relative to the test set (inference minus
# test), ordered from the largest increase to the largest decrease.
test_to_center = (false_right_center - false_right_test).sort_values(ascending=False)
test_to_right = (false_right_right - false_right_test).sort_values(ascending=False)

In [None]:
# Per-code change in the false-right share from the test set to the center
# inference set, largest increase first.
test_to_center

In [None]:
# Per-code change in the false-right share from the test set to the right
# inference set, largest increase first.
test_to_right

In [None]:
# Hand-picked codes to compare across the three data sets.
interesting_codes = [410, 703, 504, 705]

# NOTE: despite their names, the false_left_* series hold the false-RIGHT
# shares at this point; the cell at the top of this section reassigns them.
d_test_selection, d_inf_center_selection, d_inf_right_selection = (
    code_shares.loc[interesting_codes]
    for code_shares in (false_left_test, false_left_center, false_left_right)
)

# One column per data set; shares are scaled from fractions to percentages.
df_tmp = pd.DataFrame({
    "Test set (Left manifestos)": d_test_selection * 100,
    "Inference set (Center manifestos)": d_inf_center_selection * 100,
    "Inference set (Right manifestos)": d_inf_right_selection * 100,
})
df_tmp

In [None]:
# Grouped bar chart: one group per selected code, one bar per data set.
ax = df_tmp.plot(kind='bar', color=['lightgrey', 'grey', 'black'], figsize=(10, 6))

ax.set_xlabel('Code')
ax.set_ylabel('Frequency')
ax.set_title('Left model: Frequency of select codes in the false right predictions')

# df_tmp already holds percentages, so only a "%" suffix is appended.
# A formatter is used instead of fixed labels built with int(): it keeps the
# labels tied to the real tick positions and shows fractional ticks such as
# 2.5 correctly instead of truncating them to "2%".
# Passing a format string here requires Matplotlib >= 3.3.
ax.yaxis.set_major_formatter('{x:g}%')

# Replace the bare code numbers by "<code>\n<category name>"; the order must
# follow the rows of df_tmp.
new_labels = ['410\nEconomic Growth', '703\nAgriculture and Farmers',
              '504\nWelfare State Expansion', '705\nMinority Groups:\nPositive']
ax.set_xticklabels(new_labels, rotation=0)

plt.show()