In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix, precision_score
from scipy.stats import chi2_contingency
import matplotlib.pyplot as plt
import seaborn as sns

### Analysis of the Green Model

In [None]:
# Full annotated corpus. Its `main_codes` column is used further down to
# enumerate every code that can occur.
corpus_df = pd.read_csv("data/english_annotated_full_df.csv")

In [None]:
# Load the three CSV files of the Green model (trained on the green split), in
# this order: test-set predictions, inference-set predictions, training data.
df_test, df_inference, df_train = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/model_splits/green_split/green_as_train/green_test_predictions.csv",
        "data/model_splits/green_split/green_as_train/green_inference_predictions.csv",
        "data/model_splits/green_split/green_as_train/train-00000-of-00001.csv",
    )
)

In [None]:
# (rows, columns) of the Green-model test predictions.
df_test.shape

In [None]:
# (rows, columns) of the Green-model inference predictions.
df_inference.shape

In [None]:
# (rows, columns) of the Green-model training data.
df_train.shape

In [None]:
# Show a single row to see which columns the prediction file provides.
df_test.head(1)

## Test predictions (green): How well does the model perform? Are the predictions significantly different from the actual codes regarding environmental protection?

In [None]:
# Training curve of the Green Party model: validation F1 (left axis, green)
# and validation loss (right axis, blue) for each of the 20 epochs.
# The values are hard-coded in this cell rather than read from a log file.
epochs = range(1,21)
val_f1s = [0.6430, 0.6784, 0.6810, 0.6644, 0.6455, 0.6777,  0.6700, 0.6644, 0.6552, 0.6667,
           0.6473, 0.6265, 0.6655, 0.6259, 0.6395, 0.6661, 0.6706, 0.6562, 0.6626, 0.6632]
val_loss = [0.4035, 0.4631, 0.5046, 0.7762, 0.8961, 0.8864, 0.8025, 1.1034, 1.1414, 1.2555,
            1.2923, 1.3150, 1.1883, 1.3822, 1.4309, 1.3541, 1.3666, 1.4396, 1.4340, 1.4371]

fig, ax1 = plt.subplots()

# Left y-axis: F1 score.
ax1.set_xlabel('Epochs')
ax1.plot(epochs, val_f1s, 'g-')
ax1.set_ylabel('F1 Score (binary)', color='g')

# Right y-axis on a twin of ax1, so both curves share the epoch axis.
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='b')
ax2.plot(epochs, val_loss, 'b-')

# One x tick every two epochs (0, 2, ..., 20), applied to both axes.
even_epoch_ticks = range(0, len(epochs)+1, 2)
for axis in (ax1, ax2):
    axis.set_xticks(even_epoch_ticks)

plt.title('Validation F1-Score and Validation Loss\nfor Green Party Model training')
plt.show()

In [None]:
# Headline classification metrics of the Green model on its test split.
# Each scorer is called with scikit-learn's default arguments.
labels_true = df_test["label"]
labels_pred = df_test["preds"]
for metric_label, metric_fn in (
    ("accuracy", accuracy_score),
    ("precision", precision_score),
    ("recall", recall_score),
    ("F1-score", f1_score),
):
    print(f"Test set {metric_label}:", metric_fn(labels_true, labels_pred))

In [None]:
# Share of rows carrying the target code (value 1) in the training data,
# in the real test labels and in the test predictions.
# The printed values are fractions of the row count, not multiplied by 100.
for description, frame, column in (
    ("the training data", df_train, "green_code"),
    ("real codes", df_test, "label"),
    ("predicted codes", df_test, "preds"),
):
    positives = int((frame[column] == 1).sum())
    print(f"Percentage of target code in {description}:", positives / len(frame))

In [None]:
# Confusion matrix of the Green model on the test split.
# Rows are the real labels, columns the predictions:
#                 predicted 0     predicted 1
#   real 0        true neg        false pos
#   real 1        false neg       true pos
layout_legend = "[[True Neg -- False Pos]\n[ False Neg -- True Pos]]"
print(layout_legend)
count_matrix = confusion_matrix(df_test["label"], df_test["preds"])
print("\nAbsolut confusion matrix\n", count_matrix)

In [None]:
# Isolate the false positives (predicted 1, labelled 0) of the test split and
# tabulate, per real code, its share of all false-positive rows.
is_false_positive = df_test["preds"].eq(1) & df_test["label"].eq(0)
df_false_pos = df_test.loc[is_false_positive]
codes_distributions = df_false_pos["main_codes"].value_counts().div(len(df_false_pos))
codes_distributions

In [None]:
# Bar chart of the five most frequent real codes among the false positives.
# `codes_distributions` comes from value_counts(), which sorts by descending
# frequency, so the first five rows are the five most frequent codes.
# .head(5) always selects by position; a bare [0:5] slice on a Series whose
# index holds numeric codes is label-based when that index is float-typed.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.head(5).plot(kind='bar', ax=ax)

# Axis labels and title so the figure can be read on its own.
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false positives in Green Model test set')

plt.show()

In [None]:
# Show up to five false positives whose real code equals `target_code`.
pd.options.display.max_colwidth = 80  # cap the displayed width of text columns
target_code = 411
has_target_code = df_false_pos["main_codes"].eq(target_code)
df_false_pos.loc[has_target_code].head(5)

In [None]:
# Build the 2x2 table used to test whether the class distribution of the model
# predictions differs from that of the coders' labels on the test split.
# Row 0 holds the number of rows with value 0, row 1 the number with value 1;
# the "Model" column counts `preds`, the "Coders" column counts `label`.
contingency_table = pd.DataFrame({
    source: [int((df_test[column] == value).sum()) for value in (0, 1)]
    for source, column in (("Model", "preds"), ("Coders", "label"))
})

contingency_table

In [None]:
# Chi-squared test on the 2x2 table of class counts (model vs. coders).
# NOTE(review): both columns are counted over the same rows of df_test, so the
# two samples are paired rather than independent; a paired test such as
# McNemar's may be more appropriate -- confirm.
chi2_contingency(contingency_table)

This shows that the model's predictions are distributed very similarly to the original codes, as we would have expected.

### Inference Predictions (green)

In [None]:
# Headline classification metrics of the Green model on the inference split.
# The labels say "Inference set" because the scores are computed on
# df_inference, not on the test split.
labels_true = df_inference["label"]
labels_pred = df_inference["preds"]
for metric_label, metric_fn in (
    ("accuracy", accuracy_score),
    ("precision", precision_score),
    ("recall", recall_score),
    ("F1-score", f1_score),
):
    print(f"Inference set {metric_label}:", metric_fn(labels_true, labels_pred))

In [None]:
# Share of rows carrying the target code (value 1) in the training data,
# in the real inference labels and in the inference predictions.
# The printed values are fractions of the row count, not multiplied by 100.
for description, frame, column in (
    ("the training data", df_train, "green_code"),
    ("real codes", df_inference, "label"),
    ("predicted codes", df_inference, "preds"),
):
    positives = int((frame[column] == 1).sum())
    print(f"Percentage of target code in {description}:", positives / len(frame))

In [None]:
# Confusion matrix of the Green model on the inference split.
# Rows are the real labels, columns the predictions:
#                 predicted 0     predicted 1
#   real 0        true neg        false pos
#   real 1        false neg       true pos
layout_legend = "[[True Neg -- False Pos]\n[ False Neg -- True Pos]]"
print(layout_legend)
count_matrix = confusion_matrix(df_inference["label"], df_inference["preds"])
print("\nAbsolut confusion matrix\n", count_matrix)

In [None]:
# The inference split produces many false positives (as expected).
# Isolate them (predicted 1, labelled 0) and tabulate, per real code, its
# share of all false-positive rows.
is_false_positive = df_inference["preds"].eq(1) & df_inference["label"].eq(0)
df_false_pos = df_inference.loc[is_false_positive]
codes_distributions = df_false_pos["main_codes"].value_counts().div(len(df_false_pos))
codes_distributions

In [None]:
# Bar chart of the five most frequent real codes among the false positives.
# `codes_distributions` comes from value_counts(), which sorts by descending
# frequency, so the first five rows are the five most frequent codes.
# .head(5) always selects by position; a bare [0:5] slice on a Series whose
# index holds numeric codes is label-based when that index is float-typed.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.head(5).plot(kind='bar', ax=ax)

# Axis labels and title so the figure can be read on its own.
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false positives in Green Model inference set')

plt.show()

In [None]:
# Show up to five false positives whose real code equals `target_code`.
pd.options.display.max_colwidth = 80  # cap the displayed width of text columns
target_code = 703
has_target_code = df_false_pos["main_codes"].eq(target_code)
df_false_pos.loc[has_target_code].head(5)

In [None]:
# Build the 2x2 table used to test whether the class distribution of the model
# predictions differs from that of the coders' labels on the inference split.
# Row 0 holds the number of rows with value 0, row 1 the number with value 1;
# the "Model" column counts `preds`, the "Coders" column counts `label`.
contingency_table = pd.DataFrame({
    source: [int((df_inference[column] == value).sum()) for value in (0, 1)]
    for source, column in (("Model", "preds"), ("Coders", "label"))
})

contingency_table

In [None]:
# Chi-squared test on the 2x2 table of class counts (model vs. coders).
# NOTE(review): both columns are counted over the same rows of df_inference, so
# the two samples are paired rather than independent; a paired test such as
# McNemar's may be more appropriate -- confirm.
chi2_contingency(contingency_table)

So the distribution of the model predictions is very clearly significantly different from that of the real codes!

In [None]:
# Prepare the data for the graph showing how the false positives change:
# per real code, its share among the false positives of the test split versus
# its share among the false positives of the inference split.
all_codes = set(corpus_df["main_codes"].unique())

fp_code_shares = {}
for split_name, split_df in (("test", df_test), ("inference", df_inference)):
    df_false_pos = split_df[(split_df["preds"] == 1) & (split_df["label"] == 0)]
    observed_shares = df_false_pos["main_codes"].value_counts() / df_false_pos.shape[0]
    # Codes from the corpus that never occur among this split's false
    # positives are added with a share of 0.
    unseen_codes = pd.Series(0, index=all_codes - set(observed_shares.index))
    fp_code_shares[split_name] = pd.concat([observed_shares, unseen_codes]).sort_index()

d_test = fp_code_shares["test"]
d_inf = fp_code_shares["inference"]

# Difference going from test (green) to inference (non-green), scaled by 100
# and sorted ascending.
d_diff = (d_inf - d_test).sort_values() * 100

In [None]:
# Per-code change (inference share minus test share, times 100), sorted ascending.
d_diff

In [None]:
# Restrict both false-positive share series to the two codes singled out for
# the comparison plot and put them side by side, scaled by 100.
interesting_codes = [416, 703]
d_test_selection, d_inf_selection = (
    shares.loc[interesting_codes] for shares in (d_test, d_inf)
)
df_tmp = pd.DataFrame(
    {
        "Test set (Green manifestos)": d_test_selection.mul(100),
        "Inference set (non-Green manifestos)": d_inf_selection.mul(100),
    }
)

In [None]:
# Table behind the comparison bar chart: one row per selected code,
# one column per split.
df_tmp

In [None]:
# Grouped bar chart: share of the selected codes among the false positives,
# test split (dark green) next to inference split (grey).
ax = df_tmp.plot(kind='bar', color=['darkgreen', 'grey'], figsize=(10, 6))

# Axis labels and title.
ax.set_xlabel('Code')
ax.set_ylabel('Frequency')
ax.set_title('Green model: Frequency of select codes in the false positives')

# Show the y ticks as percentages. The tick positions are pinned before the
# labels are replaced, so every label stays attached to the tick it describes.
# ':g' keeps fractional tick values (2.5 -> "2.5%") instead of truncating them
# to an integer. set_yticks may widen the view to include every tick, so the
# original y-limits are restored afterwards.
ticks = ax.get_yticks()
y_limits = ax.get_ylim()
percent_ticks = [f'{t:g}%' for t in ticks]
ax.set_yticks(ticks)
ax.set_yticklabels(percent_ticks)
ax.set_ylim(y_limits)

# Human-readable names for the two codes, in the row order of df_tmp.
new_labels = ['416\nAnti Growth Economy: Positive', '703\nAgriculture and Farmers']
ax.set_xticklabels(new_labels, rotation=0)

plt.show()

## Now the same thing, but for the NonGreen Model

In [None]:
# Training curve of the non-Green Party model: validation F1 (left axis,
# green) and validation loss (right axis, blue) for each of the 20 epochs.
# The values are hard-coded in this cell rather than read from a log file.
epochs = range(1,21)
val_f1s = [0.7055, 0.7137, 0.7095, 0.7247, 0.6759, 0.7188, 0.7112, 0.7202, 0.7147, 0.7049,
           0.7059, 0.7013, 0.6979, 0.7301, 0.7189, 0.7112, 0.7099, 0.7217, 0.7269, 0.7215]
val_loss = [0.0767, 0.0995, 0.1216, 0.1275, 0.1521, 0.1643, 0.1660, 0.1855, 0.1845, 0.2165,
            0.2271, 0.2345, 0.2636, 0.2493, 0.2563, 0.2856, 0.2809, 0.3033, 0.3207, 0.3214]

fig, ax1 = plt.subplots()

# Left y-axis: F1 score.
ax1.set_xlabel('Epochs')
ax1.plot(epochs, val_f1s, 'g-')
ax1.set_ylabel('F1 Score (binary)', color='g')

# Right y-axis on a twin of ax1, so both curves share the epoch axis.
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='b')
ax2.plot(epochs, val_loss, 'b-')

# One x tick every two epochs (0, 2, ..., 20), applied to both axes.
even_epoch_ticks = range(0, len(epochs)+1, 2)
for axis in (ax1, ax2):
    axis.set_xticks(even_epoch_ticks)

plt.title('Validation F1-Score and Validation Loss\nfor non-Green Party Model training')
plt.show()

In [None]:
# Load the three CSV files of the non-Green model, in this order: test-set
# predictions, inference-set predictions, training data.
# Note: this rebinds df_test / df_inference / df_train, which the Green-model
# section above bound to the Green-model files.
df_test, df_inference, df_train = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/model_splits/green_split/non_green_as_train/nongreen_test_predictions.csv",
        "data/model_splits/green_split/non_green_as_train/nongreen_inference_predictions.csv",
        "data/model_splits/green_split/non_green_as_train/train-00000-of-00001.csv",
    )
)


In [None]:
# (rows, columns) of the non-Green-model test predictions.
df_test.shape

In [None]:
# (rows, columns) of the non-Green-model inference predictions.
df_inference.shape

In [None]:
# (rows, columns) of the non-Green-model training data.
df_train.shape

In [None]:
# Show a single row to see which columns the prediction file provides.
df_test.head(1)

### Test predictions (NonGreen): How well does the model perform? Are the predictions significantly different from the actual codes regarding environmental protection?


In [None]:
# Headline classification metrics of the non-Green model on its test split.
# Each scorer is called with scikit-learn's default arguments.
labels_true = df_test["label"]
labels_pred = df_test["preds"]
for metric_label, metric_fn in (
    ("accuracy", accuracy_score),
    ("precision", precision_score),
    ("recall", recall_score),
    ("F1-score", f1_score),
):
    print(f"Test set {metric_label}:", metric_fn(labels_true, labels_pred))

In [None]:
# Share of rows carrying the target code (value 1) in the training data,
# in the real test labels and in the test predictions.
# The printed values are fractions of the row count, not multiplied by 100.
for description, frame, column in (
    ("the training data", df_train, "green_code"),
    ("real codes", df_test, "label"),
    ("predicted codes", df_test, "preds"),
):
    positives = int((frame[column] == 1).sum())
    print(f"Percentage of target code in {description}:", positives / len(frame))

In [None]:
# Confusion matrix of the non-Green model on the test split.
# Rows are the real labels, columns the predictions:
#                 predicted 0     predicted 1
#   real 0        true neg        false pos
#   real 1        false neg       true pos
layout_legend = "[[True Neg -- False Pos]\n[ False Neg -- True Pos]]"
print(layout_legend)
count_matrix = confusion_matrix(df_test["label"], df_test["preds"])
print("\nAbsolut confusion matrix\n", count_matrix)

In [None]:
# Isolate the false positives (predicted 1, labelled 0) of the test split and
# tabulate, per real code, its share of all false-positive rows.
is_false_positive = df_test["preds"].eq(1) & df_test["label"].eq(0)
df_false_pos = df_test.loc[is_false_positive]
codes_distributions = df_false_pos["main_codes"].value_counts().div(len(df_false_pos))
codes_distributions

In [None]:
# Bar chart of the five most frequent real codes among the false positives.
# `codes_distributions` comes from value_counts(), which sorts by descending
# frequency, so the first five rows are the five most frequent codes.
# .head(5) always selects by position; a bare [0:5] slice on a Series whose
# index holds numeric codes is label-based when that index is float-typed.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.head(5).plot(kind='bar', ax=ax)

# Axis labels and title so the figure can be read on its own.
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false positives in Non-Green Model test set')

plt.show()

In [None]:
# Show up to five false positives whose real code equals `target_code`.
pd.options.display.max_colwidth = 80  # cap the displayed width of text columns
target_code = 411
has_target_code = df_false_pos["main_codes"].eq(target_code)
df_false_pos.loc[has_target_code].head(5)

In [None]:
# Build the 2x2 table used to test whether the class distribution of the model
# predictions differs from that of the coders' labels on the test split.
# Row 0 holds the number of rows with value 0, row 1 the number with value 1;
# the "Model" column counts `preds`, the "Coders" column counts `label`.
contingency_table = pd.DataFrame({
    source: [int((df_test[column] == value).sum()) for value in (0, 1)]
    for source, column in (("Model", "preds"), ("Coders", "label"))
})

contingency_table

Our model overpredicts 501 already here.

In [None]:
# Chi-squared test on the 2x2 table of class counts (model vs. coders).
# NOTE(review): both columns are counted over the same rows of df_test, so the
# two samples are paired rather than independent; a paired test such as
# McNemar's may be more appropriate -- confirm.
chi2_contingency(contingency_table)

So here the model predictions do differ significantly from the real codes, but not by a huge amount.

### Inference predictions (Non Green)

In [None]:
# Headline classification metrics of the non-Green model on the inference split.
# Each scorer is called with scikit-learn's default arguments.
labels_true = df_inference["label"]
labels_pred = df_inference["preds"]
for metric_label, metric_fn in (
    ("accuracy", accuracy_score),
    ("precision", precision_score),
    ("recall", recall_score),
    ("F1-score", f1_score),
):
    print(f"Inference set {metric_label}:", metric_fn(labels_true, labels_pred))

In [None]:
# Share of rows carrying the target code (value 1) in the training data,
# in the real inference labels and in the inference predictions.
# The printed values are fractions of the row count, not multiplied by 100.
for description, frame, column in (
    ("the training data", df_train, "green_code"),
    ("real codes", df_inference, "label"),
    ("predicted codes", df_inference, "preds"),
):
    positives = int((frame[column] == 1).sum())
    print(f"Percentage of target code in {description}:", positives / len(frame))

Very interesting that our model overpredicts in this case as well. We would definitely expect it to underpredict (if coders give fewer 501 codes to Non-Green parties).

In [None]:
# Confusion matrix of the non-Green model on the inference split.
# Rows are the real labels, columns the predictions:
#                 predicted 0     predicted 1
#   real 0        true neg        false pos
#   real 1        false neg       true pos
layout_legend = "[[True Neg -- False Pos]\n[ False Neg -- True Pos]]"
print(layout_legend)
count_matrix = confusion_matrix(df_inference["label"], df_inference["preds"])
print("\nAbsolut confusion matrix\n", count_matrix)

Looking at the False Negatives (need to look at the actual texts to find patterns) might be good!

In [None]:
# Isolate the false positives (predicted 1, labelled 0) of the inference split
# and tabulate, per real code, its share of all false-positive rows.
is_false_positive = df_inference["preds"].eq(1) & df_inference["label"].eq(0)
df_false_pos = df_inference.loc[is_false_positive]
codes_distributions = df_false_pos["main_codes"].value_counts().div(len(df_false_pos))
codes_distributions

In [None]:
# Bar chart of the five most frequent real codes among the false positives of
# the non-Green model on its inference split. The title names the non-Green
# model because df_test / df_inference hold the non-Green predictions here.
# `codes_distributions` comes from value_counts(), which sorts by descending
# frequency, so the first five rows are the five most frequent codes.
# .head(5) always selects by position; a bare [0:5] slice on a Series whose
# index holds numeric codes is label-based when that index is float-typed.
fig, ax = plt.subplots(figsize=(7, 4))
codes_distributions.head(5).plot(kind='bar', ax=ax)

# Axis labels and title so the figure can be read on its own.
ax.set_xlabel('Codes')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of real codes for false positives in Non-Green Model inference set')

plt.show()

416 (anti-growth/sustainability): likely coded as 501 more often when party is Green!

In [None]:
# Show up to five false positives whose real code equals `target_code`.
pd.options.display.max_colwidth = 80  # cap the displayed width of text columns
target_code = 416
has_target_code = df_false_pos["main_codes"].eq(target_code)
df_false_pos.loc[has_target_code].head(5)

In [None]:
# Build the 2x2 table used to test whether the class distribution of the model
# predictions differs from that of the coders' labels on the inference split.
# Row 0 holds the number of rows with value 0, row 1 the number with value 1;
# the "Model" column counts `preds`, the "Coders" column counts `label`.
contingency_table = pd.DataFrame({
    source: [int((df_inference[column] == value).sum()) for value in (0, 1)]
    for source, column in (("Model", "preds"), ("Coders", "label"))
})

contingency_table

We are predicting MORE environmental codes compared to the coders. This is not what we would expect tbh...

In [None]:
# Chi-squared test on the 2x2 table of class counts (model vs. coders).
# NOTE(review): both columns are counted over the same rows of df_inference, so
# the two samples are paired rather than independent; a paired test such as
# McNemar's may be more appropriate -- confirm.
chi2_contingency(contingency_table)

This difference is less significant than the one found on the test set, which is also not what we would expect.

In [None]:
# Prepare the data for the graph showing how the false positives change:
# per real code, its share among the false positives of the test split versus
# its share among the false positives of the inference split.
all_codes = set(corpus_df["main_codes"].unique())

fp_code_shares = {}
for split_name, split_df in (("test", df_test), ("inference", df_inference)):
    df_false_pos = split_df[(split_df["preds"] == 1) & (split_df["label"] == 0)]
    observed_shares = df_false_pos["main_codes"].value_counts() / df_false_pos.shape[0]
    # Codes from the corpus that never occur among this split's false
    # positives are added with a share of 0.
    unseen_codes = pd.Series(0, index=all_codes - set(observed_shares.index))
    fp_code_shares[split_name] = pd.concat([observed_shares, unseen_codes]).sort_index()

d_test = fp_code_shares["test"]
d_inf = fp_code_shares["inference"]

# Difference going from test (non-green) to inference (green), scaled by 100
# and sorted ascending.
d_diff = (d_inf - d_test).sort_values() * 100

In [None]:
# Per-code change (inference share minus test share, times 100), sorted ascending.
d_diff

In [None]:
# Restrict both false-positive share series to the two codes singled out for
# the comparison plot and put them side by side, scaled by 100.
interesting_codes = [416, 703]
d_test_selection, d_inf_selection = (
    shares.loc[interesting_codes] for shares in (d_test, d_inf)
)
df_tmp = pd.DataFrame(
    {
        "Test set (non-Green manifestos)": d_test_selection.mul(100),
        "Inference set (Green manifestos)": d_inf_selection.mul(100),
    }
)

In [None]:
# Grouped bar chart: share of the selected codes among the false positives,
# test split (grey) next to inference split (dark green).
ax = df_tmp.plot(kind='bar', color=['grey', 'darkgreen'], figsize=(10, 6))

# Axis labels and title.
ax.set_xlabel('Code')
ax.set_ylabel('Frequency')
ax.set_title('Non-Green model: Frequency of select codes in the false positives')

# Show the y ticks as percentages. The tick positions are pinned before the
# labels are replaced, so every label stays attached to the tick it describes.
# ':g' keeps fractional tick values (2.5 -> "2.5%") instead of truncating them
# to an integer. set_yticks may widen the view to include every tick, so the
# original y-limits are restored afterwards.
ticks = ax.get_yticks()
y_limits = ax.get_ylim()
percent_ticks = [f'{t:g}%' for t in ticks]
ax.set_yticks(ticks)
ax.set_yticklabels(percent_ticks)
ax.set_ylim(y_limits)

# Human-readable names for the two codes, in the row order of df_tmp.
new_labels = ['416\nAnti Growth Economy: Positive', '703\nAgriculture and Farmers']
ax.set_xticklabels(new_labels, rotation=0)

plt.show()

### Left party model

In [None]:
# Training curve of the Left Party model: validation F1 (left axis, green)
# and validation loss (right axis, blue) for each of the 20 epochs.
# The values are hard-coded in this cell rather than read from a log file.
epochs = range(1,21)
val_f1s = [0.7050, 0.7093, 0.7035, 0.7061, 0.7004, 0.6936, 0.7046, 0.6979, 0.7019, 0.7046,
           0.7030, 0.7085, 0.6997, 0.7046, 0.7032, 0.7036, 0.7038, 0.7072, 0.7051, 0.7066]
val_loss = [0.6205, 0.7307, 0.8032, 1.0851, 1.3747, 1.7668, 1.7892, 2.0678, 2.2576, 2.2779,
            2.4922, 2.6629, 2.7028, 2.7458, 2.7886, 2.9100, 2.8942, 2.9562, 3.0273, 3.0530]

fig, ax1 = plt.subplots()

# Left y-axis: F1 score.
ax1.set_xlabel('Epochs')
ax1.plot(epochs, val_f1s, 'g-')
ax1.set_ylabel('F1 Score (macro)', color='g')

# Right y-axis on a twin of ax1, so both curves share the epoch axis.
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='b')
ax2.plot(epochs, val_loss, 'b-')

# One x tick every two epochs (0, 2, ..., 20), applied to both axes.
even_epoch_ticks = range(0, len(epochs)+1, 2)
for axis in (ax1, ax2):
    axis.set_xticks(even_epoch_ticks)

plt.title('Validation F1-Score and Validation Loss\nfor Left Party Model training')
plt.show()

### Right party model

In [None]:
# Training curve of the Right Party model: validation F1 (left axis, green)
# and validation loss (right axis, blue) for each of the 20 epochs.
# The values are hard-coded in this cell rather than read from a log file.
epochs = range(1,21)
val_f1s = [0.7832, 0.8012, 0.8108, 0.8148, 0.8045, 0.8170, 0.8123, 0.8187, 0.8165, 0.8121,
           0.8155, 0.8216, 0.8128, 0.8247, 0.8271, 0.8251, 0.8234, 0.8218, 0.8218, 0.8243]
val_loss = [0.5295, 0.5219, 0.6031, 0.7936, 1.0773, 1.1831, 1.3600, 1.4785, 1.6175, 1.6854,
            1.6336, 1.6960, 1.8910, 1.8448, 1.8517, 1.9199, 1.9848, 2.0593, 2.0637, 2.0698]

fig, ax1 = plt.subplots()

# Left y-axis: F1 score.
ax1.set_xlabel('Epochs')
ax1.plot(epochs, val_f1s, 'g-')
ax1.set_ylabel('F1 Score (macro)', color='g')

# Right y-axis on a twin of ax1, so both curves share the epoch axis.
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='b')
ax2.plot(epochs, val_loss, 'b-')

# One x tick every two epochs (0, 2, ..., 20), applied to both axes.
even_epoch_ticks = range(0, len(epochs)+1, 2)
for axis in (ax1, ax2):
    axis.set_xticks(even_epoch_ticks)

plt.title('Validation F1-Score and Validation Loss\nfor Right Party Model training')
plt.show()