In [None]:
import pandas as pd

In [None]:
# Read the source CSV into `df`; every later cell derives its data from this frame.
mdb_csv_path = '/content/drive/MyDrive/Democracy/mdb_all.csv'
df = pd.read_csv(mdb_csv_path)

In [None]:
# Banner line followed by the full list of column names, one print call.
print("--- ALL COLUMNS ---", list(df.columns), sep="\n")

In [None]:
# Banner, then the first rows of the frame rendered through the notebook's display().
print("\n--- SAMPLE DATA ---")
sample_rows = df.head()
display(sample_rows)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer

In [None]:
# SETUP: define our subjective target vs. the objective features.
# TARGET_COL is the column the model is trained to predict and the "actual"
# score that predictions are later compared against.
TARGET_COL = 'v2x_polyarchy' # V-Dem Democracy Score (0 to 1)

In [None]:
# Predictor columns fed to the model. 'year' is also what the train/test split
# filters on further down, so it has to stay in this list for that split to work.
# NOTE(review): tree ensembles cannot extrapolate, so every test year beyond the
# training range is presumably treated like the last training year — confirm that
# using 'year' as a predictor (not just as the split key) is intended.
FEATURE_COLS = [
    'partylose',            # Did the incumbent party lose? (1=Yes, 0=No)
    'eiec',                 # Executive Electoral Competitiveness (1-7 scale)
    'liec',                 # Legislative Electoral Competitiveness (1-7 scale)
    'journalists_imprison', # Count of jailed journalists
    'termlimit.x',          # Existence of term limits
    'oppvote',              # Opposition vote share
    'year'                  # Time trend
]

In [None]:
# Narrow the frame to the target, the country label and the predictors;
# .copy() detaches the result from `df`.
model_columns = [TARGET_COL, 'country_name', *FEATURE_COLS]
df_model = df.loc[:, model_columns].copy()

In [None]:
# Rows with no target value can neither train nor score the model, so drop them.
has_target = df_model[TARGET_COL].notna()
df_model = df_model.loc[has_target]

In [None]:
# Fill missing predictor values with per-column medians.
# The medians are learned from the training era only (1990-2010, the same window
# the train/test split uses) so that statistics from the held-out 2011+ period
# do not leak into the features the model is trained and evaluated on.
imputer = SimpleImputer(strategy='median')
X_full = df_model[FEATURE_COLS]
in_training_era = X_full['year'].between(1990, 2010)  # rows with a missing year evaluate to False
imputer.fit(X_full.loc[in_training_era])
# transform() returns a bare array, so rebuild a frame carrying the feature names.
# The result has a fresh 0..n-1 index, exactly as before.
X_imputed = pd.DataFrame(imputer.transform(X_full), columns=FEATURE_COLS)

In [None]:
# Re-attach the target and country label to the imputed predictors. Both pieces
# are renumbered from 0 so the column-wise concat pairs rows by position.
labels_part = df_model[[TARGET_COL, 'country_name']].reset_index(drop=True)
features_part = X_imputed.reset_index(drop=True)
df_clean = pd.concat([labels_part, features_part], axis=1)

In [None]:
# THE "TIME TRAVEL" SPLIT
# Train on the "Good Times": 1990-2010, both ends inclusive.
# Test on the "Backsliding Era": every row after 2010 (no upper year bound is applied).
is_train_period = df_clean['year'].between(1990, 2010)
is_test_period = df_clean['year'].gt(2010)
train_data = df_clean.loc[is_train_period]
# Copy the test slice so later cells can add columns to it without touching df_clean.
test_data = df_clean.loc[is_test_period].copy()

In [None]:
# Report how many rows landed on each side of the split.
print(
    f"Training on {len(train_data)} country-years (1990-2010)...",
    f"Testing on {len(test_data)} country-years (2011+)...",
    sep="\n",
)

In [None]:
# TRAIN THE MODEL
# Depth-limited random forest fitted on the training period only; the fixed
# random_state keeps the fitted forest the same across reruns.
X_train = train_data[FEATURE_COLS]
y_train = train_data[TARGET_COL]
rf = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
rf.fit(X_train, y_train)

In [None]:
# PREDICT & CALCULATE THE "PANIC GAP"
# Score the held-out rows with the model fitted on the training period: what the
# democracy score *should* be if the training-era relationship still held.
objective_predictions = rf.predict(test_data[FEATURE_COLS])
test_data['predicted_score_objective'] = objective_predictions

In [None]:
# Calculate the residual (gap): predicted (objective) minus actual (subjective).
# A positive gap means the expert score is lower than the model's prediction.
predicted = test_data['predicted_score_objective']
actual = test_data[TARGET_COL]
test_data['subjectivity_gap'] = predicted.sub(actual)

In [None]:
# VISUALIZE THE GLOBAL TREND
fig, ax = plt.subplots(figsize=(12, 6))

# One line across years: seaborn aggregates all rows that share a year.
sns.lineplot(data=test_data, x='year', y='subjectivity_gap',
             color='red', linewidth=3,
             label='Coder Bias (Objective - Subjective)', ax=ax)

# Zero reference line: predicted and actual scores agree.
ax.axhline(0, color='black', linestyle='--', label='No Bias')
ax.set_title('The "Subjectivity Gap": Are Experts Being Harsher?', fontsize=16)
ax.set_ylabel('Score Discrepancy (Points)', fontsize=12)
ax.set_xlabel('Year', fontsize=12)
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# IDENTIFY THE OUTLIERS (2019 data)
# Who are the experts "punishing" the most?
print("\n--- TOP 10 COUNTRIES WITH 'HARSH' RATINGS (2019) ---")
print("Positive Gap = Model thinks country is more democratic than V-Dem says")
report_columns = ['country_name', 'v2x_polyarchy', 'predicted_score_objective', 'subjectivity_gap']
latest_data = test_data.loc[test_data['year'].eq(2019)]
# Largest positive gaps first, then keep the ten biggest.
harshest = latest_data.sort_values('subjectivity_gap', ascending=False)[report_columns].head(10)
print(harshest.to_string(index=False))

In [None]:
import geopandas as gpd

In [None]:
# Build a country_name -> country_text_id lookup for joining scores onto the map.
# `df` was loaded from this same CSV earlier in the notebook, so reuse it rather
# than reading the file a second time.
df_map_source = df
# Keep exactly one code per country name. Dropping duplicates on the
# (name, code) pair alone still leaves several rows for a name that appears with
# more than one code or with a missing code, and those would multiply rows in the
# later left-merge on 'country_name'.
iso_lookup = (
    df_map_source[['country_name', 'country_text_id']]
    .dropna(subset=['country_text_id'])
    .drop_duplicates(subset='country_name')
)

In [None]:
# Attach each row's country code; the left join keeps every test row even when
# no code is found for its country name.
map_data = pd.merge(test_data, iso_lookup, how='left', on='country_name')

In [None]:
# Keep only the 2019 rows for the map; the copy makes the slice independent of map_data.
is_2019 = map_data['year'].eq(2019)
latest_year_data = map_data.loc[is_2019].copy()

In [None]:
# Remote zip archive of country shapes; per its file name this is the Natural Earth
# 110m "admin 0 countries" layer. It is read directly from this URL in the next cell.
url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"

In [None]:
# Load the country shapes from the remote archive into `world`.
# NOTE(review): this fetches over the network on every run — consider caching a local copy.
world = gpd.read_file(url)

In [None]:
# Expose the ADM0_A3 code under an 'iso_a3' column, which the merge below uses as its key.
world = world.assign(iso_a3=world['ADM0_A3'])

In [None]:
# Left-join the 2019 scores onto the shapes so countries without data stay on the map.
world_merged = world.merge(
    right=latest_year_data,
    how='left',
    left_on='iso_a3',
    right_on='country_text_id',
)

In [None]:
# Draw a world choropleth of the 2019 subjectivity gap and save it as a PNG.
fig, ax = plt.subplots(1, 1, figsize=(20, 10))

# Centre the diverging colormap on zero. 'coolwarm' only reads as
# "red = positive gap, blue = negative gap" when the colour limits are symmetric;
# with automatic limits the neutral midpoint sits at the middle of the data
# range, so the legend's "Red = Experts Too Harsh" could be wrong.
gap_limit = world_merged['subjectivity_gap'].abs().max()
if pd.isna(gap_limit) or gap_limit == 0:
    # No usable spread (all missing or all zero): fall back to automatic limits.
    color_min = color_max = None
else:
    color_min, color_max = -gap_limit, gap_limit

# Plot the map on the explicit axes created above.
world_merged.plot(column='subjectivity_gap',
                  ax=ax,
                  legend=True,
                  cmap='coolwarm',
                  vmin=color_min,
                  vmax=color_max,
                  missing_kwds={'color': '#f0f0f0'}, # Light grey for missing data
                  legend_kwds={'label': "Subjectivity Gap (Red = Experts Too Harsh)",
                               'orientation': "horizontal"})

# Title the map axes explicitly (not whichever axes pyplot considers current),
# hide the lon/lat frame, and save before show() so the file has the full figure.
ax.set_title('The "Subjectivity Gap": Where do Experts Disagree with Objective Data? (2019)', fontsize=20)
ax.set_axis_off()
fig.savefig('subjectivity_map.png', dpi=300, bbox_inches='tight')
plt.show()