# 01 Predicting Armed Conflict Using Protest Data - Query sets

Content: This notebook is part of the "Predicting Armed Conflict Using Protest Data" paper. It includes the specification of the query sets needed for the analysis. The analysis itself is executed in the "predicting_armed_conflict_using_protest_data_02_analysis" Jupyter notebook. Additional transformations are defined in the "predicting_armed_conflict_using_protest_data_transforms" .py file.

Last updated: 01.09.2022 (includes 24 queries)

## Overview
* [Importing modules](#modules)
* [Defining query sets](#queries)

## Loading modules<a class="anchor" id="modules"></a>

In [1]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import geopandas as gpd

# Models
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
import views_dataviz
from views_runs import storage
from ingester3.config import source_db_path

# Additional transforms from views2
from views_transformation_library.views_2 import ln

# Other packages
import pickle as pkl
from datetime import datetime
import sqlalchemy as sa

  from pandas import MultiIndex, Int64Index


## Views3 overview

In [2]:
# Shell escape: list the tables known to viewser (name and path of each table).
!viewser tables list


  > Tables
  ---------------------------------------------------------------------------------------
  | name                                    | path                                    |
  |:----------------------------------------|:----------------------------------------|
  | values_actor_pgm_actor_qlag2_tlag24     | values_actor_pgm_actor_qlag2_tlag24     |
  | values_actor_pgm_actor                  | values_actor_pgm_actor                  |
  | values_actor_pgm_actor_qlag1            | values_actor_pgm_actor_qlag1            |
  | values_actor_pgm_actor_qlag1_persistent | values_actor_pgm_actor_qlag1_persistent |
  | values_actor_pgm_actor_qlag1_tlag24     | values_actor_pgm_actor_qlag1_tlag24     |
  | values_actor_pgm_actor_qlag1_tlag6      | values_actor_pgm_actor_qlag1_tlag6      |
  | values_actor_pgm_actor_qlag2_persistent | values_actor_pgm_actor_qlag2_persistent |
  | values_actor_pgm_actor_persistent       | values_actor_pgm_actor_persistent       |
  | tbl_734eevdem_

## Define querysets<a class="anchor" id="queries"></a>

### Slim baseline 

(Can be removed after revision.)

Features specified in our previous baseline model: 

baseline_model_sb = [
    'tlag_0_ged_dummy_sb', 
    'ln_splag_1_1_ged_best_sb',
    'ln_ged_best_sb',
    'decay_12_ts_ged_dummy_sb'
]

In [3]:
# Slim baseline queryset at the priogrid-month (pgm) level.
# All five columns are derived from the same source column,
# ged2_pgm.ged_sb_best_sum_nokgi; only the transform chain differs.
qs = (
    Queryset("protest_paper_old_baseline_incidence", "priogrid_month")

    # Dependent variable: conflict dummy (missing values replaced, then gte(1))
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Same dummy used as a feature: time lag 0 (i.e. tlag 1 in the prediction framework)
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Logged fatalities, time lag 0
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.ops.ln()
    )

    # Decay function (12 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
    )

    # Spatial lag of the conflict dummy
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
    )

    # Queryset metadata
    .with_theme("protest_paper")
    .describe("""Protest simple baseline model, pgm level

            Predicting armed conflict (dummy) using protest data, simple- very short - baseline

            """)
)

# Publish the queryset definition and download the resulting data frame.
df_baseline_slim = qs.publish().fetch()

# Summary: number of columns, range of index level 0, distinct values of index level 1.
print(f"A dataset with {len(df_baseline_slim.columns)} columns, with "
      f"data between t = {min(df_baseline_slim.index.get_level_values(0))} "
      f"and {max(df_baseline_slim.index.get_level_values(0))}. "
      f"({len(np.unique(df_baseline_slim.index.get_level_values(1)))} units)"
     )

 .      o      O  A dataset with 5 columns, with data between t = 1 and 852. (13110 units)


### Extended Baseline

In [4]:
# Shell escape: show the columns of the ged2_pgm table (source of all conflict features below).
!viewser tables show ged2_pgm


  > Ged2_pgm
  -------------------------------------------------------
  | name                    | path                    |
  |:------------------------|:------------------------|
  | priogrid_month_id       | priogrid_month_id       |
  | ged_sb_best_sum_nokgi   | ged_sb_best_sum_nokgi   |
  | ged_ns_best_sum_nokgi   | ged_ns_best_sum_nokgi   |
  | ged_os_best_sum_nokgi   | ged_os_best_sum_nokgi   |
  | ged_sb_best_count_nokgi | ged_sb_best_count_nokgi |
  | ged_ns_best_count_nokgi | ged_ns_best_count_nokgi |
  | ged_os_best_count_nokgi | ged_os_best_count_nokgi |
  | ged_sb_high_sum_nokgi   | ged_sb_high_sum_nokgi   |
  | ged_ns_high_sum_nokgi   | ged_ns_high_sum_nokgi   |
  | ged_os_high_sum_nokgi   | ged_os_high_sum_nokgi   |
  | ged_sb_high_count_nokgi | ged_sb_high_count_nokgi |
  | ged_ns_high_count_nokgi | ged_ns_high_count_nokgi |
  | ged_os_high_count_nokgi | ged_os_high_count_nokgi |
  
  -------------------------------------------------------



In [5]:
# Extended baseline queryset at the priogrid-month (pgm) level.
# Every column is derived from ged2_pgm.ged_sb_best_sum_nokgi; compared with the
# slim baseline it adds a 24-month decay and two logged moving sums.
qs = (
    Queryset("protest_paper_extended_baseline_incidence", "priogrid_month")

    # Dependent variable: conflict dummy (missing values replaced, then gte(1))
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Same dummy used as a feature, time lag 0
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Logged fatalities, time lag 0
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.ops.ln()
    )

    # Decay function (12 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
    )

    # Decay function (24 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
    )

    # Spatial lag of the conflict dummy
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
    )

    # Logged moving sum of fatalities over 12 months
    .with_column(
        Column("ln_mov_sum_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.temporal.moving_sum(12)
        .transform.ops.ln()
    )

    # Logged moving sum of fatalities over 24 months
    .with_column(
        Column("ln_mov_sum_24_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.temporal.moving_sum(24)
        .transform.ops.ln()
    )

    # Queryset metadata
    .with_theme("protest_paper")
    .describe("""Protest extended baseline model, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset definition and download the resulting data frame.
df_baseline = qs.publish().fetch()

# Summary: number of columns, range of index level 0, distinct values of index level 1.
print(f"A dataset with {len(df_baseline.columns)} columns, with "
      f"data between t = {min(df_baseline.index.get_level_values(0))} "
      f"and {max(df_baseline.index.get_level_values(0))}. "
      f"({len(np.unique(df_baseline.index.get_level_values(1)))} units)"
     )

 .      o      O      O      oA dataset with 8 columns, with data between t = 1 and 852. (13110 units)


### Extended Baseline + economic development & political institution models

In [None]:
# Shell escape: show the columns of the wdi_cy table (source of the country-level economic features).
!viewser tables show wdi_cy

#### Baseline + Economic development, country level

In [None]:
# Extended baseline + country-level economic development, priogrid-month (pgm) level.
# Conflict features come from ged2_pgm.ged_sb_best_sum_nokgi, economic features
# from the wdi_cy table.
qs = (
    Queryset("protest_paper_econ_national_bl", "priogrid_month")

    # Dependent variable: conflict dummy (missing values replaced, then gte(1))
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Same dummy used as a feature, time lag 0
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Logged fatalities, time lag 0
    # NOTE(review): unlike the baseline querysets, ln() runs before missing-value
    # handling here (ln -> fill instead of replace_na -> ln) -- confirm this is intended.
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay function (12 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Decay function (24 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag of the conflict dummy
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay (12 months) of the spatial lag: the lag is turned back into a dummy
    # with gte(1) before time_since() / decay() are applied
    .with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average of logged fatalities over 24 months
    # NOTE(review): the column is named "mov_avg_12_..." but the window is 24 months.
    # The name is kept unchanged because downstream code may reference it -- confirm
    # which window is intended.
    .with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    ### ECONOMIC DEVELOPMENT, country level (wdi_cy)
    # WDI NY.GDP.PCAP.KD
    .with_column(
        Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
        .transform.missing.fill()
    )
    # WDI NY.GDP.PCAP.KD.ZG
    .with_column(
        Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
        .transform.missing.fill()
    )
    # WDI SL.UEM.TOTL.ZS -- must read its own source column; it previously pulled
    # wdi_ny_gdp_pcap_kd_zg, which made this feature a duplicate of the column above.
    .with_column(
        Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
        .transform.missing.fill()
    )

    # Queryset metadata
    .with_theme("protest_paper")
    .describe("""Protest economic development model (country-level variables) including extended baseline, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset definition and download the resulting data frame.
df_econ_nat_bl = qs.publish().fetch()

# Summary: number of columns, range of index level 0, distinct values of index level 1.
print(f"A dataset with {len(df_econ_nat_bl.columns)} columns, with "
      f"data between t = {min(df_econ_nat_bl.index.get_level_values(0))} "
      f"and {max(df_econ_nat_bl.index.get_level_values(0))}. "
      f"({len(np.unique(df_econ_nat_bl.index.get_level_values(1)))} units)"
     )

#### Baseline + Economic development, national and sub-national level

In [None]:
# Shell escape: show the columns of the priogrid_year table (source of the sub-national features).
!viewser tables show priogrid_year

In [None]:
# Extended baseline + economic development at country AND sub-national level,
# priogrid-month (pgm) level. Conflict features come from
# ged2_pgm.ged_sb_best_sum_nokgi, country-level features from wdi_cy and
# sub-national features from priogrid_year.
qs = (
    Queryset("protest_paper_econ_full_bl", "priogrid_month")

    # Dependent variable: conflict dummy (missing values replaced, then gte(1))
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Same dummy used as a feature, time lag 0
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Logged fatalities, time lag 0
    # NOTE(review): unlike the baseline querysets, ln() runs before missing-value
    # handling here (ln -> fill instead of replace_na -> ln) -- confirm this is intended.
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay function (12 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Decay function (24 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag of the conflict dummy
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay (12 months) of the spatial lag: the lag is turned back into a dummy
    # with gte(1) before time_since() / decay() are applied
    .with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average of logged fatalities over 24 months
    # NOTE(review): the column is named "mov_avg_12_..." but the window is 24 months.
    # The name is kept unchanged because downstream code may reference it -- confirm
    # which window is intended.
    .with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    ### ECONOMIC DEVELOPMENT, country level (wdi_cy)
    # WDI NY.GDP.PCAP.KD
    .with_column(
        Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
        .transform.missing.fill()
    )
    # WDI NY.GDP.PCAP.KD.ZG
    .with_column(
        Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
        .transform.missing.fill()
    )
    # WDI SL.UEM.TOTL.ZS -- must read its own source column; it previously pulled
    # wdi_ny_gdp_pcap_kd_zg, which made this feature a duplicate of the column above.
    .with_column(
        Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
        .transform.missing.fill()
    )

    ### ECONOMIC DEVELOPMENT, sub-national level (priogrid_year)
    # Each column is filled first; any value still missing is then replaced.
    .with_column(
        Column("pgd_gcp_mer", from_table="priogrid_year", from_column="gcp_mer")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(
        Column("pgd_imr_mean", from_table="priogrid_year", from_column="imr_mean")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(
        Column("pgd_urban_ih", from_table="priogrid_year", from_column="urban_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(
        Column("pgd_agri_ih", from_table="priogrid_year", from_column="agri_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    # Population is the only column that is additionally extrapolated
    .with_column(
        Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

    # Queryset metadata
    .with_theme("protest_paper")
    .describe("""Protest economic development model (sub-national variables) including extended baseline and economic development variables on the country level, pgm level
        
            Note that additional transformations need to be implement after the queryset was performed. 

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset definition and download the resulting data frame.
df_econ_full_bl = qs.publish().fetch()

# Summary: number of columns, range of index level 0, distinct values of index level 1.
print(f"A dataset with {len(df_econ_full_bl.columns)} columns, with "
      f"data between t = {min(df_econ_full_bl.index.get_level_values(0))} "
      f"and {max(df_econ_full_bl.index.get_level_values(0))}. "
      f"({len(np.unique(df_econ_full_bl.index.get_level_values(1)))} units)"
     )

#### Baseline + Political institutions model (I)

In [None]:
# Shell escape: show the columns of the vdem_v12_cy table (source of the V-Dem features).
!viewser tables show vdem_v12_cy

In [None]:
# Extended baseline + political institutions I (electoral democracy),
# priogrid-month (pgm) level. Conflict features come from
# ged2_pgm.ged_sb_best_sum_nokgi, the institutional feature from vdem_v12_cy.
qs = (
    Queryset("protest_paper_inst_elecdemo_bl", "priogrid_month")

    # Dependent variable: conflict dummy (missing values replaced, then gte(1))
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Same dummy used as a feature, time lag 0
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Logged fatalities, time lag 0 (ln first, then fill)
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay function (12 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Decay function (24 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag of the conflict dummy
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay (12 months) of the spatial lag: the lag is turned back into a dummy
    # with gte(1) before time_since() / decay() are applied
    .with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average of logged fatalities over 24 months
    .with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    # POLITICAL INSTITUTIONS
    # Electoral democracy (v2x_polyarchy), lagged by 12 and filled before and after the lag
    .with_column(
        Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Queryset metadata
    .with_theme("protest_paper")
    .describe("""Protest political instiutions I (elecdemo) including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset definition and download the resulting data frame.
df_inst_elecdemo_bl = qs.publish().fetch()

# Summary: number of columns, range of index level 0, distinct values of index level 1.
print(f"A dataset with {len(df_inst_elecdemo_bl.columns)} columns, with "
      f"data between t = {min(df_inst_elecdemo_bl.index.get_level_values(0))} "
      f"and {max(df_inst_elecdemo_bl.index.get_level_values(0))}. "
      f"({len(np.unique(df_inst_elecdemo_bl.index.get_level_values(1)))} units)"
     )

#### Baseline + Political institutions model (II)

In [None]:
# Extended baseline + political institutions II (electoral democracy + civil
# liberties), priogrid-month (pgm) level. Conflict features come from
# ged2_pgm.ged_sb_best_sum_nokgi, institutional features from the V-Dem v12 tables.
qs = (
    Queryset("protest_paper_inst_civlib_bl", "priogrid_month")

    # Dependent variable: conflict dummy (missing values replaced, then gte(1))
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Same dummy used as a feature, time lag 0
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Logged fatalities, time lag 0 (ln first, then fill)
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay function (12 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Decay function (24 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag of the conflict dummy
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay (12 months) of the spatial lag: the lag is turned back into a dummy
    # with gte(1) before time_since() / decay() are applied
    .with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average of logged fatalities over 24 months
    .with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    # POLITICAL INSTITUTIONS
    # Electoral democracy (v2x_polyarchy), lagged by 12 and filled before and after the lag
    .with_column(
        Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Civil liberties (v2x_civlib), same lag / fill treatment
    .with_column(
        Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # v2clrgunev, same lag / fill treatment. An earlier version of this column read
    # vdem_v11_cy.vdem_v2clrgunev; it now reads tbl_734eevdem_v12_cy.vdem_v12_v2clrgunev.
    .with_column(
        Column("vdem_v2clrgunev_tlag12", from_table="tbl_734eevdem_v12_cy", from_column="vdem_v12_v2clrgunev")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Queryset metadata
    .with_theme("protest_paper")
    .describe("""Protest political instiutions II (electoral democracy + civil liberties) including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset definition and download the resulting data frame.
df_inst_civlib_bl = qs.publish().fetch()

# Summary: number of columns, range of index level 0, distinct values of index level 1.
print(f"A dataset with {len(df_inst_civlib_bl.columns)} columns, with "
      f"data between t = {min(df_inst_civlib_bl.index.get_level_values(0))} "
      f"and {max(df_inst_civlib_bl.index.get_level_values(0))}. "
      f"({len(np.unique(df_inst_civlib_bl.index.get_level_values(1)))} units)"
     )

#### Baseline + Political institutions model (III)

In [None]:
# Shell escape: show the columns of the reign_cm table (source of the election features).
!viewser tables show reign_cm

In [None]:
# Extended baseline + political institutions III (electoral democracy + civil
# liberties + elections), priogrid-month (pgm) level. Conflict features come from
# ged2_pgm.ged_sb_best_sum_nokgi, institutional features from the V-Dem v12
# tables and election features from reign_cm.
qs = (
    Queryset("protest_paper_inst_elect_bl", "priogrid_month")

    # Dependent variable: conflict dummy (missing values replaced, then gte(1))
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Same dummy used as a feature, time lag 0
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Logged fatalities, time lag 0 (ln first, then fill)
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay function (12 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Decay function (24 months) applied to time_since() of the conflict dummy
    .with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag of the conflict dummy
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay (12 months) of the spatial lag: the lag is turned back into a dummy
    # with gte(1) before time_since() / decay() are applied
    .with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average of logged fatalities over 24 months
    .with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    # POLITICAL INSTITUTIONS
    # Electoral democracy (v2x_polyarchy), lagged by 12 and filled before and after the lag
    .with_column(
        Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Civil liberties (v2x_civlib), same lag / fill treatment
    .with_column(
        Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # v2clrgunev, read from a separate V-Dem v12 table, same lag / fill treatment
    .with_column(
        Column("vdem_v2clrgunev_tlag12", from_table="tbl_734eevdem_v12_cy", from_column="vdem_v12_v2clrgunev")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Elections (reign_cm): only missing values are replaced, no lagging
    .with_column(
        Column("lastelection", from_table="reign_cm", from_column="lastelection")
        .transform.missing.replace_na()
    )
    .with_column(
        Column("anticipation", from_table="reign_cm", from_column="anticipation")
        .transform.missing.replace_na()
    )

    # Queryset metadata
    .with_theme("protest_paper")
    .describe("""Protest political instiutions III (electoral democracy + civil liberties + elections) including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset definition and download the resulting data frame.
df_inst_elect_bl = qs.publish().fetch()

# Summary: number of columns, range of index level 0, distinct values of index level 1.
print(f"A dataset with {len(df_inst_elect_bl.columns)} columns, with "
      f"data between t = {min(df_inst_elect_bl.index.get_level_values(0))} "
      f"and {max(df_inst_elect_bl.index.get_level_values(0))}. "
      f"({len(np.unique(df_inst_elect_bl.index.get_level_values(1)))} units)"
     )

#### Baseline + Political institutions model (IV)

In [None]:
def _build_inst_devi_queryset():
    """Assemble the "protest_paper_inst_devi_bl" queryset (priogrid_month).

    Columns are registered in a fixed order: first the extended baseline
    built from ``ged2_pgm``, then the deviation-model features (raw, lagged,
    spatially lagged and logged ``acled2_pgm`` / ``ged2_pgm`` counts, two
    ``vdem_v12_cy`` indices and the ``priogrid_year`` population sum).
    """
    ged, acled, vdem = "ged2_pgm", "acled2_pgm", "vdem_v12_cy"
    ged_sb, ged_os = "ged_sb_best_sum_nokgi", "ged_os_best_sum_nokgi"
    prpe, prex = "acled_c1_count", "acled_c3_count"

    def raw(name, table, field):
        """Untransformed column `field` of `table`, exposed as `name`."""
        return Column(name, from_table=table, from_column=field)

    def filled(name, table, field):
        """Column whose chain starts with missing.replace_na()."""
        return raw(name, table, field).transform.missing.replace_na()

    def sb_dummy(name):
        """ged_sb_best_sum_nokgi: replace_na, then thresholded with gte(1)."""
        return filled(name, ged, ged_sb).transform.bool.gte(1)

    def decayed(column, months):
        """Append time_since -> decay(months) -> fill to `column`."""
        return (column
                .transform.temporal.time_since()
                .transform.temporal.decay(months)
                .transform.missing.fill())

    def neighbours(column, *lag_args):
        """Append spatial.lag(*lag_args) -> replace_na to `column`."""
        return (column
                .transform.spatial.lag(*lag_args)
                .transform.missing.replace_na())

    def tlag1(column, log=False):
        """Append tlag(1) -> replace_na -> fill, plus a final ln when `log`."""
        column = (column
                  .transform.temporal.tlag(1)
                  .transform.missing.replace_na()
                  .transform.missing.fill())
        return column.transform.ops.ln() if log else column

    def vdem_tlag12(name, field):
        """V-Dem column: fill -> tlag(12) -> fill."""
        return (raw(name, vdem, field)
                .transform.missing.fill()
                .transform.temporal.tlag(12)
                .transform.missing.fill())

    columns = [
        # ---- Extended baseline -------------------------------------------
        # Target variable.
        sb_dummy("ged_sb_dummy_dep"),
        # Same dummy at time lag 0, used as a feature.
        sb_dummy("ged_sb_dummy"),
        # Logged fatalities at time lag 0.
        raw("ln_ged_sb", ged, ged_sb).transform.ops.ln().transform.missing.fill(),
        # Decay functions of the dummy (12 and 24 months).
        decayed(sb_dummy("decay_ts_12_ged_sb_dummy"), 12),
        decayed(sb_dummy("decay_ts_24_ged_sb_dummy"), 24),
        # Spatial lag of the dummy.
        neighbours(sb_dummy("splag_1_1_ged_sb_dummy"), 1, 1, 0, 0).transform.missing.fill(),
        # Decay of the (re-thresholded) spatial lag.
        decayed(
            neighbours(sb_dummy("decay_ts_12_splag_1_1_ged_sb_dummy"), 1, 1, 0, 0)
            .transform.bool.gte(1),
            12,
        ),
        # Moving average of logged fatalities.
        # NOTE(review): the column name says 12 but the window passed is 24 -
        # confirm which one is intended.
        (raw("mov_avg_12_ged_best_sb", ged, ged_sb)
         .transform.ops.ln()
         .transform.missing.replace_na()
         .transform.temporal.moving_average(24)
         .transform.missing.fill()),

        # ---- Deviation features ------------------------------------------
        # Contemporaneous ACLED counts.
        filled("acled_prpe_count", acled, prpe).transform.missing.fill(),
        filled("acled_prex_count", acled, prex).transform.missing.fill(),
        # V-Dem indices, lagged 12 steps.
        vdem_tlag12("vdem_v2x_polyarchy_tlag12", "vdem_v12_v2x_polyarchy"),
        vdem_tlag12("vdem_v2x_civlib_tlag12", "vdem_v12_v2x_civlib"),
        # Counts lagged by one step.
        tlag1(filled("acled_prpe_count_tlag1", acled, prpe)),
        tlag1(filled("acled_prex_count_tlag1", acled, prex)),
        tlag1(filled("geb_sb_best_tlag1", ged, ged_sb)),
        tlag1(filled("geb_os_best_tlag1", ged, ged_os)),
        # Spatial lags of the counts, lagged by one step.
        tlag1(neighbours(filled("splag_1_1_ged_sb_best_tlag1", ged, ged_sb), 1, 1, 0, 0)),
        # NOTE(review): named "dummy" although no gte(1) threshold is applied.
        tlag1(neighbours(filled("splag_1_1_ged_os_dummy_tlag1", ged, ged_os), 1, 1, 0, 0)),
        tlag1(neighbours(filled("splag_1_1_acled_prpe_count_tlag1", acled, prpe), 1, 1, 0, 0)),
        tlag1(neighbours(filled("splag_1_1_acled_prex_count_tlag1", acled, prex), 1, 1, 0, 0)),
        # Logged versions of the lagged counts.
        tlag1(filled("ln_acled_prpe_count_tlag1", acled, prpe), log=True),
        tlag1(filled("ln_acled_prex_count_tlag1", acled, prex), log=True),
        tlag1(filled("ln_geb_sb_best_tlag1", ged, ged_sb), log=True),
        tlag1(filled("ln_geb_os_best_tlag1", ged, ged_os), log=True),
        # Logged versions of the lagged spatial lags
        # (ACLED ones use lag arguments 1, 2, 0, 0).
        tlag1(neighbours(filled("ln_splag_1_1_ged_sb_best_tlag1", ged, ged_sb), 1, 1, 0, 0), log=True),
        tlag1(neighbours(filled("ln_splag_1_1_ged_os_best_tlag1", ged, ged_os), 1, 1, 0, 0), log=True),
        tlag1(neighbours(filled("ln_splag_1_2_acled_prpe_count_tlag1", acled, prpe), 1, 2, 0, 0), log=True),
        tlag1(neighbours(filled("ln_splag_1_2_acled_prex_count_tlag1", acled, prex), 1, 2, 0, 0), log=True),
        # Population sum from the yearly grid table.
        (raw("pgd_pop_gpw_sum", "priogrid_year", "pop_gpw_sum")
         .transform.missing.fill()
         .transform.missing.extrapolate()
         .transform.missing.replace_na()),
    ]

    queryset = Queryset("protest_paper_inst_devi_bl", "priogrid_month")
    for column in columns:
        queryset = queryset.with_column(column)

    return queryset.with_theme("protest_paper").describe("""Variables to estimate deviation model model, also including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)


qs = _build_inst_devi_queryset()

# Publish the queryset and fetch its data.
df_inst_devi_bl = qs.publish().fetch()

# One-line summary: column count, range of index level 0 (reported as t)
# and number of distinct values on index level 1 (reported as units).
print(
    "A dataset with {} columns, with data between t = {} and {}. ({} units)".format(
        len(df_inst_devi_bl.columns),
        min(df_inst_devi_bl.index.get_level_values(0)),
        max(df_inst_devi_bl.index.get_level_values(0)),
        len(np.unique(df_inst_devi_bl.index.get_level_values(1))),
    )
)

#### Baseline + Economic development + Political institutions model

Overview:
* Elections + Econ dev, national + Baseline
* Elections + Econ dev, full + Baseline
* Deviations + Econ dev, national + Baseline
* Deviations + Econ dev, full + Baseline

##### Baseline + Economic development, country level + Political institutions model

In [None]:
# Queryset "protest_paper_elect_econ_national_bl" at the priogrid_month level:
# extended baseline + country-level economic development + political
# institutions (electoral democracy, civil liberties, elections).
qs = (
    Queryset("protest_paper_elect_econ_national_bl", "priogrid_month")

    # ---- Extended baseline ------------------------------------------------
    # Target variable: dummy obtained by thresholding the count at >= 1.
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Time lag 0 of the target variable (same chain, used as a feature).
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Time lag 0 of (logged) fatalities.
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay function of the dummy, 12 months.
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Decay function of the dummy, 24 months.
    .with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag of the dummy.
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay of the spatial lag (the lag is thresholded again before time_since).
    .with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average of logged fatalities.
    # NOTE(review): the column name says 12 but the window passed is 24 -
    # confirm which one is intended.
    .with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    # ---- Economic development, country level ------------------------------
    .with_column(
        Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
        .transform.missing.fill()
    )
    .with_column(
        Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
        .transform.missing.fill()
    )
    # Unemployment. This column used to be read from wdi_ny_gdp_pcap_kd_zg,
    # which silently duplicated the GDP-growth column under this name.
    .with_column(
        Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
        .transform.missing.fill()
    )

    # ---- Political institutions -------------------------------------------
    # Electoral democracy.
    .with_column(
        Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Civil liberties.
    .with_column(
        Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # NOTE(review): this table name differs from the "vdem_v12_cy" table used
    # for the other V-Dem columns - confirm it is the intended source.
    .with_column(
        Column("vdem_v2clrgunev_tlag12", from_table="tbl_734eevdem_v12_cy", from_column="vdem_v12_v2clrgunev")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Elections.
    .with_column(
        Column("lastelection", from_table="reign_cm", from_column="lastelection")
        .transform.missing.replace_na()
    )
    .with_column(
        Column("anticipation", from_table="reign_cm", from_column="anticipation")
        .transform.missing.replace_na()
    )

    .with_theme("protest_paper")
    .describe("""Protest political instiutions III  (electoral democracy + civil liberties + elections) and economic development (national) model including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset and fetch its data.
df_inst_elect_econ_nat_bl = qs.publish().fetch()

# One-line summary: column count, range of index level 0 (reported as t)
# and number of distinct values on index level 1 (reported as units).
print(f"A dataset with {len(df_inst_elect_econ_nat_bl.columns)} columns, with "
      f"data between t = {min(df_inst_elect_econ_nat_bl.index.get_level_values(0))} "
      f"and {max(df_inst_elect_econ_nat_bl.index.get_level_values(0))}. "
      f"({len(np.unique(df_inst_elect_econ_nat_bl.index.get_level_values(1)))} units)"
     )

##### Baseline + Economic development, full + Political institutions model

In [None]:
# Queryset "protest_paper_elect_econ_full_bl" at the priogrid_month level:
# extended baseline + economic development (country level and sub-national)
# + political institutions (electoral democracy, civil liberties, elections).
qs = (
    Queryset("protest_paper_elect_econ_full_bl", "priogrid_month")

    # ---- Extended baseline ------------------------------------------------
    # Target variable: dummy obtained by thresholding the count at >= 1.
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Time lag 0 of the target variable (same chain, used as a feature).
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Time lag 0 of (logged) fatalities.
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay function of the dummy, 12 months.
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Decay function of the dummy, 24 months.
    .with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag of the dummy.
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay of the spatial lag (the lag is thresholded again before time_since).
    .with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average of logged fatalities.
    # NOTE(review): the column name says 12 but the window passed is 24 -
    # confirm which one is intended.
    .with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    # ---- Economic development, country level ------------------------------
    .with_column(
        Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
        .transform.missing.fill()
    )
    .with_column(
        Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
        .transform.missing.fill()
    )
    # Unemployment. This column used to be read from wdi_ny_gdp_pcap_kd_zg,
    # which silently duplicated the GDP-growth column under this name.
    .with_column(
        Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
        .transform.missing.fill()
    )

    # ---- Economic development, sub-national level -------------------------
    .with_column(
        Column("pgd_gcp_mer", from_table="priogrid_year", from_column="gcp_mer")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(
        Column("pgd_imr_mean", from_table="priogrid_year", from_column="imr_mean")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(
        Column("pgd_urban_ih", from_table="priogrid_year", from_column="urban_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(
        Column("pgd_agri_ih", from_table="priogrid_year", from_column="agri_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(
        Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

    # ---- Political institutions -------------------------------------------
    # Electoral democracy.
    .with_column(
        Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Civil liberties.
    .with_column(
        Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # NOTE(review): this table name differs from the "vdem_v12_cy" table used
    # for the other V-Dem columns - confirm it is the intended source.
    .with_column(
        Column("vdem_v2clrgunev_tlag12", from_table="tbl_734eevdem_v12_cy", from_column="vdem_v12_v2clrgunev")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Elections.
    .with_column(
        Column("lastelection", from_table="reign_cm", from_column="lastelection")
        .transform.missing.replace_na()
    )
    .with_column(
        Column("anticipation", from_table="reign_cm", from_column="anticipation")
        .transform.missing.replace_na()
    )

    .with_theme("protest_paper")
    .describe("""Protest political instiutions III  (electoral democracy + civil liberties + elections) and economic development (full) model including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset and fetch its data.
df_inst_elect_econ_full_bl = qs.publish().fetch()

# One-line summary: column count, range of index level 0 (reported as t)
# and number of distinct values on index level 1 (reported as units).
print(f"A dataset with {len(df_inst_elect_econ_full_bl.columns)} columns, with "
      f"data between t = {min(df_inst_elect_econ_full_bl.index.get_level_values(0))} "
      f"and {max(df_inst_elect_econ_full_bl.index.get_level_values(0))}. "
      f"({len(np.unique(df_inst_elect_econ_full_bl.index.get_level_values(1)))} units)"
     )

##### Baseline + Economic development, country level + Political institutions model (deviation)

In [None]:
qs = (Queryset("protest_paper_devi_econ_national_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of fatalities
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.ops.ln()
         .transform.missing.fill()
        )
      
    # Decay function
    ## 12 months 
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(12)
         .transform.missing.fill()
        )
      
     ## 24 months 
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(24)
         .transform.missing.fill()
        )
   
    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
                 .transform.missing.fill()
            )
      
      # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )
      
      # Moving average over 24 months
      .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                         .transform.ops.ln()
                         .transform.missing.replace_na()
                         .transform.temporal.moving_average(24)
                         .transform.missing.fill()
                        )
      
      ### ECONOMIC DEVELOPMENT, Country level
      .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                  )
      
      # POL. INSTIUTIONS
      ## DEVIATION FEATURES
      
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )

      .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
      
      .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      
      .with_theme("protest_paper")
      .describe("""Protest political instiutions IV  (deviation) and economic development (national) model including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )
# Publish the query set built above and fetch its data.
df_inst_devi_econ_nat_bl = qs.publish().fetch()

# Summarise the fetched frame: number of columns, the range of index level 0
# (reported as "t") and the number of distinct values in index level 1
# (reported as "units").
nat_bl_time_index = df_inst_devi_econ_nat_bl.index.get_level_values(0)
nat_bl_unit_index = df_inst_devi_econ_nat_bl.index.get_level_values(1)
nat_bl_summary = (
    f"A dataset with {len(df_inst_devi_econ_nat_bl.columns)} columns, with "
    f"data between t = {min(nat_bl_time_index)} "
    f"and {max(nat_bl_time_index)}. "
    f"({len(np.unique(nat_bl_unit_index))} units)"
)
print(nat_bl_summary)

##### Baseline + Economic development (full) + Political institutions model (deviation)

In [None]:
# Query set "protest_paper_devi_econ_full_bl" at the priogrid_month level:
# extended baseline + economic development (country and sub-national level)
# + political-institutions deviation features.
#
# NOTE(review): several column names below start with "geb_" (probably meant
# to be "ged_"), and "splag_1_1_ged_os_dummy_tlag1" is built without a
# bool.gte(1) step. The names are kept unchanged because downstream code may
# reference them - confirm before renaming.
qs = (Queryset("protest_paper_devi_econ_full_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )

    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )

    # timelag 0 of fatalities
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
        )

    # Decay function
    ## 12 months
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
        )

    ## 24 months
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
        )

    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
        )

    # Moving average over 24 months
    # NOTE(review): the column name says 12 while the window is 24 - confirm
    # which one is intended. The name is kept so existing references still work.
    .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
        )

    ### ECONOMIC DEVELOPMENT, Country level
    .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
        .transform.missing.fill()
        )
    .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
        .transform.missing.fill()
        )
    # The unemployment feature must read its own source column; reading
    # "wdi_ny_gdp_pcap_kd_zg" here would only duplicate the GDP growth column above.
    .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_sl_uem_totl_zs")
        .transform.missing.fill()
        )

    ### ECONOMIC DEVELOPMENT, Sub-national level
    .with_column(Column("pgd_gcp_mer", from_table = "priogrid_year", from_column = "gcp_mer")
        .transform.missing.fill()
        .transform.missing.replace_na()
        )
    .with_column(Column("pgd_imr_mean", from_table = "priogrid_year", from_column = "imr_mean")
        .transform.missing.fill()
        .transform.missing.replace_na()
        )
    .with_column(Column("pgd_urban_ih", from_table = "priogrid_year", from_column = "urban_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
        )
    .with_column(Column("pgd_agri_ih", from_table = "priogrid_year", from_column = "agri_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
        )
    # Population is registered exactly once in this query set.
    .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
        )

    # POL. INSTITUTIONS
    ## DEVIATION FEATURES

    .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
        )

    .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
        )

    ## Counts, time lag 1
    .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    ## Spatial lag (1,1,0,0) followed by time lag 1
    .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    ## ln of the time-lagged counts
    .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        .transform.ops.ln()
        )

    .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        .transform.ops.ln()
        )

    .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        .transform.ops.ln()
        )

    .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        .transform.ops.ln()
        )

    ## ln of the spatially and temporally lagged counts
    .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        .transform.ops.ln()
        )

    .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        .transform.ops.ln()
        )

    # The two ACLED columns below use spatial lag (1,2,0,0), matching their names.
    .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        .transform.ops.ln()
        )

    .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.temporal.tlag(1)
        .transform.missing.replace_na()
        .transform.missing.fill()
        .transform.ops.ln()
        )

    .with_theme("protest_paper")
    .describe("""Protest political instiutions IV  (deviation) and economic development (full) model including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the query set and fetch its data.
df_inst_devi_econ_full_bl = qs.publish().fetch()

# Summary: number of columns, range of index level 0 ("t") and number of
# distinct values in index level 1 ("units").
print(f"A dataset with {len(df_inst_devi_econ_full_bl.columns)} columns, with "
      f"data between t = {min(df_inst_devi_econ_full_bl.index.get_level_values(0))} "
      f"and {max(df_inst_devi_econ_full_bl.index.get_level_values(0))}. "
      f"({len(np.unique(df_inst_devi_econ_full_bl.index.get_level_values(1)))} units)"
     )

### Extended baseline + Protest models

In [None]:
!viewser tables show acled2_pgm
!viewser tables show acled2_cm

#### Baseline + Naive protest model

In [None]:
# Query set "protest_paper_pr_naive_bl" at the priogrid_month level:
# extended baseline columns plus the naive (untyped) protest features.

# Source specifications shared by several columns below.
ged_sb_source = {"from_table": "ged2_pgm", "from_column": "ged_sb_best_sum_nokgi"}
acled_pr_source = {"from_table": "acled2_pgm", "from_column": "acled_pr_count"}

# Columns in the order in which they are attached to the query set.
naive_bl_columns = [
    # Target variable
    Column("ged_sb_dummy_dep", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1),

    # Time lag 0 of the target variable
    Column("ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1),

    # Time lag 0 of fatalities
    Column("ln_ged_sb", **ged_sb_source)
    .transform.ops.ln()
    .transform.missing.fill(),

    # Decay of time since the dummy, 12 months
    Column("decay_ts_12_ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(12)
    .transform.missing.fill(),

    # Decay of time since the dummy, 24 months
    Column("decay_ts_24_ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(24)
    .transform.missing.fill(),

    # Spatial lag of the dummy
    Column("splag_1_1_ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 1, 0, 0)
    .transform.missing.replace_na()
    .transform.missing.fill(),

    # Decay of the spatial lag
    Column("decay_ts_12_splag_1_1_ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 1, 0, 0)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(12)
    .transform.missing.fill(),

    # Moving average with a window of 24
    # NOTE(review): the column name says 12 - confirm which is intended.
    Column("mov_avg_12_ged_best_sb", **ged_sb_source)
    .transform.ops.ln()
    .transform.missing.replace_na()
    .transform.temporal.moving_average(24)
    .transform.missing.fill(),

    ## PROTEST FEATURES
    Column("decay_ts_6_acled_pr_dummy", **acled_pr_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(6)
    .transform.missing.fill(),

    Column("decay_ts_6_splag_1_2_acled_pr_dummy", **acled_pr_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 2, 0, 0)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(6)
    .transform.missing.fill(),

    Column("acled_pr_count", **acled_pr_source)
    .transform.missing.replace_na(),

    # Population
    Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
    .transform.missing.fill()
    .transform.missing.extrapolate()
    .transform.missing.replace_na(),
]

qs = Queryset("protest_paper_pr_naive_bl", "priogrid_month")
for naive_bl_column in naive_bl_columns:
    qs = qs.with_column(naive_bl_column)
qs = qs.with_theme("protest_paper").describe("""Naive protest model including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)

# Publish the query set and fetch its data.
df_pr_naive_bl = qs.publish().fetch()

# Summary: number of columns, range of index level 0 ("t") and number of
# distinct values in index level 1 ("units").
naive_bl_time_index = df_pr_naive_bl.index.get_level_values(0)
naive_bl_unit_index = df_pr_naive_bl.index.get_level_values(1)
print(
    f"A dataset with {len(df_pr_naive_bl.columns)} columns, with "
    f"data between t = {min(naive_bl_time_index)} "
    f"and {max(naive_bl_time_index)}. "
    f"({len(np.unique(naive_bl_unit_index))} units)"
)

#### Baseline + Local dynamic model

selected_categories = ['ex','pe','in','ri']

    for protest in selected_categories: 
        #dynamic_local.append(f"decay_6_ts_acled_dummy_pr{protest}")
        dynamic_local.append(f"ln_tlag_0_acled_pop_pr{protest}")
        dynamic_local.append(f"ln_cumsum_3_acled_pop_pr{protest}")
        #dynamic_local.append(f"decay_6_ts_splag_1_2_acled_dummy_pr{protest}")
        dynamic_local.append(f"ln_tlag_0_splag_1_2_acled_pop_pr{protest}")
        dynamic_local.append(f"ln_cumsum_3_splag_1_2_acled_pop_pr{protest}")
        dynamic_local.append(f"ln_min_dist_3_acled_count_pr{protest}")

    dynamic_local_bl = baseline_model + dynamic_local

In [None]:
# Query set "protest_paper_pr_dynamic_local_bl" at the priogrid_month level:
# extended baseline columns plus local dynamic protest features, one set per
# protest type.

# Source specification shared by all baseline conflict columns.
ged_sb_source = {"from_table": "ged2_pgm", "from_column": "ged_sb_best_sum_nokgi"}

# Protest type label -> ACLED source column, as used by the columns below:
#   prex: protest with excessive violence against protesters
#         (interaction codes: 16, 26, 36, 46, 56, 68.)
#   prpe: peaceful protest (interaction codes: 60, 66, or 67.)
#   prin: protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
#   prri: protest with riots (interaction codes: inter 1 or 2 has 5)
# NOTE(review): the codes listed for prin are identical to those for prex -
# confirm against the ACLED codebook.
acled_column_by_type = {
    "prex": "acled_c3_count",
    "prpe": "acled_c1_count",
    "prin": "acled_c2_count",
    "prri": "acled_c5_count",
}

# Extended baseline columns.
dynamic_local_baseline_columns = [
    # Target variable
    Column("ged_sb_dummy_dep", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1),

    # Time lag 0 of the target variable
    Column("ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1),

    # Time lag 0 of fatalities
    Column("ln_ged_sb", **ged_sb_source)
    .transform.ops.ln()
    .transform.missing.fill(),

    # Decay of time since the dummy, 12 months
    Column("decay_ts_12_ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(12)
    .transform.missing.fill(),

    # Decay of time since the dummy, 24 months
    Column("decay_ts_24_ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(24)
    .transform.missing.fill(),

    # Spatial lag of the dummy
    Column("splag_1_1_ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 1, 0, 0)
    .transform.missing.replace_na()
    .transform.missing.fill(),

    # Decay of the spatial lag
    Column("decay_ts_12_splag_1_1_ged_sb_dummy", **ged_sb_source)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 1, 0, 0)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(12)
    .transform.missing.fill(),

    # Moving average with a window of 24
    # NOTE(review): the column name says 12 - confirm which is intended.
    Column("mov_avg_12_ged_best_sb", **ged_sb_source)
    .transform.ops.ln()
    .transform.missing.replace_na()
    .transform.temporal.moving_average(24)
    .transform.missing.fill(),
]

## PROTEST FEATURES

# Decay (6) of time since a protest of each type in the cell itself.
dynamic_local_decay_columns = [
    Column(f"decay_ts_6_acled_{protest_type}_dummy",
           from_table="acled2_pgm", from_column=acled_column_by_type[protest_type])
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(6)
    .transform.missing.fill()
    for protest_type in ("prex", "prpe", "prin", "prri")
]

# Same decay, computed on the (1,2,0,0) spatial lag of the protest dummy.
# The type order here (prpe first) follows the original column order.
dynamic_local_splag_decay_columns = [
    Column(f"decay_ts_6_splag_1_2_acled_{protest_type}_dummy",
           from_table="acled2_pgm", from_column=acled_column_by_type[protest_type])
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 2, 0, 0)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(6)
    .transform.missing.fill()
    for protest_type in ("prpe", "prex", "prin", "prri")
]

# Raw counts per protest type.
dynamic_local_count_columns = [
    Column(f"acled_{protest_type}_count",
           from_table="acled2_pgm", from_column=acled_column_by_type[protest_type])
    .transform.missing.replace_na()
    for protest_type in ("prex", "prpe", "prin", "prri")
]

# Population
dynamic_local_population_column = (
    Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
    .transform.missing.fill()
    .transform.missing.extrapolate()
    .transform.missing.replace_na()
)

qs = Queryset("protest_paper_pr_dynamic_local_bl", "priogrid_month")
for dynamic_local_column in (
    dynamic_local_baseline_columns
    + dynamic_local_decay_columns
    + dynamic_local_splag_decay_columns
    + dynamic_local_count_columns
    + [dynamic_local_population_column]
):
    qs = qs.with_column(dynamic_local_column)
qs = qs.with_theme("protest_paper").describe("""Local dynamic protest model including extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)

# Publish the query set and fetch its data.
df_pr_dynamic_loc_bl = qs.publish().fetch()

# Summary: number of columns, range of index level 0 ("t") and number of
# distinct values in index level 1 ("units").
dynamic_loc_time_index = df_pr_dynamic_loc_bl.index.get_level_values(0)
dynamic_loc_unit_index = df_pr_dynamic_loc_bl.index.get_level_values(1)
print(
    f"A dataset with {len(df_pr_dynamic_loc_bl.columns)} columns, with "
    f"data between t = {min(dynamic_loc_time_index)} "
    f"and {max(dynamic_loc_time_index)}. "
    f"({len(np.unique(dynamic_loc_unit_index))} units)"
)

#### Baseline + National dynamic model

dynamic_national = []

    for protest in selected_categories: 
        dynamic_national.append(f"country_decay_6_ts_acled_dummy_pr{protest}")
        dynamic_national.append(f"ln_country_tlag_0_acled_pop_pr{protest}")
        dynamic_national.append(f"ln_country_cumsum_3_acled_pop_pr{protest}")

    print(dynamic_national)

In [None]:
!viewser tables show wdi_cy

In [None]:
# Queryset "protest_paper_pr_dynamic_national_bl" (priogrid_month level):
# extended conflict-history baseline + local protest features + national
# protest features + population.
# Columns are added one call at a time, in the same order and with the same
# transform chains as the original fluent definition.

# Source shared by every column derived from the GED state-based fatalities.
GED_SB = {"from_table": "ged2_pgm", "from_column": "ged_sb_best_sum_nokgi"}

# ACLED protest categories as (label, source column):
#   prex - protest with excessive violence against protester
#          (interaction codes: 16, 26, 36, 46, 56, 68.)
#   prpe - peaceful protest (interaction codes: 60, 66, or 67.)
#   prin - protest with intervention (the notebook lists the same interaction
#          codes as for prex: 16, 26, 36, 46, 56, 68. -- TODO confirm)
#   prri - protest with riots (interaction codes: inter 1 or 2 has 5)
PROTEST_SOURCES = (
    ("prex", "acled_c3_count"),
    ("prpe", "acled_c1_count"),
    ("prin", "acled_c2_count"),
    ("prri", "acled_c5_count"),
)
# The spatially lagged decay columns are added with prpe first.
PROTEST_SOURCES_SPLAG = (
    ("prpe", "acled_c1_count"),
    ("prex", "acled_c3_count"),
    ("prin", "acled_c2_count"),
    ("prri", "acled_c5_count"),
)

qs = Queryset("protest_paper_pr_dynamic_national_bl", "priogrid_month")

## CONFLICT HISTORY (extended baseline)

# target variable
qs = qs.with_column(
    Column("ged_sb_dummy_dep", **GED_SB)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
)

# timelag 0 of target variable
qs = qs.with_column(
    Column("ged_sb_dummy", **GED_SB)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
)

# timelag 0 of fatalities (ln transform)
qs = qs.with_column(
    Column("ln_ged_sb", **GED_SB)
    .transform.ops.ln()
    .transform.missing.fill()
)

# Decay function of the conflict dummy: 12 months, then 24 months
for n_months in (12, 24):
    qs = qs.with_column(
        Column(f"decay_ts_{n_months}_ged_sb_dummy", **GED_SB)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(n_months)
        .transform.missing.fill()
    )

# Spatial lag function
qs = qs.with_column(
    Column("splag_1_1_ged_sb_dummy", **GED_SB)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 1, 0, 0)
    .transform.missing.replace_na()
    .transform.missing.fill()
)

# Decay of spatial lag
qs = qs.with_column(
    Column("decay_ts_12_splag_1_1_ged_sb_dummy", **GED_SB)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 1, 0, 0)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(12)
    .transform.missing.fill()
)

# Moving average over 24 months.
# NOTE(review): the column name says 12 but the window is 24; the name is
# kept as-is because it is part of the published queryset.
qs = qs.with_column(
    Column("mov_avg_12_ged_best_sb", **GED_SB)
    .transform.ops.ln()
    .transform.missing.replace_na()
    .transform.temporal.moving_average(24)
    .transform.missing.fill()
)

## PROTEST FEATURES - Dynamic Local

# decay(6) of the time since the last month with at least one protest event
for pr_label, pr_source in PROTEST_SOURCES:
    qs = qs.with_column(
        Column(f"decay_ts_6_acled_{pr_label}_dummy", from_table="acled2_pgm", from_column=pr_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

# same decay, computed on the dummy of the spatial lag (1,2,0,0)
for pr_label, pr_source in PROTEST_SOURCES_SPLAG:
    qs = qs.with_column(
        Column(f"decay_ts_6_splag_1_2_acled_{pr_label}_dummy", from_table="acled2_pgm", from_column=pr_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

# protest event counts per category
for pr_label, pr_source in PROTEST_SOURCES:
    qs = qs.with_column(
        Column(f"acled_{pr_label}_count", from_table="acled2_pgm", from_column=pr_source)
        .transform.missing.replace_na()
    )

## PROTEST FEATURES - Dynamic national

# decay(6) of the time since the last protest month, from the acled2_cm table
for pr_label, pr_source in PROTEST_SOURCES:
    qs = qs.with_column(
        Column(f"decay_ts_6_acled_{pr_label}_dummy_cm", from_table="acled2_cm", from_column=pr_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

# protest event counts per category, from the acled2_cm table
for pr_label, pr_source in PROTEST_SOURCES:
    qs = qs.with_column(
        Column(f"acled_{pr_label}_count_cm", from_table="acled2_cm", from_column=pr_source)
        .transform.missing.replace_na()
    )

# population
for pop_name, pop_table, pop_source in (
    ("wdi_sp_pop_totl", "wdi_cy", "wdi_sp_pop_totl"),
    ("pgd_pop_gpw_sum", "priogrid_year", "pop_gpw_sum"),
):
    qs = qs.with_column(
        Column(pop_name, from_table=pop_table, from_column=pop_source)
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

# theme and description attached to the queryset
qs = (
    qs
    .with_theme("protest_paper")
    .describe("""National dynamic protest model including local dynamic model and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset built above and fetch its data.
df_pr_dynamic_nat_bl = qs.publish().fetch()

# Short summary of the fetched frame: number of columns, range of index
# level 0 (reported as "t") and number of distinct values in index level 1
# (reported as "units").
nat_bl_t = df_pr_dynamic_nat_bl.index.get_level_values(0)
nat_bl_units = df_pr_dynamic_nat_bl.index.get_level_values(1)
nat_bl_summary = (
    f"A dataset with {len(df_pr_dynamic_nat_bl.columns)} columns, with "
    f"data between t = {min(nat_bl_t)} "
    f"and {max(nat_bl_t)}. "
    f"({len(np.unique(nat_bl_units))} units)"
)
print(nat_bl_summary)

### Full models: extended baseline + protest models + political institutions + economic models

#### Full protest model + political institutions

##### Full protest model + political institutions model I

In [None]:
# Queryset "protest_paper_pr_elecdemo_bl" (priogrid_month level):
# extended conflict-history baseline + local protest features + national
# protest features + population + electoral democracy (V-Dem polyarchy).
# Columns are added one call at a time, in the same order and with the same
# transform chains as the original fluent definition.

# Source shared by every column derived from the GED state-based fatalities.
GED_SB = {"from_table": "ged2_pgm", "from_column": "ged_sb_best_sum_nokgi"}

# ACLED protest categories as (label, source column):
#   prex - protest with excessive violence against protester
#          (interaction codes: 16, 26, 36, 46, 56, 68.)
#   prpe - peaceful protest (interaction codes: 60, 66, or 67.)
#   prin - protest with intervention (the notebook lists the same interaction
#          codes as for prex: 16, 26, 36, 46, 56, 68. -- TODO confirm)
#   prri - protest with riots (interaction codes: inter 1 or 2 has 5)
PROTEST_SOURCES = (
    ("prex", "acled_c3_count"),
    ("prpe", "acled_c1_count"),
    ("prin", "acled_c2_count"),
    ("prri", "acled_c5_count"),
)
# The spatially lagged decay columns are added with prpe first.
PROTEST_SOURCES_SPLAG = (
    ("prpe", "acled_c1_count"),
    ("prex", "acled_c3_count"),
    ("prin", "acled_c2_count"),
    ("prri", "acled_c5_count"),
)

qs = Queryset("protest_paper_pr_elecdemo_bl", "priogrid_month")

## CONFLICT HISTORY (extended baseline)

# target variable
qs = qs.with_column(
    Column("ged_sb_dummy_dep", **GED_SB)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
)

# timelag 0 of target variable
qs = qs.with_column(
    Column("ged_sb_dummy", **GED_SB)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
)

# timelag 0 of fatalities (ln transform)
qs = qs.with_column(
    Column("ln_ged_sb", **GED_SB)
    .transform.ops.ln()
    .transform.missing.fill()
)

# Decay function of the conflict dummy: 12 months, then 24 months
for n_months in (12, 24):
    qs = qs.with_column(
        Column(f"decay_ts_{n_months}_ged_sb_dummy", **GED_SB)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(n_months)
        .transform.missing.fill()
    )

# Spatial lag function
qs = qs.with_column(
    Column("splag_1_1_ged_sb_dummy", **GED_SB)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 1, 0, 0)
    .transform.missing.replace_na()
    .transform.missing.fill()
)

# Decay of spatial lag
qs = qs.with_column(
    Column("decay_ts_12_splag_1_1_ged_sb_dummy", **GED_SB)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.spatial.lag(1, 1, 0, 0)
    .transform.missing.replace_na()
    .transform.bool.gte(1)
    .transform.temporal.time_since()
    .transform.temporal.decay(12)
    .transform.missing.fill()
)

# Moving average over 24 months.
# NOTE(review): the column name says 12 but the window is 24; the name is
# kept as-is because it is part of the published queryset.
qs = qs.with_column(
    Column("mov_avg_12_ged_best_sb", **GED_SB)
    .transform.ops.ln()
    .transform.missing.replace_na()
    .transform.temporal.moving_average(24)
    .transform.missing.fill()
)

## PROTEST FEATURES - Dynamic Local

# decay(6) of the time since the last month with at least one protest event
for pr_label, pr_source in PROTEST_SOURCES:
    qs = qs.with_column(
        Column(f"decay_ts_6_acled_{pr_label}_dummy", from_table="acled2_pgm", from_column=pr_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

# same decay, computed on the dummy of the spatial lag (1,2,0,0)
for pr_label, pr_source in PROTEST_SOURCES_SPLAG:
    qs = qs.with_column(
        Column(f"decay_ts_6_splag_1_2_acled_{pr_label}_dummy", from_table="acled2_pgm", from_column=pr_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

# protest event counts per category
for pr_label, pr_source in PROTEST_SOURCES:
    qs = qs.with_column(
        Column(f"acled_{pr_label}_count", from_table="acled2_pgm", from_column=pr_source)
        .transform.missing.replace_na()
    )

## PROTEST FEATURES - Dynamic national

# decay(6) of the time since the last protest month, from the acled2_cm table
for pr_label, pr_source in PROTEST_SOURCES:
    qs = qs.with_column(
        Column(f"decay_ts_6_acled_{pr_label}_dummy_cm", from_table="acled2_cm", from_column=pr_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

# protest event counts per category, from the acled2_cm table
for pr_label, pr_source in PROTEST_SOURCES:
    qs = qs.with_column(
        Column(f"acled_{pr_label}_count_cm", from_table="acled2_cm", from_column=pr_source)
        .transform.missing.replace_na()
    )

# population
for pop_name, pop_table, pop_source in (
    ("wdi_sp_pop_totl", "wdi_cy", "wdi_sp_pop_totl"),
    ("pgd_pop_gpw_sum", "priogrid_year", "pop_gpw_sum"),
):
    qs = qs.with_column(
        Column(pop_name, from_table=pop_table, from_column=pop_source)
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

# POL. INSTITUTIONS
# V-Dem polyarchy index with tlag(12) applied, filled before and after the lag
qs = qs.with_column(
    Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
    .transform.missing.fill()
    .transform.temporal.tlag(12)
    .transform.missing.fill()
)

# theme and description attached to the queryset
qs = (
    qs
    .with_theme("protest_paper")
    .describe("""National dynamic protest model including local dynamic model, electoral democracy model and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset built above and fetch its data.
df_pr_elecdemo_bl = qs.publish().fetch()

# Short summary of the fetched frame: number of columns, range of index
# level 0 (reported as "t") and number of distinct values in index level 1
# (reported as "units").
elecdemo_bl_t = df_pr_elecdemo_bl.index.get_level_values(0)
elecdemo_bl_units = df_pr_elecdemo_bl.index.get_level_values(1)
elecdemo_bl_summary = (
    f"A dataset with {len(df_pr_elecdemo_bl.columns)} columns, with "
    f"data between t = {min(elecdemo_bl_t)} "
    f"and {max(elecdemo_bl_t)}. "
    f"({len(np.unique(elecdemo_bl_units))} units)"
)
print(elecdemo_bl_summary)

##### Full protest model + political institutions model II

In [None]:
qs = (Queryset("protest_paper_pr_civlib_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of fatalities
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.ops.ln()
         .transform.missing.fill()
        )
      
    # Decay function
    ## 12 months 
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(12)
         .transform.missing.fill()
        )
      
     ## 24 months 
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(24)
         .transform.missing.fill()
        )
   
    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
                 .transform.missing.fill()
            )
      
      # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )
      
      # Moving average over 24 months
      .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                         .transform.ops.ln()
                         .transform.missing.replace_na()
                         .transform.temporal.moving_average(24)
                         .transform.missing.fill()
                        )
      
      ## PROTEST FEATURES - Dynamic Local
      
      .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      ## PROTEST FEATURES - Dynamic national
      
      .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      # population
      .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      # POL. INSTITUTIONS
      .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )
      
      # Civil liberties
      .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )
      
      #.with_column(Column("vdem_v2clrgunev_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v2clrgunev")
                         #.transform.missing.fill()
                         #.transform.temporal.tlag(12)
                         #.transform.missing.fill()
                  #)
      
      .with_column(Column("vdem_v2clrgunev_tlag12", from_table = "tbl_734eevdem_v12_cy", from_column = "vdem_v12_v2clrgunev")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                  )
            
      .with_theme("protest_paper")
      .describe("""National dynamic protest model including local dynamic model, civil liberties and elecdemo model and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the queryset definition built above and download the resulting frame.
df_pr_civlib_bl = qs.publish().fetch()

# Summarise the fetched frame. Index level 0 is reported as the time step (t)
# and index level 1 as the unit id. The pandas Index reductions are used
# instead of the Python built-ins min()/max() and np.unique(), which walk the
# index element by element.
print(f"A dataset with {len(df_pr_civlib_bl.columns)} columns, with "
      f"data between t = {df_pr_civlib_bl.index.get_level_values(0).min()} "
      f"and {df_pr_civlib_bl.index.get_level_values(0).max()}. "
      f"({df_pr_civlib_bl.index.get_level_values(1).nunique()} units)"
     )

##### Full protest model + political institutions model III

In [None]:
qs = (
    Queryset("protest_paper_pr_elect_bl", "priogrid_month")

    # ------------------------------------------------------------------
    # CONFLICT HISTORY (ged2_pgm, column ged_sb_best_sum_nokgi)
    # ------------------------------------------------------------------

    # Dependent variable: missing values replaced, then flagged where >= 1.
    .with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Unlagged copy of the dependent variable, used as a feature.
    .with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Unlagged fatalities, passed through ops.ln and then missing.fill.
    .with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay of the time since the last month with the dummy set (decay parameter 12).
    .with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Same decay feature with decay parameter 24.
    .with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag (1, 1, 0, 0) of the conflict dummy.
    # NOTE(review): fill() directly after replace_na() looks redundant; the chain
    # is kept exactly as published - confirm before removing.
    .with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay (parameter 12) of the time since the spatially lagged dummy was last set.
    .with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average with window 24 of the ln-transformed fatalities.
    # NOTE(review): the column name says "12" while the window is 24; the name is
    # left untouched so the published column keeps its identifier - confirm which
    # of the two is intended.
    .with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    # ------------------------------------------------------------------
    # PROTEST FEATURES - dynamic, local (acled2_pgm)
    # Category columns: c3 = prex, c1 = prpe, c2 = prin, c5 = prri.
    # ------------------------------------------------------------------

    # Decay (parameter 6) of time since the last month with >= 1 event, per category.
    .with_column(
        Column("decay_ts_6_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(
        Column("decay_ts_6_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(
        Column("decay_ts_6_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(
        Column("decay_ts_6_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # Same decay features computed on the spatial lag (1, 2, 0, 0) of each dummy.
    .with_column(
        Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(
        Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(
        Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(
        Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # Raw event counts per category (missing values replaced).
    # prex: protest with excessive violence against protesters
    #       (interaction codes: 16, 26, 36, 46, 56, 68).
    .with_column(
        Column("acled_prex_count", from_table="acled2_pgm", from_column="acled_c3_count")
        .transform.missing.replace_na()
    )
    # prpe: peaceful protest (interaction codes: 60, 66, or 67).
    .with_column(
        Column("acled_prpe_count", from_table="acled2_pgm", from_column="acled_c1_count")
        .transform.missing.replace_na()
    )
    # prin: protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68).
    # NOTE(review): this code list is identical to the one given for prex - confirm.
    .with_column(
        Column("acled_prin_count", from_table="acled2_pgm", from_column="acled_c2_count")
        .transform.missing.replace_na()
    )
    # prri: protest with riots (interaction codes: inter 1 or 2 has 5).
    .with_column(
        Column("acled_prri_count", from_table="acled2_pgm", from_column="acled_c5_count")
        .transform.missing.replace_na()
    )

    # ------------------------------------------------------------------
    # PROTEST FEATURES - dynamic, national (acled2_cm)
    # ------------------------------------------------------------------

    # Decay (parameter 6) of time since the last month with >= 1 event, per category.
    .with_column(
        Column("decay_ts_6_acled_prex_dummy_cm", from_table="acled2_cm", from_column="acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(
        Column("decay_ts_6_acled_prpe_dummy_cm", from_table="acled2_cm", from_column="acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(
        Column("decay_ts_6_acled_prin_dummy_cm", from_table="acled2_cm", from_column="acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(
        Column("decay_ts_6_acled_prri_dummy_cm", from_table="acled2_cm", from_column="acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # Raw national event counts per category (missing values replaced).
    # prex: protest with excessive violence against protesters
    #       (interaction codes: 16, 26, 36, 46, 56, 68).
    .with_column(
        Column("acled_prex_count_cm", from_table="acled2_cm", from_column="acled_c3_count")
        .transform.missing.replace_na()
    )
    # prpe: peaceful protest (interaction codes: 60, 66, or 67).
    .with_column(
        Column("acled_prpe_count_cm", from_table="acled2_cm", from_column="acled_c1_count")
        .transform.missing.replace_na()
    )
    # prin: protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68).
    # NOTE(review): this code list is identical to the one given for prex - confirm.
    .with_column(
        Column("acled_prin_count_cm", from_table="acled2_cm", from_column="acled_c2_count")
        .transform.missing.replace_na()
    )
    # prri: protest with riots (interaction codes: inter 1 or 2 has 5).
    .with_column(
        Column("acled_prri_count_cm", from_table="acled2_cm", from_column="acled_c5_count")
        .transform.missing.replace_na()
    )

    # ------------------------------------------------------------------
    # POPULATION
    # ------------------------------------------------------------------
    .with_column(
        Column("wdi_sp_pop_totl", from_table="wdi_cy", from_column="wdi_sp_pop_totl")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )
    .with_column(
        Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

    # ------------------------------------------------------------------
    # POLITICAL INSTITUTIONS (each filled, lagged with tlag(12), filled again)
    # ------------------------------------------------------------------

    # Electoral democracy.
    .with_column(
        Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Civil liberties.
    .with_column(
        Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # v2clrgunev, read from tbl_734eevdem_v12_cy. A previously commented-out
    # variant of this column pointed at vdem_v12_cy / vdem_v2clrgunev instead.
    .with_column(
        Column("vdem_v2clrgunev_tlag12", from_table="tbl_734eevdem_v12_cy", from_column="vdem_v12_v2clrgunev")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Elections (reign_cm).
    .with_column(
        Column("lastelection", from_table="reign_cm", from_column="lastelection")
        .transform.missing.replace_na()
    )
    .with_column(
        Column("anticipation", from_table="reign_cm", from_column="anticipation")
        .transform.missing.replace_na()
    )

    .with_theme("protest_paper")
    .describe("""National dynamic protest model including local dynamic model, election, civil liberties and electoral democracy model and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
)

# Publish the queryset definition built above and download the resulting frame.
df_pr_elect_bl = qs.publish().fetch()

# Summarise the fetched frame. Index level 0 is reported as the time step (t)
# and index level 1 as the unit id. The pandas Index reductions are used
# instead of the Python built-ins min()/max() and np.unique(), which walk the
# index element by element.
print(f"A dataset with {len(df_pr_elect_bl.columns)} columns, with "
      f"data between t = {df_pr_elect_bl.index.get_level_values(0).min()} "
      f"and {df_pr_elect_bl.index.get_level_values(0).max()}. "
      f"({df_pr_elect_bl.index.get_level_values(1).nunique()} units)"
     )

##### Full protest model + political institutions model IV

In [None]:
qs = (Queryset("protest_paper_pr_devi_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of fatalities
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.ops.ln()
         .transform.missing.fill()
        )
      
    # Decay function
    ## 12 months 
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(12)
         .transform.missing.fill()
        )
      
     ## 24 months 
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(24)
         .transform.missing.fill()
        )
   
    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
                 .transform.missing.fill()
            )
      
      # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )
      
      # Moving average over 24 months
      .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                         .transform.ops.ln()
                         .transform.missing.replace_na()
                         .transform.temporal.moving_average(24)
                         .transform.missing.fill()
                        )
      
      ## PROTEST FEATURES - Dynamic Local
      
      .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      ## PROTEST FEATURES - Dynamic national
      
      .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      # population
      .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      # POL. INSTITUTIONS
      # Deviation
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )

      .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
      .with_theme("protest_paper")
      .describe("""National dynamic protest model including local dynamic model, deviation model and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the query set defined above and download the resulting frame.
df_pr_devi_bl = qs.publish().fetch()

# One-line summary of the download: number of columns, smallest and largest
# value of index level 0 (printed as "t") and number of distinct values of
# index level 1 (printed as "units").
_t_level = df_pr_devi_bl.index.get_level_values(0)
_unit_level = df_pr_devi_bl.index.get_level_values(1)
print(
    f"A dataset with {len(df_pr_devi_bl.columns)} columns, with "
    f"data between t = {min(_t_level)} "
    f"and {max(_t_level)}. "
    f"({len(np.unique(_unit_level))} units)"
)

In [None]:
def _build_pr_devi_bl_01_queryset():
    """Assemble the first half of the pr_devi_bl query set (priogrid_month level).

    Registration order: target and conflict-history baseline columns read from
    ged2_pgm, local protest features from acled2_pgm, national protest
    features from acled2_cm, and two population columns.  Column names and
    source tables/columns are written out literally so they stay searchable.

    Returns:
        The Queryset "protest_paper_pr_devi_bl_01" with theme and description
        set; publishing and fetching is left to the caller.
    """

    def source(name, table, column):
        """Open a column called `name` that reads `column` from `table`."""
        return Column(name, from_table=table, from_column=column)

    def dummy(name, table, column):
        """Chain: replace_na -> bool.gte(1)."""
        return (
            source(name, table, column)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

    def neighbour_dummy(name, table, column, lag_second):
        """Chain: dummy -> spatial.lag(1, lag_second, 0, 0) -> replace_na."""
        return (
            dummy(name, table, column)
            .transform.spatial.lag(1, lag_second, 0, 0)
            .transform.missing.replace_na()
        )

    def decayed(column_so_far, months):
        """Append: time_since -> decay(months) -> fill.

        The argument of decay() is labelled "months" in the original comments.
        """
        return (
            column_so_far
            .transform.temporal.time_since()
            .transform.temporal.decay(months)
            .transform.missing.fill()
        )

    def decayed_neighbours(name, table, column, lag_second, months):
        """Chain: neighbour_dummy -> bool.gte(1) -> decayed."""
        thresholded = neighbour_dummy(name, table, column, lag_second).transform.bool.gte(1)
        return decayed(thresholded, months)

    def count(name, table, column):
        """Chain: replace_na only."""
        return source(name, table, column).transform.missing.replace_na()

    def population(name, table, column):
        """Chain: fill -> extrapolate -> replace_na."""
        return (
            source(name, table, column)
            .transform.missing.fill()
            .transform.missing.extrapolate()
            .transform.missing.replace_na()
        )

    # Every conflict-history column reads this table/column pair.
    ged_sb = ("ged2_pgm", "ged_sb_best_sum_nokgi")

    columns = [
        # Target variable, followed by the same chain under a second name
        # ("timelag 0 of target variable").
        dummy("ged_sb_dummy_dep", *ged_sb),
        dummy("ged_sb_dummy", *ged_sb),

        # Timelag 0 of fatalities: ln is applied before the fill.
        source("ln_ged_sb", *ged_sb)
        .transform.ops.ln()
        .transform.missing.fill(),

        # Decay of the time since the dummy last fired: 12 and 24 months.
        decayed(dummy("decay_ts_12_ged_sb_dummy", *ged_sb), 12),
        decayed(dummy("decay_ts_24_ged_sb_dummy", *ged_sb), 24),

        # Spatial lag of the dummy, and the 12-month decay of that lag.
        neighbour_dummy("splag_1_1_ged_sb_dummy", *ged_sb, 1).transform.missing.fill(),
        decayed_neighbours("decay_ts_12_splag_1_1_ged_sb_dummy", *ged_sb, 1, 12),

        # Moving average of ln(fatalities).
        # NOTE(review): the column name says 12 but the window passed is 24;
        # confirm which is intended. Both are left exactly as they were.
        source("mov_avg_12_ged_best_sb", *ged_sb)
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill(),

        ## PROTEST FEATURES - dynamic local (acled2_pgm)
        decayed(dummy("decay_ts_6_acled_prex_dummy", "acled2_pgm", "acled_c3_count"), 6),
        decayed(dummy("decay_ts_6_acled_prpe_dummy", "acled2_pgm", "acled_c1_count"), 6),
        decayed(dummy("decay_ts_6_acled_prin_dummy", "acled2_pgm", "acled_c2_count"), 6),
        decayed(dummy("decay_ts_6_acled_prri_dummy", "acled2_pgm", "acled_c5_count"), 6),

        decayed_neighbours("decay_ts_6_splag_1_2_acled_prpe_dummy", "acled2_pgm", "acled_c1_count", 2, 6),
        decayed_neighbours("decay_ts_6_splag_1_2_acled_prex_dummy", "acled2_pgm", "acled_c3_count", 2, 6),
        decayed_neighbours("decay_ts_6_splag_1_2_acled_prin_dummy", "acled2_pgm", "acled_c2_count", 2, 6),
        decayed_neighbours("decay_ts_6_splag_1_2_acled_prri_dummy", "acled2_pgm", "acled_c5_count", 2, 6),

        # Raw event counts per protest category:
        #   prex - protest with excessive violence against protesters
        #          (interaction codes: 16, 26, 36, 46, 56, 68)
        #   prpe - peaceful protest (interaction codes: 60, 66, or 67)
        #   prin - protest with intervention
        #          NOTE(review): the original comment repeated the prex code
        #          list here, which looks like a copy-paste slip; confirm.
        #   prri - protest with riots (interaction codes: inter 1 or 2 has 5)
        count("acled_prex_count", "acled2_pgm", "acled_c3_count"),
        count("acled_prpe_count", "acled2_pgm", "acled_c1_count"),
        count("acled_prin_count", "acled2_pgm", "acled_c2_count"),
        count("acled_prri_count", "acled2_pgm", "acled_c5_count"),

        ## PROTEST FEATURES - dynamic national (acled2_cm), same categories
        decayed(dummy("decay_ts_6_acled_prex_dummy_cm", "acled2_cm", "acled_c3_count"), 6),
        decayed(dummy("decay_ts_6_acled_prpe_dummy_cm", "acled2_cm", "acled_c1_count"), 6),
        decayed(dummy("decay_ts_6_acled_prin_dummy_cm", "acled2_cm", "acled_c2_count"), 6),
        decayed(dummy("decay_ts_6_acled_prri_dummy_cm", "acled2_cm", "acled_c5_count"), 6),

        count("acled_prex_count_cm", "acled2_cm", "acled_c3_count"),
        count("acled_prpe_count_cm", "acled2_cm", "acled_c1_count"),
        count("acled_prin_count_cm", "acled2_cm", "acled_c2_count"),
        count("acled_prri_count_cm", "acled2_cm", "acled_c5_count"),

        # Population
        population("wdi_sp_pop_totl", "wdi_cy", "wdi_sp_pop_totl"),
        population("pgd_pop_gpw_sum", "priogrid_year", "pop_gpw_sum"),
    ]

    queryset = Queryset("protest_paper_pr_devi_bl_01", "priogrid_month")
    for column in columns:
        queryset = queryset.with_column(column)

    # The description text (including the "haf" typo) is reproduced verbatim.
    return queryset.with_theme("protest_paper").describe("""First haf of query: national dynamic protest model including local dynamic model, deviation model and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)


qs = _build_pr_devi_bl_01_queryset()

# Publish the query set and download the resulting frame.
df_pr_devi_bl_01 = qs.publish().fetch()

# Summary: number of columns, range of index level 0 ("t") and number of
# distinct values of index level 1 ("units").
_t_level = df_pr_devi_bl_01.index.get_level_values(0)
_unit_level = df_pr_devi_bl_01.index.get_level_values(1)
print(
    f"A dataset with {len(df_pr_devi_bl_01.columns)} columns, with "
    f"data between t = {min(_t_level)} "
    f"and {max(_t_level)}. "
    f"({len(np.unique(_unit_level))} units)"
)

In [None]:
def _build_pr_devi_bl_02_queryset():
    """Assemble the second half of the pr_devi_bl query set (priogrid_month level).

    Registration order: two filled ACLED counts, two V-Dem indices lagged by
    12, then one-step temporal lags of ACLED/GED counts, their spatial lags,
    and the ln-transformed variants of both.  Column names and source
    tables/columns are written out literally so they stay searchable.

    Returns:
        The Queryset "protest_paper_pr_devi_bl_02" with theme and description
        set; publishing and fetching is left to the caller.
    """

    def source(name, table, column):
        """Open a column called `name` that reads `column` from `table`."""
        return Column(name, from_table=table, from_column=column)

    def filled_count(name, table, column):
        """Chain: replace_na -> fill."""
        return (
            source(name, table, column)
            .transform.missing.replace_na()
            .transform.missing.fill()
        )

    def lagged_index(name, table, column):
        """Chain: fill -> tlag(12) -> fill."""
        return (
            source(name, table, column)
            .transform.missing.fill()
            .transform.temporal.tlag(12)
            .transform.missing.fill()
        )

    def lag_tail(column_so_far, log):
        """Append: tlag(1) -> replace_na -> fill, then ops.ln when `log` is true."""
        shifted = (
            column_so_far
            .transform.temporal.tlag(1)
            .transform.missing.replace_na()
            .transform.missing.fill()
        )
        if log:
            return shifted.transform.ops.ln()
        return shifted

    def lagged(name, table, column, log=False):
        """Chain: replace_na -> lag_tail."""
        return lag_tail(source(name, table, column).transform.missing.replace_na(), log)

    def neighbour_lagged(name, table, column, lag_second, log=False):
        """Chain: replace_na -> spatial.lag(1, lag_second, 0, 0) -> replace_na -> lag_tail."""
        neighbours = (
            source(name, table, column)
            .transform.missing.replace_na()
            .transform.spatial.lag(1, lag_second, 0, 0)
            .transform.missing.replace_na()
        )
        return lag_tail(neighbours, log)

    columns = [
        # POL. INSTITUTIONS / Deviation (section labels kept from the original)
        # NOTE(review): protest_paper_pr_devi_bl_01 defines columns with these
        # same two names (replace_na only); check for clashes if the two
        # halves are joined.
        filled_count("acled_prpe_count", "acled2_pgm", "acled_c1_count"),
        filled_count("acled_prex_count", "acled2_pgm", "acled_c3_count"),

        lagged_index("vdem_v2x_polyarchy_tlag12", "vdem_v12_cy", "vdem_v12_v2x_polyarchy"),
        lagged_index("vdem_v2x_civlib_tlag12", "vdem_v12_cy", "vdem_v12_v2x_civlib"),

        # One-step temporal lags of the raw counts.
        # NOTE(review): the "geb_" prefix looks like a typo for "ged_"; the
        # names are kept because other code may refer to them.
        lagged("acled_prpe_count_tlag1", "acled2_pgm", "acled_c1_count"),
        lagged("acled_prex_count_tlag1", "acled2_pgm", "acled_c3_count"),
        lagged("geb_sb_best_tlag1", "ged2_pgm", "ged_sb_best_sum_nokgi"),
        lagged("geb_os_best_tlag1", "ged2_pgm", "ged_os_best_sum_nokgi"),

        # Spatial lag, then one-step temporal lag.
        # NOTE(review): "splag_1_1_ged_os_dummy_tlag1" has no bool.gte step
        # despite "dummy" in its name; confirm whether that is intended.
        neighbour_lagged("splag_1_1_ged_sb_best_tlag1", "ged2_pgm", "ged_sb_best_sum_nokgi", 1),
        neighbour_lagged("splag_1_1_ged_os_dummy_tlag1", "ged2_pgm", "ged_os_best_sum_nokgi", 1),
        neighbour_lagged("splag_1_1_acled_prpe_count_tlag1", "acled2_pgm", "acled_c1_count", 1),
        neighbour_lagged("splag_1_1_acled_prex_count_tlag1", "acled2_pgm", "acled_c3_count", 1),

        # ln-transformed variants of the temporal lags.
        lagged("ln_acled_prpe_count_tlag1", "acled2_pgm", "acled_c1_count", log=True),
        lagged("ln_acled_prex_count_tlag1", "acled2_pgm", "acled_c3_count", log=True),
        lagged("ln_geb_sb_best_tlag1", "ged2_pgm", "ged_sb_best_sum_nokgi", log=True),
        lagged("ln_geb_os_best_tlag1", "ged2_pgm", "ged_os_best_sum_nokgi", log=True),

        # ln-transformed variants of the spatial lags; the ACLED ones pass 2
        # as the second spatial.lag argument, matching their "splag_1_2" names.
        neighbour_lagged("ln_splag_1_1_ged_sb_best_tlag1", "ged2_pgm", "ged_sb_best_sum_nokgi", 1, log=True),
        neighbour_lagged("ln_splag_1_1_ged_os_best_tlag1", "ged2_pgm", "ged_os_best_sum_nokgi", 1, log=True),
        neighbour_lagged("ln_splag_1_2_acled_prpe_count_tlag1", "acled2_pgm", "acled_c1_count", 2, log=True),
        neighbour_lagged("ln_splag_1_2_acled_prex_count_tlag1", "acled2_pgm", "acled_c3_count", 2, log=True),
    ]

    queryset = Queryset("protest_paper_pr_devi_bl_02", "priogrid_month")
    for column in columns:
        queryset = queryset.with_column(column)

    # The description text is reproduced verbatim.
    return queryset.with_theme("protest_paper").describe("""Second half of query: National dynamic protest model including local dynamic model, deviation model and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)


qs = _build_pr_devi_bl_02_queryset()

# Publish the query set and download the resulting frame.
df_pr_devi_bl_02 = qs.publish().fetch()

# Summary: number of columns, range of index level 0 ("t") and number of
# distinct values of index level 1 ("units").
_t_level = df_pr_devi_bl_02.index.get_level_values(0)
_unit_level = df_pr_devi_bl_02.index.get_level_values(1)
print(
    f"A dataset with {len(df_pr_devi_bl_02.columns)} columns, with "
    f"data between t = {min(_t_level)} "
    f"and {max(_t_level)}. "
    f"({len(np.unique(_unit_level))} units)"
)

#### Full protest model + economic development

##### Full protest model + economic development, country level

In [None]:
qs = (Queryset("protest_paper_pr_econ_national_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of fatalities
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.ops.ln()
         .transform.missing.fill()
        )
      
    # Decay function
    ## 12 months 
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(12)
         .transform.missing.fill()
        )
      
     ## 24 months 
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(24)
         .transform.missing.fill()
        )
   
    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
                 .transform.missing.fill()
            )
      
      # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )
      
      # Moving average over 24 months
      .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                         .transform.ops.ln()
                         .transform.missing.replace_na()
                         .transform.temporal.moving_average(24)
                         .transform.missing.fill()
                        )
      
      ## PROTEST FEATURES - Dynamic Local
      
      .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      ## PROTEST FEATURES - Dynamic national
      
      .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      # population
      .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      ### ECONOMIC DEVELOPMENT, Country level
      .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      
            
      .with_theme("protest_paper")
      .describe("""National dynamic protest model including local dynamic model, economic delveopment (natonal) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the queryset defined above and fetch the resulting data.
df_pr_econ_national_bl = qs.publish().fetch()

# Summary line: number of columns, range of index level 0 (printed as t)
# and number of distinct values in index level 1 (printed as units).
print(
    f"A dataset with {df_pr_econ_national_bl.shape[1]} columns, with "
    f"data between t = {df_pr_econ_national_bl.index.get_level_values(0).min()} "
    f"and {df_pr_econ_national_bl.index.get_level_values(0).max()}. "
    f"({len(df_pr_econ_national_bl.index.get_level_values(1).unique())} units)"
)

##### Full protest model + economic development, full (country and subnational level)

In [None]:
# Queryset "protest_paper_pr_econ_full_bl" at priogrid_month level.
# Column groups, in order: conflict-history baseline (ged2_pgm), local protest
# features (acled2_pgm), national protest features (acled2_cm), population,
# country-level economic development (wdi_cy) and sub-national economic
# development (priogrid_year).
# Column names are kept exactly as before so that code consuming the fetched
# dataframe keeps working.
qs = (Queryset("protest_paper_pr_econ_full_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # timelag 0 of fatalities (ln transform)
    .with_column(Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay function
    ## 12 months
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    ## 24 months
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average over 24 months
    # NOTE(review): the column name says "12" but the window is 24; the name is
    # left unchanged because other code may reference it -- confirm which is intended.
    .with_column(Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    ## PROTEST FEATURES - Dynamic Local

    .with_column(Column("decay_ts_6_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(Column("decay_ts_6_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(Column("decay_ts_6_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # Same decay features computed on the spatial lag (1,2,0,0) of each protest dummy
    .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
    .with_column(Column("acled_prex_count", from_table="acled2_pgm", from_column="acled_c3_count")
        .transform.missing.replace_na()
    )
    # peaceful protest (interaction codes: 60, 66, or 67.)
    .with_column(Column("acled_prpe_count", from_table="acled2_pgm", from_column="acled_c1_count")
        .transform.missing.replace_na()
    )
    # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
    # NOTE(review): this code list is identical to the one given for "prex"
    # above -- possibly a copy-paste; confirm the intended codes.
    .with_column(Column("acled_prin_count", from_table="acled2_pgm", from_column="acled_c2_count")
        .transform.missing.replace_na()
    )
    # protest with riots (interaction codes: inter 1 or 2 has 5)
    .with_column(Column("acled_prri_count", from_table="acled2_pgm", from_column="acled_c5_count")
        .transform.missing.replace_na()
    )

    ## PROTEST FEATURES - Dynamic national

    .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table="acled2_cm", from_column="acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table="acled2_cm", from_column="acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table="acled2_cm", from_column="acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )
    .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table="acled2_cm", from_column="acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
    .with_column(Column("acled_prex_count_cm", from_table="acled2_cm", from_column="acled_c3_count")
        .transform.missing.replace_na()
    )
    # peaceful protest (interaction codes: 60, 66, or 67.)
    .with_column(Column("acled_prpe_count_cm", from_table="acled2_cm", from_column="acled_c1_count")
        .transform.missing.replace_na()
    )
    # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
    # NOTE(review): same code list as "prex" -- confirm the intended codes.
    .with_column(Column("acled_prin_count_cm", from_table="acled2_cm", from_column="acled_c2_count")
        .transform.missing.replace_na()
    )
    # protest with riots (interaction codes: inter 1 or 2 has 5)
    .with_column(Column("acled_prri_count_cm", from_table="acled2_cm", from_column="acled_c5_count")
        .transform.missing.replace_na()
    )

    # population
    .with_column(Column("wdi_sp_pop_totl", from_table="wdi_cy", from_column="wdi_sp_pop_totl")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

    ### ECONOMIC DEVELOPMENT, Country level
    .with_column(Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
        .transform.missing.fill()
    )
    .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
        .transform.missing.fill()
    )
    # Unemployment (WDI SL.UEM.TOTL.ZS). This column was previously read from
    # "wdi_ny_gdp_pcap_kd_zg", which made it an exact duplicate of the GDP
    # growth column above; it now reads its own source column.
    .with_column(Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
        .transform.missing.fill()
    )

    ### ECONOMIC DEVELOPMENT, Sub-national level
    .with_column(Column("pgd_gcp_mer", from_table="priogrid_year", from_column="gcp_mer")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(Column("pgd_imr_mean", from_table="priogrid_year", from_column="imr_mean")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(Column("pgd_urban_ih", from_table="priogrid_year", from_column="urban_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(Column("pgd_agri_ih", from_table="priogrid_year", from_column="agri_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

    .with_theme("protest_paper")
    .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national and subnational) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the queryset defined above and fetch the resulting data.
df_pr_econ_full_bl = qs.publish().fetch()

# Summary line: number of columns, range of index level 0 (printed as t)
# and number of distinct values in index level 1 (printed as units).
print(
    f"A dataset with {df_pr_econ_full_bl.shape[1]} columns, with "
    f"data between t = {df_pr_econ_full_bl.index.get_level_values(0).min()} "
    f"and {df_pr_econ_full_bl.index.get_level_values(0).max()}. "
    f"({len(df_pr_econ_full_bl.index.get_level_values(1).unique())} units)"
)

#### Full protest model + economic development + political institutions

##### Full protest model + political institutions III + economic development (country level)

In [None]:
qs = (Queryset("protest_paper_pr_elect_econ_national_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of fatalities
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.ops.ln()
         .transform.missing.fill()
        )
      
    # Decay function
    ## 12 months 
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(12)
         .transform.missing.fill()
        )
      
     ## 24 months 
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(24)
         .transform.missing.fill()
        )
   
    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
                 .transform.missing.fill()
            )
      
      # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )
      
      # Moving average over 24 months
      .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                         .transform.ops.ln()
                         .transform.missing.replace_na()
                         .transform.temporal.moving_average(24)
                         .transform.missing.fill()
                        )
      
      ## PROTEST FEATURES - Dynamic Local
      
      .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      ## PROTEST FEATURES - Dynamic national
      
      .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      # population
      .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      ### ECONOMIC DEVELOPMENT, Country level
      .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      
      # POL. INSTITUTIONS
      # Electoral democracy.
      .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )
      
      # Civil liberties
      .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )
      
      #.with_column(Column("vdem_v2clrgunev_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v2clrgunev")
                         #.transform.missing.fill()
                         #.transform.temporal.tlag(12)
                         #.transform.missing.fill()
                  #)
      
      .with_column(Column("vdem_v2clrgunev_tlag12", from_table = "tbl_734eevdem_v12_cy", from_column = "vdem_v12_v2clrgunev")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                  )
      
      # Elections.
      .with_column(Column("lastelection", from_table = "reign_cm", from_column = "lastelection")
                         .transform.missing.replace_na()
                        )
      .with_column(Column("anticipation", from_table = "reign_cm", from_column = "anticipation")
                         .transform.missing.replace_na()
                        )
      
            
      .with_theme("protest_paper")
      .describe("""National dynamic protest model including local dynamic model, economic delveopment (national), political institutions III (full) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the queryset defined above and download it as a DataFrame.
df_pr_elect_econ_national_bl = qs.publish().fetch()

# Summarise the fetched frame: number of columns, the range of index level 0
# (reported as "t") and the number of distinct values in index level 1
# (reported as "units").
time_ids = df_pr_elect_econ_national_bl.index.get_level_values(0)
unit_ids = df_pr_elect_econ_national_bl.index.get_level_values(1)
n_columns = len(df_pr_elect_econ_national_bl.columns)

print(
    f"A dataset with {n_columns} columns, with "
    f"data between t = {min(time_ids)} "
    f"and {max(time_ids)}. "
    f"({len(np.unique(unit_ids))} units)"
)

##### Full protest model + political institutions III + full economic development (country and subnational level)

In [None]:
# Queryset "protest_paper_pr_elect_econ_full_bl" (priogrid_month level):
# conflict-history baseline, local (pgm) and national (cm) protest features,
# country-level and sub-national economic development, political institutions
# and election variables.
#
# Column names are part of the interface used by the analysis notebook, so they
# are kept exactly as they were; only the source of `wdi_sl_uem_totl_zs` changed.
qs = (
    Queryset("protest_paper_pr_elect_econ_full_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # timelag 0 of fatalities
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay function
    ## 12 months
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    ## 24 months
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag function
    # NOTE(review): fill() directly after replace_na() looks redundant; kept
    # unchanged so the published transform chain stays the same.
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average over 24 months
    # NOTE(review): the column name says "12" but the window is 24. The name is
    # kept because downstream code refers to it; confirm which one is intended.
    .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    ## PROTEST FEATURES - Dynamic Local

    .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # Same four protest dummies, passed through spatial.lag(1,2,0,0) before the decay
    .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
    .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
    )
    # peaceful protest (interaction codes: 60, 66, or 67.)
    .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
    )
    # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
    # NOTE(review): these codes are identical to the ones listed for "excessive
    # violence" above - possibly a copy-paste; confirm against the ACLED coding.
    .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
    )
    # protest with riots (interaction codes: inter 1 or 2 has 5)
    .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
    )

    ## PROTEST FEATURES - Dynamic national

    .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
    .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
    )
    # peaceful protest (interaction codes: 60, 66, or 67.)
    .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
    )
    # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
    # NOTE(review): same code list as "excessive violence" - confirm.
    .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
    )
    # protest with riots (interaction codes: inter 1 or 2 has 5)
    .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
    )

    # population
    .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

    ### ECONOMIC DEVELOPMENT, Country level
    .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
        .transform.missing.fill()
    )
    .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
        .transform.missing.fill()
    )
    # Fix: this column used to read from "wdi_ny_gdp_pcap_kd_zg", which made it
    # an exact duplicate of the column directly above. It now reads the series
    # matching its own name.
    # NOTE(review): confirm "wdi_sl_uem_totl_zs" is available in wdi_cy, and
    # re-run models that were fitted on the duplicated column.
    .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_sl_uem_totl_zs")
        .transform.missing.fill()
    )

    ### ECONOMIC DEVELOPMENT, Sub-national level
    .with_column(Column("pgd_gcp_mer", from_table = "priogrid_year", from_column = "gcp_mer")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(Column("pgd_imr_mean", from_table = "priogrid_year", from_column = "imr_mean")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(Column("pgd_urban_ih", from_table = "priogrid_year", from_column = "urban_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(Column("pgd_agri_ih", from_table = "priogrid_year", from_column = "agri_ih")
        .transform.missing.fill()
        .transform.missing.replace_na()
    )
    .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

    # POL. INSTITUTIONS
    # Electoral democracy.
    .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Civil liberties
    .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # NOTE(review): this column reads from "tbl_734eevdem_v12_cy" while the two
    # V-Dem columns above read from "vdem_v12_cy" - confirm the table name is
    # intentional.
    .with_column(Column("vdem_v2clrgunev_tlag12", from_table = "tbl_734eevdem_v12_cy", from_column = "vdem_v12_v2clrgunev")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill()
    )

    # Elections.
    .with_column(Column("lastelection", from_table = "reign_cm", from_column = "lastelection")
        .transform.missing.replace_na()
    )
    .with_column(Column("anticipation", from_table = "reign_cm", from_column = "anticipation")
        .transform.missing.replace_na()
    )

    .with_theme("protest_paper")
    .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national, sub-national), political institutions (full) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the queryset defined above and download it as a DataFrame.
df_pr_elect_econ_full_bl = qs.publish().fetch()

# Summarise the fetched frame: number of columns, the range of index level 0
# (reported as "t") and the number of distinct values in index level 1
# (reported as "units").
time_ids = df_pr_elect_econ_full_bl.index.get_level_values(0)
unit_ids = df_pr_elect_econ_full_bl.index.get_level_values(1)
n_columns = len(df_pr_elect_econ_full_bl.columns)

print(
    f"A dataset with {n_columns} columns, with "
    f"data between t = {min(time_ids)} "
    f"and {max(time_ids)}. "
    f"({len(np.unique(unit_ids))} units)"
)

##### Full protest model + political institutions IV + economic development (country level)

In [None]:
qs = (Queryset("protest_paper_pr_devi_econ_national_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of fatalities
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.ops.ln()
         .transform.missing.fill()
        )
      
    # Decay function
    ## 12 months 
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(12)
         .transform.missing.fill()
        )
      
     ## 24 months 
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(24)
         .transform.missing.fill()
        )
   
    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
                 .transform.missing.fill()
            )
      
      # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )
      
      # Moving average over 24 months
      .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                         .transform.ops.ln()
                         .transform.missing.replace_na()
                         .transform.temporal.moving_average(24)
                         .transform.missing.fill()
                        )
      
      ## PROTEST FEATURES - Dynamic Local
      
      .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      ## PROTEST FEATURES - Dynamic national
      
      .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      # population
      .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      ### ECONOMIC DEVELOPMENT, Country level
      .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      
      # POL. INSTITUTIONS
      # Deviation
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )

      .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
      
      
            
      .with_theme("protest_paper")
      .describe("""National dynamic protest model including local dynamic model, economic delveopment (national), political institutions IV (deviation) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

df_pr_devi_econ_national_bl = qs.publish().fetch()

print(f"A dataset with {len(df_pr_devi_econ_national_bl.columns)} columns, with "
      f"data between t = {min(df_pr_devi_econ_national_bl.index.get_level_values(0))} "
      f"and {max(df_pr_devi_econ_national_bl.index.get_level_values(0))}. "
      f"({len(np.unique(df_pr_devi_econ_national_bl.index.get_level_values(1)))} units)"
     )

In [None]:
# Queryset "protest_paper_pr_devi_econ_national_bl_01" (priogrid_month level).
# Per its description this is the FIRST HALF of the national dynamic protest
# model: conflict history, local (pgm) and national (cm) protest features,
# population, and country-level economic development.
# The cell defines the queryset, publishes it, fetches the data into
# `df_pr_devi_econ_national_bl_01`, and prints a short summary of the result.
qs = (Queryset("protest_paper_pr_devi_econ_national_bl_01", "priogrid_month")

    ## CONFLICT HISTORY (state-based fatalities, ged2_pgm)

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )

    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )

    # timelag 0 of fatalities
    # NOTE(review): unlike the other columns, ln() is applied before any
    # missing-value handling here -- confirm this ordering is intended.
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
        )

    # Decay function
    ## 12 months
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
        )

    ## 24 months
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
        )

    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.missing.fill()
        )

    # Decay of spatial lag (the lagged value is turned back into a dummy
    # with gte(1) before time_since/decay are applied)
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
        )

    # Moving average over 24 months
    # NOTE(review): the column name says "12" but the window passed is 24.
    # The name is kept unchanged because other code may reference it.
    .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
        )

    ## PROTEST FEATURES - Dynamic Local (acled2_pgm)

    .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    # Same decay features, computed on the spatial lag (1,2,0,0) of each dummy
    .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,2,0,0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
    .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        )
    # peaceful protest (interaction codes: 60, 66, or 67.)
    .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        )
    # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
    # NOTE(review): the codes listed here are identical to the ones given for
    # "excessive violence" above -- confirm which codes define acled_c2_count.
    .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
        )
    # protest with riots (interaction codes: inter 1 or 2 has 5)
    .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
        )

    ## PROTEST FEATURES - Dynamic national (acled2_cm, suffix "_cm")

    .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
        )

    # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
    .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
        .transform.missing.replace_na()
        )
    # peaceful protest (interaction codes: 60, 66, or 67.)
    .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
        .transform.missing.replace_na()
        )
    # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
    # NOTE(review): same code list as "excessive violence" -- confirm.
    .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
        .transform.missing.replace_na()
        )
    # protest with riots (interaction codes: inter 1 or 2 has 5)
    .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
        .transform.missing.replace_na()
        )

    ## POPULATION

    # country-level population (wdi_cy)
    .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
        )

    # grid-cell population (priogrid_year)
    .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
        )

    ### ECONOMIC DEVELOPMENT, Country level
    .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
        .transform.missing.fill()
        )
    .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
        .transform.missing.fill()
        )
    # FIX: this column previously read from "wdi_ny_gdp_pcap_kd_zg", which made
    # it an exact duplicate of the GDP growth feature directly above. It now
    # reads from the source column matching its own name.
    # NOTE(review): confirm that "wdi_sl_uem_totl_zs" is available in wdi_cy.
    .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_sl_uem_totl_zs")
        .transform.missing.fill()
        )

    .with_theme("protest_paper")
    .describe("""First half of query: National dynamic protest model including local dynamic model, economic delveopment (national), political institutions IV (deviation) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Register the queryset definition remotely, then download the resulting data.
df_pr_devi_econ_national_bl_01 = qs.publish().fetch()

# Summary: number of columns, range of index level 0, and number of unique
# values in index level 1 (reported as "t" and "units" in the message).
print(f"A dataset with {len(df_pr_devi_econ_national_bl_01.columns)} columns, with "
      f"data between t = {min(df_pr_devi_econ_national_bl_01.index.get_level_values(0))} "
      f"and {max(df_pr_devi_econ_national_bl_01.index.get_level_values(0))}. "
      f"({len(np.unique(df_pr_devi_econ_national_bl_01.index.get_level_values(1)))} units)"
     )

In [None]:
def _build_pr_devi_econ_national_bl_02():
    """Assemble the "protest_paper_pr_devi_econ_national_bl_02" queryset.

    Per its description this is the second half of the national dynamic
    protest model (priogrid_month level). Columns are added in this order:

    1. current protest counts (replace_na -> fill),
    2. V-Dem indices lagged 12 steps (fill -> tlag(12) -> fill),
    3. one-step temporal lags of protest and fatality counts, optionally
       preceded by a spatial lag and optionally followed by ln().

    Returns the configured Queryset; nothing is published or fetched here.
    """
    acled_table = "acled2_pgm"
    ged_table = "ged2_pgm"
    vdem_table = "vdem_v12_cy"

    peaceful = "acled_c1_count"
    excessive = "acled_c3_count"
    state_based = "ged_sb_best_sum_nokgi"
    one_sided = "ged_os_best_sum_nokgi"

    queryset = Queryset("protest_paper_pr_devi_econ_national_bl_02", "priogrid_month")

    # POL. INSTITUTIONS
    # Deviation
    current_counts = [
        ("acled_prpe_count", peaceful),
        ("acled_prex_count", excessive),
    ]
    for out_name, source in current_counts:
        col = Column(out_name, from_table = acled_table, from_column = source)
        col = col.transform.missing.replace_na()
        col = col.transform.missing.fill()
        queryset = queryset.with_column(col)

    vdem_indices = [
        ("vdem_v2x_polyarchy_tlag12", "vdem_v12_v2x_polyarchy"),
        ("vdem_v2x_civlib_tlag12", "vdem_v12_v2x_civlib"),
    ]
    for out_name, source in vdem_indices:
        col = Column(out_name, from_table = vdem_table, from_column = source)
        col = col.transform.missing.fill()
        col = col.transform.temporal.tlag(12)
        col = col.transform.missing.fill()
        queryset = queryset.with_column(col)

    # (output name, source table, source column,
    #  second argument of spatial.lag or None for no spatial lag, apply ln last)
    lagged_features = [
        # plain one-step lags
        ("acled_prpe_count_tlag1", acled_table, peaceful, None, False),
        ("acled_prex_count_tlag1", acled_table, excessive, None, False),
        ("geb_sb_best_tlag1", ged_table, state_based, None, False),
        ("geb_os_best_tlag1", ged_table, one_sided, None, False),
        # spatial lag, then one-step lag
        ("splag_1_1_ged_sb_best_tlag1", ged_table, state_based, 1, False),
        ("splag_1_1_ged_os_dummy_tlag1", ged_table, one_sided, 1, False),
        ("splag_1_1_acled_prpe_count_tlag1", acled_table, peaceful, 1, False),
        ("splag_1_1_acled_prex_count_tlag1", acled_table, excessive, 1, False),
        # logged one-step lags
        ("ln_acled_prpe_count_tlag1", acled_table, peaceful, None, True),
        ("ln_acled_prex_count_tlag1", acled_table, excessive, None, True),
        ("ln_geb_sb_best_tlag1", ged_table, state_based, None, True),
        ("ln_geb_os_best_tlag1", ged_table, one_sided, None, True),
        # logged spatial lag + one-step lag
        ("ln_splag_1_1_ged_sb_best_tlag1", ged_table, state_based, 1, True),
        ("ln_splag_1_1_ged_os_best_tlag1", ged_table, one_sided, 1, True),
        ("ln_splag_1_2_acled_prpe_count_tlag1", acled_table, peaceful, 2, True),
        ("ln_splag_1_2_acled_prex_count_tlag1", acled_table, excessive, 2, True),
    ]
    for out_name, table, source, splag_arg, take_log in lagged_features:
        col = Column(out_name, from_table = table, from_column = source)
        col = col.transform.missing.replace_na()
        if splag_arg is not None:
            # the spatial lag comes before the temporal lag, with NAs
            # replaced again in between
            col = col.transform.spatial.lag(1, splag_arg, 0, 0)
            col = col.transform.missing.replace_na()
        col = col.transform.temporal.tlag(1)
        col = col.transform.missing.replace_na()
        col = col.transform.missing.fill()
        if take_log:
            col = col.transform.ops.ln()
        queryset = queryset.with_column(col)

    queryset = queryset.with_theme("protest_paper")
    queryset = queryset.describe("""Second half of query: National dynamic protest model including local dynamic model, economic delveopment (national), political institutions IV (deviation) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    return queryset


qs = _build_pr_devi_econ_national_bl_02()

# Publish the definition and pull the resulting data.
df_pr_devi_econ_national_bl_02 = qs.publish().fetch()

# Report column count, range of index level 0 and unique count of index level 1.
print(f"A dataset with {len(df_pr_devi_econ_national_bl_02.columns)} columns, with "
      f"data between t = {min(df_pr_devi_econ_national_bl_02.index.get_level_values(0))} "
      f"and {max(df_pr_devi_econ_national_bl_02.index.get_level_values(0))}. "
      f"({len(np.unique(df_pr_devi_econ_national_bl_02.index.get_level_values(1)))} units)"
     )

##### Full protest model + political institutions III + full economic development (country and sub-national level)

In [None]:
qs = (Queryset("protest_paper_pr_devi_econ_full_bl", "priogrid_month")

    # target variable
    .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of target variable
    .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        )
      
    # timelag 0 of fatalities
    .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.ops.ln()
         .transform.missing.fill()
        )
      
    # Decay function
    ## 12 months 
    .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(12)
         .transform.missing.fill()
        )
      
     ## 24 months 
    .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
         .transform.missing.replace_na()
         .transform.bool.gte(1)
         .transform.temporal.time_since()
         .transform.temporal.decay(24)
         .transform.missing.fill()
        )
   
    # Spatial lag function
    .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
                 .transform.missing.fill()
            )
      
      # Decay of spatial lag
    .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )
      
      # Moving average over 24 months
      .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                         .transform.ops.ln()
                         .transform.missing.replace_na()
                         .transform.temporal.moving_average(24)
                         .transform.missing.fill()
                        )
      
      ## PROTEST FEATURES - Dynamic Local
      
      .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.spatial.lag(1,2,0,0)
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      ## PROTEST FEATURES - Dynamic national
      
      .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                         .transform.bool.gte(1)
                         .transform.temporal.time_since()
                         .transform.temporal.decay(6)
                         .transform.missing.fill()
                        )
      
      # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                         .transform.missing.replace_na()
                        )
      # peaceful protest (interaction codes: 60, 66, or 67.)
      .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                        )
      # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
      .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                         .transform.missing.replace_na()
                        )
      # protest with riots (interaction codes: inter 1 or 2 has 5)
      .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                         .transform.missing.replace_na()
                        )
      
      # population
      .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      ### ECONOMIC DEVELOPMENT, Country level
      .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      
      ### ECONOMIC DEVELOPMENT, Sub-national level
      .with_column(Column("pgd_gcp_mer", from_table = "priogrid_year", from_column = "gcp_mer")
                         .transform.missing.fill()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_imr_mean", from_table = "priogrid_year", from_column = "imr_mean")
                         .transform.missing.fill()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_urban_ih", from_table = "priogrid_year", from_column = "urban_ih")
                         .transform.missing.fill()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_agri_ih", from_table = "priogrid_year", from_column = "agri_ih")
                         .transform.missing.fill()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      # POL. INSTITUTIONS
      # Deviation
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )

      .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
            
      .with_theme("protest_paper")
      .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national, sub-national), political institutions IV (deviation) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the queryset definition built above and download the resulting data frame.
df_pr_devi_econ_full_bl = qs.publish().fetch()

# Summarise the download. Index level 0 is reported as time ("t"),
# index level 1 as the unit identifier.
_t_values = df_pr_devi_econ_full_bl.index.get_level_values(0)
_unit_values = df_pr_devi_econ_full_bl.index.get_level_values(1)
print(f"A dataset with {len(df_pr_devi_econ_full_bl.columns)} columns, with "
      f"data between t = {min(_t_values)} "
      f"and {max(_t_values)}. "
      f"({len(np.unique(_unit_values))} units)"
     )

In [None]:
# Queryset "protest_paper_pr_devi_econ_full_bl_01" (priogrid_month level):
# target variable, conflict-history baseline features and the local (pgm) and
# national (cm) protest features.
#
# The repeated transform chains are factored into the small builders below so that
# every column of the same kind is guaranteed to go through the same steps. Column
# names, source columns, transform order and column order are unchanged.

_GED_PGM = "ged2_pgm"
_GED_SB = "ged_sb_best_sum_nokgi"
_ACLED_PGM = "acled2_pgm"
_ACLED_CM = "acled2_cm"


def _event_dummy(name, table, column):
    """Return Column `name` from `table`.`column`: missing.replace_na -> bool.gte(1)."""
    return (Column(name, from_table = table, from_column = column)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
           )


def _event_count(name, table, column):
    """Return Column `name` from `table`.`column`: missing.replace_na only."""
    return (Column(name, from_table = table, from_column = column)
            .transform.missing.replace_na()
           )


def _decay_since_event(name, table, column, months):
    """Return Column `name`: dummy (>= 1) -> temporal.time_since -> temporal.decay(months) -> missing.fill."""
    return (Column(name, from_table = table, from_column = column)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(months)
            .transform.missing.fill()
           )


def _decay_since_splag_event(name, table, column, months, splag_args):
    """Return Column `name`: dummy (>= 1) -> spatial.lag(*splag_args) -> dummy (>= 1)
    -> temporal.time_since -> temporal.decay(months) -> missing.fill.

    `splag_args` is passed positionally to `transform.spatial.lag` unchanged.
    """
    return (Column(name, from_table = table, from_column = column)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(*splag_args)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(months)
            .transform.missing.fill()
           )


# Columns in the order in which they are added to the queryset.
_columns_01 = [
    # target variable
    _event_dummy("ged_sb_dummy_dep", _GED_PGM, _GED_SB),

    # timelag 0 of target variable
    _event_dummy("ged_sb_dummy", _GED_PGM, _GED_SB),

    # timelag 0 of fatalities
    (Column("ln_ged_sb", from_table = _GED_PGM, from_column = _GED_SB)
        .transform.ops.ln()
        .transform.missing.fill()
    ),

    # Decay function
    ## 12 months
    _decay_since_event("decay_ts_12_ged_sb_dummy", _GED_PGM, _GED_SB, 12),
    ## 24 months
    _decay_since_event("decay_ts_24_ged_sb_dummy", _GED_PGM, _GED_SB, 24),

    # Spatial lag function
    (Column("splag_1_1_ged_sb_dummy", from_table = _GED_PGM, from_column = _GED_SB)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1,1,0,0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    ),

    # Decay of spatial lag
    _decay_since_splag_event("decay_ts_12_splag_1_1_ged_sb_dummy", _GED_PGM, _GED_SB, 12, (1,1,0,0)),

    # Moving average over 24 months
    # NOTE(review): the column name says "12" but the window is 24; both are kept
    # as they were so that downstream column names and values do not change.
    (Column("mov_avg_12_ged_best_sb", from_table = _GED_PGM, from_column = _GED_SB)
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    ),

    ## PROTEST FEATURES - Dynamic Local
    _decay_since_event("decay_ts_6_acled_prex_dummy", _ACLED_PGM, "acled_c3_count", 6),
    _decay_since_event("decay_ts_6_acled_prpe_dummy", _ACLED_PGM, "acled_c1_count", 6),
    _decay_since_event("decay_ts_6_acled_prin_dummy", _ACLED_PGM, "acled_c2_count", 6),
    _decay_since_event("decay_ts_6_acled_prri_dummy", _ACLED_PGM, "acled_c5_count", 6),

    _decay_since_splag_event("decay_ts_6_splag_1_2_acled_prpe_dummy", _ACLED_PGM, "acled_c1_count", 6, (1,2,0,0)),
    _decay_since_splag_event("decay_ts_6_splag_1_2_acled_prex_dummy", _ACLED_PGM, "acled_c3_count", 6, (1,2,0,0)),
    _decay_since_splag_event("decay_ts_6_splag_1_2_acled_prin_dummy", _ACLED_PGM, "acled_c2_count", 6, (1,2,0,0)),
    _decay_since_splag_event("decay_ts_6_splag_1_2_acled_prri_dummy", _ACLED_PGM, "acled_c5_count", 6, (1,2,0,0)),

    # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
    _event_count("acled_prex_count", _ACLED_PGM, "acled_c3_count"),
    # peaceful protest (interaction codes: 60, 66, or 67.)
    _event_count("acled_prpe_count", _ACLED_PGM, "acled_c1_count"),
    # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
    # NOTE(review): these codes are identical to the ones listed for "prex" above;
    # possibly a copy-pasted comment - confirm against the ACLED coding.
    _event_count("acled_prin_count", _ACLED_PGM, "acled_c2_count"),
    # protest with riots (interaction codes: inter 1 or 2 has 5)
    _event_count("acled_prri_count", _ACLED_PGM, "acled_c5_count"),

    ## PROTEST FEATURES - Dynamic national
    _decay_since_event("decay_ts_6_acled_prex_dummy_cm", _ACLED_CM, "acled_c3_count", 6),
    _decay_since_event("decay_ts_6_acled_prpe_dummy_cm", _ACLED_CM, "acled_c1_count", 6),
    _decay_since_event("decay_ts_6_acled_prin_dummy_cm", _ACLED_CM, "acled_c2_count", 6),
    _decay_since_event("decay_ts_6_acled_prri_dummy_cm", _ACLED_CM, "acled_c5_count", 6),

    # Same four protest types as the local counts, taken from the country-month table.
    _event_count("acled_prex_count_cm", _ACLED_CM, "acled_c3_count"),
    _event_count("acled_prpe_count_cm", _ACLED_CM, "acled_c1_count"),
    _event_count("acled_prin_count_cm", _ACLED_CM, "acled_c2_count"),
    _event_count("acled_prri_count_cm", _ACLED_CM, "acled_c5_count"),
]

qs = Queryset("protest_paper_pr_devi_econ_full_bl_01", "priogrid_month")
for _column in _columns_01:
    qs = qs.with_column(_column)

# NOTE(review): the description below also mentions economic development and
# political institutions features, which are not defined in this "_01" queryset.
# The text is kept unchanged because it is stored with the published queryset.
qs = (qs
      .with_theme("protest_paper")
      .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national, sub-national), political institutions IV (deviation) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the queryset definition and download the resulting data frame.
df_pr_devi_econ_full_bl_01 = qs.publish().fetch()

# Summarise the download: number of columns, range of index level 0 (reported as
# "t") and number of distinct values in index level 1 (reported as units).
print(f"A dataset with {len(df_pr_devi_econ_full_bl_01.columns)} columns, with "
      f"data between t = {min(df_pr_devi_econ_full_bl_01.index.get_level_values(0))} "
      f"and {max(df_pr_devi_econ_full_bl_01.index.get_level_values(0))}. "
      f"({len(np.unique(df_pr_devi_econ_full_bl_01.index.get_level_values(1)))} units)"
     )

In [None]:
qs = (Queryset("protest_paper_pr_devi_econ_full_bl_02", "priogrid_month")

      
      # population
      .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      ### ECONOMIC DEVELOPMENT, Country level
      .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                         .transform.missing.fill()
                        )
      
      ### ECONOMIC DEVELOPMENT, Sub-national level
      .with_column(Column("pgd_gcp_mer", from_table = "priogrid_year", from_column = "gcp_mer")
                         .transform.missing.fill()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_imr_mean", from_table = "priogrid_year", from_column = "imr_mean")
                         .transform.missing.fill()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_urban_ih", from_table = "priogrid_year", from_column = "urban_ih")
                         .transform.missing.fill()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_agri_ih", from_table = "priogrid_year", from_column = "agri_ih")
                         .transform.missing.fill()
                         .transform.missing.replace_na()
                        )
      .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                         .transform.missing.fill()
                         .transform.missing.extrapolate()
                         .transform.missing.replace_na()
                        )
      
      # POL. INSTITUTIONS
      # Deviation
      .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )

      .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                         .transform.missing.fill()
                         .transform.temporal.tlag(12)
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                        )
      
      .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
            )
      
      .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()      
            )
      
      .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                         .transform.missing.replace_na()
                         .transform.temporal.tlag(1)
                         .transform.missing.replace_na()
                         .transform.missing.fill()
                         .transform.ops.ln()
                        )
      
      .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,1,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()   
             .transform.ops.ln()
            )
      
      .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
             .transform.missing.replace_na()
             .transform.spatial.lag(1,2,0,0)
             .transform.missing.replace_na()
             .transform.temporal.tlag(1)
             .transform.missing.replace_na()
             .transform.missing.fill()     
             .transform.ops.ln()
            )
      
            
      .with_theme("protest_paper")
      .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national, sub-national), political institutions IV (deviation) and extended baseline variables, pgm level

            Predicting armed conflict (dummy) using protest data, extended baseline

            """)
    )

# Publish the queryset defined above and fetch the resulting data frame.
df_pr_devi_econ_full_bl_02 = qs.publish().fetch()

# Summarise the fetched frame: number of columns, min/max of index level 0
# (reported as "t" in the message; presumably the time step - verify against
# the queryset's level of analysis) and the number of distinct values in
# index level 1 (reported as "units").
_level0_values = df_pr_devi_econ_full_bl_02.index.get_level_values(0)
_level1_values = df_pr_devi_econ_full_bl_02.index.get_level_values(1)
_n_columns = len(df_pr_devi_econ_full_bl_02.columns)
_n_units = len(np.unique(_level1_values))

print(
    f"A dataset with {_n_columns} columns, with "
    f"data between t = {min(_level0_values)} "
    f"and {max(_level0_values)}. "
    f"({_n_units} units)"
)