# 01 Predicting Armed Conflict Using Protest Data - Query sets & data management

<b>Notebook Description:</b> This notebook is part of the replication files for the article "Predicting Armed Conflict Using Protest Data." The first part includes specifying the query sets to fetch data from the database. The second part loads the query sets, applies additional transformations and saves the data to be used in the analysis. The main analysis and evaluation is executed in the "predicting_armed_conflict_using_protest_data_02_analysis" Jupyter notebook.

<b>Note:</b> To run this notebook, you need to install viewser and have access to the database.

## Overview
* [Importing modules](#modules)
* [Setting up folder structure](#folders)
* [Defining query sets](#defineq)
* [Loading queries](#loadq)
* [Apply transformations](#applytransnforms)
* [Filter only relevant features](#filterfeats)
* [Export data](#savedf)

## Loading modules<a class="anchor" id="modules"></a>

In [None]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import geopandas as gpd
import os

# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
import views_dataviz
from views_runs import storage
from ingester3.config import source_db_path

# Additional transforms from views2
from views_transformation_library.views_2 import ln

# Other packages
import pickle as pkl
from datetime import datetime
import sqlalchemy as sa

# Local py files
import predicting_armed_conflict_using_protest_data_models as organize 

## Set up folder structure<a class="anchor" id="folders"></a>

In [None]:
# Account name interpolated into the replication folder path in the next cell.
# 'username' is a placeholder value; set it to match the local machine.
username = 'username'

In [None]:
# Root folder for the replication files.
folder_path = f'/Users/{username}/Dropbox (ViEWS)/Protest article replication'  # Change path
print('Folder path:', folder_path)

# exist_ok=True makes this a no-op when the directory is already present and
# removes the separate "check, then create" step.
os.makedirs(folder_path, exist_ok=True)

# From here on `folder_path` is a template: the '{sub}' placeholder is filled
# in with a sub-folder name through str.format.
folder_path = os.path.join(folder_path, '{sub}')

# Output sub-folders, keyed by what they hold.
output_paths = {
    'data': folder_path.format(sub='data'),
}

# Create the output folders if they do not already exist.
for output_dir in output_paths.values():
    os.makedirs(output_dir, exist_ok=True)

## Views3 overview

In [None]:
# IPython shell escape: runs the viewser command-line tool with `tables list`
# (presumably prints the tables available in the database - confirm against
# the viewser documentation).
!viewser tables list

## Define querysets<a class="anchor" id="defineq"></a>

In [None]:
# Switch for the queryset cells below: each of them is wrapped in
# `if update_queries:`, so with False none of the querysets is defined,
# published or fetched by this section.
update_queries = False

### Baseline

In [None]:
if update_queries:
    # Every feature in this queryset is derived from the same GED column.
    ged_source = {"from_table": "ged2_pgm", "from_column": "ged_sb_best_sum_nokgi"}

    # Columns in the order they are attached to the queryset.
    baseline_features = [
        # Target variable.
        Column("ged_sb_dummy_dep", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1),

        # Time lag 0 (i.e. tlag 1 in the prediction framework) of the target variable.
        Column("ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1),

        # Time lag 0 of fatalities.
        Column("ln_ged_sb", **ged_source)
        .transform.missing.replace_na()
        .transform.ops.ln(),

        # Decay function, 12 months.
        Column("decay_ts_12_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12),

        # Spatial lag function.
        Column("splag_1_1_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0),
    ]

    # Attach the columns one by one, then add the theme and the description.
    qs = Queryset("protest_paper_old_baseline_incidence", "priogrid_month")
    for feature in baseline_features:
        qs = qs.with_column(feature)
    qs = qs.with_theme("protest_paper").describe("""Protest simple baseline model, pgm level

                Predicting armed conflict (dummy) using protest data, simple- very short - baseline

                """)

    # Publish the queryset, then fetch the data it describes.
    df_baseline_slim = qs.publish().fetch()

    # Short summary: index level 0 is reported as time, level 1 as units.
    month_ids = df_baseline_slim.index.get_level_values(0)
    unit_ids = df_baseline_slim.index.get_level_values(1)
    print(
        f"A dataset with {len(df_baseline_slim.columns)} columns, with "
        f"data between t = {min(month_ids)} "
        f"and {max(month_ids)}. "
        f"({len(np.unique(unit_ids))} units)"
    )

### Baseline + economic development & political institution models

In [None]:
# IPython shell escape: runs the viewser command-line tool with
# `tables show wdi_cy` (presumably prints the columns of the `wdi_cy` table -
# confirm against the viewser documentation).
!viewser tables show wdi_cy

#### Baseline + Economic development, country level

In [None]:
if update_queries:
    # Extended conflict-history baseline plus country-level economic
    # development indicators read from the `wdi_cy` table.
    qs = (
        Queryset("protest_paper_econ_national_bl", "priogrid_month")

        # Target variable.
        .with_column(
            Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Time lag 0 of the target variable.
        .with_column(
            Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Time lag 0 of fatalities.
        .with_column(
            Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.fill()
        )

        # Decay function, 12 months.
        .with_column(
            Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Decay function, 24 months.
        .with_column(
            Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(24)
            .transform.missing.fill()
        )

        # Spatial lag function.
        .with_column(
            Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.missing.fill()
        )

        # Decay of the spatial lag.
        .with_column(
            Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Moving average over 24 months.
        # NOTE(review): the column name says "12" while the window is 24. The
        # name is left untouched because later code may refer to it; confirm
        # which of the two is intended.
        .with_column(
            Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.replace_na()
            .transform.temporal.moving_average(24)
            .transform.missing.fill()
        )

        # ECONOMIC DEVELOPMENT, country level.
        .with_column(
            Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
            .transform.missing.fill()
        )
        .with_column(
            Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
            .transform.missing.fill()
        )
        # Reads the source column that matches the feature name. Reading
        # `wdi_ny_gdp_pcap_kd_zg` here would make this feature an exact
        # duplicate of the column above. Verify the column is available with
        # `viewser tables show wdi_cy` before publishing.
        .with_column(
            Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
            .transform.missing.fill()
        )

        .with_theme("protest_paper")
        .describe("""Protest economic development model (country-level variables) including extended baseline, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
    )

    # Publish the queryset, then fetch the data it describes.
    df_econ_nat_bl = qs.publish().fetch()

    # Short summary: index level 0 is reported as time, level 1 as units.
    month_ids = df_econ_nat_bl.index.get_level_values(0)
    unit_ids = df_econ_nat_bl.index.get_level_values(1)
    print(
        f"A dataset with {len(df_econ_nat_bl.columns)} columns, with "
        f"data between t = {min(month_ids)} "
        f"and {max(month_ids)}. "
        f"({len(np.unique(unit_ids))} units)"
    )

#### Baseline + Economic development, national and sub-national level

In [None]:
# IPython shell escape: runs the viewser command-line tool with
# `tables show priogrid_year` (presumably prints the columns of the
# `priogrid_year` table - confirm against the viewser documentation).
!viewser tables show priogrid_year

In [None]:
if update_queries:
    # Extended conflict-history baseline plus economic development indicators
    # at the country level (`wdi_cy`) and the sub-national level
    # (`priogrid_year`).
    qs = (
        Queryset("protest_paper_econ_full_bl", "priogrid_month")

        # Target variable.
        .with_column(
            Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Time lag 0 of the target variable.
        .with_column(
            Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Time lag 0 of fatalities.
        .with_column(
            Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.fill()
        )

        # Decay function, 12 months.
        .with_column(
            Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Decay function, 24 months.
        .with_column(
            Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(24)
            .transform.missing.fill()
        )

        # Spatial lag function.
        .with_column(
            Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.missing.fill()
        )

        # Decay of the spatial lag.
        .with_column(
            Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Moving average over 24 months.
        # NOTE(review): the column name says "12" while the window is 24. The
        # name is left untouched because later code may refer to it; confirm
        # which of the two is intended.
        .with_column(
            Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.replace_na()
            .transform.temporal.moving_average(24)
            .transform.missing.fill()
        )

        # ECONOMIC DEVELOPMENT, country level.
        .with_column(
            Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
            .transform.missing.fill()
        )
        .with_column(
            Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
            .transform.missing.fill()
        )
        # Reads the source column that matches the feature name. Reading
        # `wdi_ny_gdp_pcap_kd_zg` here would make this feature an exact
        # duplicate of the column above. Verify the column is available with
        # `viewser tables show wdi_cy` before publishing.
        .with_column(
            Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
            .transform.missing.fill()
        )

        # ECONOMIC DEVELOPMENT, sub-national level.
        .with_column(
            Column("pgd_gcp_mer", from_table="priogrid_year", from_column="gcp_mer")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_imr_mean", from_table="priogrid_year", from_column="imr_mean")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_urban_ih", from_table="priogrid_year", from_column="urban_ih")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_agri_ih", from_table="priogrid_year", from_column="agri_ih")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
            .transform.missing.fill()
            .transform.missing.extrapolate()
            .transform.missing.replace_na()
        )

        .with_theme("protest_paper")
        .describe("""Protest economic development model (sub-national variables) including extended baseline and economic development variables on the country level, pgm level

                Note that additional transformations need to be implement after the queryset was performed. 

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
    )

    # Publish the queryset, then fetch the data it describes.
    df_econ_full_bl = qs.publish().fetch()

    # Short summary: index level 0 is reported as time, level 1 as units.
    month_ids = df_econ_full_bl.index.get_level_values(0)
    unit_ids = df_econ_full_bl.index.get_level_values(1)
    print(
        f"A dataset with {len(df_econ_full_bl.columns)} columns, with "
        f"data between t = {min(month_ids)} "
        f"and {max(month_ids)}. "
        f"({len(np.unique(unit_ids))} units)"
    )

#### Baseline + Political institutions model (I)

In [None]:
# IPython shell escape: runs the viewser command-line tool with
# `tables show vdem_v12_cy` (presumably prints the columns of the
# `vdem_v12_cy` table - confirm against the viewser documentation).
!viewser tables show vdem_v12_cy

In [None]:
if update_queries:
    # Every conflict-history feature is derived from the same GED column.
    ged_source = {"from_table": "ged2_pgm", "from_column": "ged_sb_best_sum_nokgi"}

    # Columns in the order they are attached to the queryset.
    elecdemo_features = [
        # Target variable.
        Column("ged_sb_dummy_dep", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1),

        # Time lag 0 of the target variable.
        Column("ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1),

        # Time lag 0 of fatalities.
        Column("ln_ged_sb", **ged_source)
        .transform.ops.ln()
        .transform.missing.fill(),

        # Decay function, 12 months.
        Column("decay_ts_12_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill(),

        # Decay function, 24 months.
        Column("decay_ts_24_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill(),

        # Spatial lag function.
        Column("splag_1_1_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill(),

        # Decay of the spatial lag.
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill(),

        # Moving average over 24 months.
        # NOTE(review): the column name says "12" while the window is 24 - confirm.
        Column("mov_avg_12_ged_best_sb", **ged_source)
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill(),

        # Political institutions: electoral democracy, lagged by 12.
        Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill(),
    ]

    # Attach the columns one by one, then add the theme and the description.
    qs = Queryset("protest_paper_inst_elecdemo_bl", "priogrid_month")
    for feature in elecdemo_features:
        qs = qs.with_column(feature)
    qs = qs.with_theme("protest_paper").describe("""Protest political instiutions I (elecdemo) including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    # Publish the queryset, then fetch the data it describes.
    df_inst_elecdemo_bl = qs.publish().fetch()

    # Short summary: index level 0 is reported as time, level 1 as units.
    month_ids = df_inst_elecdemo_bl.index.get_level_values(0)
    unit_ids = df_inst_elecdemo_bl.index.get_level_values(1)
    print(
        f"A dataset with {len(df_inst_elecdemo_bl.columns)} columns, with "
        f"data between t = {min(month_ids)} "
        f"and {max(month_ids)}. "
        f"({len(np.unique(unit_ids))} units)"
    )

#### Baseline + Political institutions model (II)

In [None]:
if update_queries:
    # Every conflict-history feature is derived from the same GED column.
    ged_source = {"from_table": "ged2_pgm", "from_column": "ged_sb_best_sum_nokgi"}

    # Columns in the order they are attached to the queryset.
    civlib_features = [
        # Target variable.
        Column("ged_sb_dummy_dep", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1),

        # Time lag 0 of the target variable.
        Column("ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1),

        # Time lag 0 of fatalities.
        Column("ln_ged_sb", **ged_source)
        .transform.ops.ln()
        .transform.missing.fill(),

        # Decay function, 12 months.
        Column("decay_ts_12_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill(),

        # Decay function, 24 months.
        Column("decay_ts_24_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill(),

        # Spatial lag function.
        Column("splag_1_1_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill(),

        # Decay of the spatial lag.
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill(),

        # Moving average over 24 months.
        # NOTE(review): the column name says "12" while the window is 24 - confirm.
        Column("mov_avg_12_ged_best_sb", **ged_source)
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill(),

        # Political institutions: electoral democracy, lagged by 12.
        Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill(),

        # Political institutions: civil liberties, lagged by 12.
        Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill(),

        # An earlier variant of this column read `vdem_v2clrgunev` from
        # `vdem_v11_cy`; it was disabled in favour of the definition below.
        # NOTE(review): the table name differs from the `vdem_v12_cy` used by
        # the two columns above - confirm that it is intentional.
        Column("vdem_v2clrgunev_tlag12", from_table="tbl_734eevdem_v12_cy", from_column="vdem_v12_v2clrgunev")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill(),
    ]

    # Attach the columns one by one, then add the theme and the description.
    qs = Queryset("protest_paper_inst_civlib_bl", "priogrid_month")
    for feature in civlib_features:
        qs = qs.with_column(feature)
    qs = qs.with_theme("protest_paper").describe("""Protest political instiutions II (electoral democracy + civil liberties) including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    # Publish the queryset, then fetch the data it describes.
    df_inst_civlib_bl = qs.publish().fetch()

    # Short summary: index level 0 is reported as time, level 1 as units.
    month_ids = df_inst_civlib_bl.index.get_level_values(0)
    unit_ids = df_inst_civlib_bl.index.get_level_values(1)
    print(
        f"A dataset with {len(df_inst_civlib_bl.columns)} columns, with "
        f"data between t = {min(month_ids)} "
        f"and {max(month_ids)}. "
        f"({len(np.unique(unit_ids))} units)"
    )

#### Baseline + Political institutions model (III)

In [None]:
# IPython shell escape: runs the viewser command-line tool with
# `tables show reign_cm` (presumably prints the columns of the `reign_cm`
# table - confirm against the viewser documentation).
!viewser tables show reign_cm

In [None]:
if update_queries:
    # Every conflict-history feature is derived from the same GED column.
    ged_source = {"from_table": "ged2_pgm", "from_column": "ged_sb_best_sum_nokgi"}

    # Columns in the order they are attached to the queryset.
    elect_features = [
        # Target variable.
        Column("ged_sb_dummy_dep", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1),

        # Time lag 0 of the target variable.
        Column("ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1),

        # Time lag 0 of fatalities.
        Column("ln_ged_sb", **ged_source)
        .transform.ops.ln()
        .transform.missing.fill(),

        # Decay function, 12 months.
        Column("decay_ts_12_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill(),

        # Decay function, 24 months.
        Column("decay_ts_24_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill(),

        # Spatial lag function.
        Column("splag_1_1_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill(),

        # Decay of the spatial lag.
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", **ged_source)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill(),

        # Moving average over 24 months.
        # NOTE(review): the column name says "12" while the window is 24 - confirm.
        Column("mov_avg_12_ged_best_sb", **ged_source)
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill(),

        # Political institutions: electoral democracy, lagged by 12.
        Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill(),

        # Political institutions: civil liberties, lagged by 12.
        Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill(),

        # NOTE(review): the table name differs from the `vdem_v12_cy` used by
        # the two columns above - confirm that it is intentional.
        Column("vdem_v2clrgunev_tlag12", from_table="tbl_734eevdem_v12_cy", from_column="vdem_v12_v2clrgunev")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill(),

        # Elections.
        Column("lastelection", from_table="reign_cm", from_column="lastelection")
        .transform.missing.replace_na(),

        Column("anticipation", from_table="reign_cm", from_column="anticipation")
        .transform.missing.replace_na(),
    ]

    # Attach the columns one by one, then add the theme and the description.
    qs = Queryset("protest_paper_inst_elect_bl", "priogrid_month")
    for feature in elect_features:
        qs = qs.with_column(feature)
    qs = qs.with_theme("protest_paper").describe("""Protest political instiutions III (electoral democracy + civil liberties + elections) including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    # Publish the queryset, then fetch the data it describes.
    df_inst_elect_bl = qs.publish().fetch()

    # Short summary: index level 0 is reported as time, level 1 as units.
    month_ids = df_inst_elect_bl.index.get_level_values(0)
    unit_ids = df_inst_elect_bl.index.get_level_values(1)
    print(
        f"A dataset with {len(df_inst_elect_bl.columns)} columns, with "
        f"data between t = {min(month_ids)} "
        f"and {max(month_ids)}. "
        f"({len(np.unique(unit_ids))} units)"
    )

#### Baseline + Political institutions model (IV)

In [None]:
if update_queries: 
    qs = (Queryset("protest_paper_inst_devi_bl", "priogrid_month")

        # target variable
        .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of target variable
        .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of fatalities
        .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.ops.ln()
             .transform.missing.fill()
            )

        # Decay function
        ## 12 months 
        .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )

         ## 24 months 
        .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(24)
             .transform.missing.fill()
            )

        # Spatial lag function
        .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                     .transform.missing.fill()
                )

          # Decay of spatial lag
        .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.temporal.time_since()
                 .transform.temporal.decay(12)
                 .transform.missing.fill()
                )

          # Moving average over 24 months
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ## DEVIATION FEATURES

          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()     
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()   
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()   
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()     
                 .transform.ops.ln()
                )


          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          .with_theme("protest_paper")
          .describe("""Variables to estimate deviation model model, also including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    df_inst_devi_bl = qs.publish().fetch()

    print(f"A dataset with {len(df_inst_devi_bl.columns)} columns, with "
          f"data between t = {min(df_inst_devi_bl.index.get_level_values(0))} "
          f"and {max(df_inst_devi_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_inst_devi_bl.index.get_level_values(1)))} units)"
         )

#### Baseline + Economic development + Political institutions model

Overview:
* Elections + Econ dev, national + Baseline
* Elections + Econ dev, full + Baseline
* Deviations + Econ dev, national + Baseline
* Deviations + Econ dev, full + Baseline

##### Baseline + Economic development, country level + Political institutions model

In [None]:
if update_queries:
    # Queryset "protest_paper_elect_econ_national_bl" (priogrid-month level):
    # extended baseline features + country-level economic development +
    # political institutions (electoral democracy, civil liberties, elections).
    # The queryset is published and fetched into `df_inst_elect_econ_nat_bl`.
    # Column order below is kept stable because it determines the column
    # order of the fetched dataframe.
    qs = (
        Queryset("protest_paper_elect_econ_national_bl", "priogrid_month")

        # ------------------------------------------------------------------
        # EXTENDED BASELINE
        # ------------------------------------------------------------------

        # Target variable: missing values replaced, then dummy for value >= 1.
        .with_column(
            Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Time lag 0 of the target variable (same transform chain as the target).
        .with_column(
            Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Time lag 0 of fatalities, log-transformed.
        .with_column(
            Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.fill()
        )

        # Decay of time since the dummy was last 1, parameter 12.
        .with_column(
            Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Decay of time since the dummy was last 1, parameter 24.
        .with_column(
            Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(24)
            .transform.missing.fill()
        )

        # Spatial lag of the dummy.
        .with_column(
            Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.missing.fill()
        )

        # Decay of the spatial lag: the lagged value is re-thresholded at >= 1
        # before the time-since / decay transforms are applied.
        .with_column(
            Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Moving average of logged fatalities with a window of 24.
        # NOTE(review): the column name says "12" but the window passed is 24;
        # the name is left untouched so downstream feature lists keep working.
        # Confirm which window is intended.
        .with_column(
            Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.replace_na()
            .transform.temporal.moving_average(24)
            .transform.missing.fill()
        )

        # ------------------------------------------------------------------
        # ECONOMIC DEVELOPMENT, country level
        # ------------------------------------------------------------------
        .with_column(
            Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
            .transform.missing.fill()
        )
        .with_column(
            Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
            .transform.missing.fill()
        )
        # Unemployment (WDI indicator SL.UEM.TOTL.ZS). This must read its own
        # source column: pointing it at `wdi_ny_gdp_pcap_kd_zg` would silently
        # duplicate GDP-per-capita growth under the unemployment name.
        .with_column(
            Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
            .transform.missing.fill()
        )

        # ------------------------------------------------------------------
        # POLITICAL INSTITUTIONS
        # ------------------------------------------------------------------

        # Electoral democracy, lagged 12 steps.
        .with_column(
            Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
            .transform.missing.fill()
            .transform.temporal.tlag(12)
            .transform.missing.fill()
        )

        # Civil liberties, lagged 12 steps.
        .with_column(
            Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
            .transform.missing.fill()
            .transform.temporal.tlag(12)
            .transform.missing.fill()
        )

        # NOTE(review): this column is read from `tbl_734eevdem_v12_cy`, not
        # from the `vdem_v12_cy` table used for the other V-Dem columns above.
        # Confirm that this table name is the intended source.
        .with_column(
            Column("vdem_v2clrgunev_tlag12", from_table="tbl_734eevdem_v12_cy", from_column="vdem_v12_v2clrgunev")
            .transform.missing.fill()
            .transform.temporal.tlag(12)
            .transform.missing.fill()
        )

        # Elections.
        .with_column(
            Column("lastelection", from_table="reign_cm", from_column="lastelection")
            .transform.missing.replace_na()
        )
        .with_column(
            Column("anticipation", from_table="reign_cm", from_column="anticipation")
            .transform.missing.replace_na()
        )

        .with_theme("protest_paper")
        .describe("""Protest political instiutions III  (electoral democracy + civil liberties + elections) and economic development (national) model including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
    )

    # Publish the queryset definition and fetch the resulting dataframe.
    df_inst_elect_econ_nat_bl = qs.publish().fetch()

    # Summary: number of columns, range of index level 0, unique values of level 1.
    print(f"A dataset with {len(df_inst_elect_econ_nat_bl.columns)} columns, with "
          f"data between t = {min(df_inst_elect_econ_nat_bl.index.get_level_values(0))} "
          f"and {max(df_inst_elect_econ_nat_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_inst_elect_econ_nat_bl.index.get_level_values(1)))} units)"
         )

##### Baseline + Economic development, full + Political institutions model

In [None]:
if update_queries:
    # Queryset "protest_paper_elect_econ_full_bl" (priogrid-month level):
    # extended baseline features + economic development at both country and
    # sub-national (PRIO-GRID) level + political institutions (electoral
    # democracy, civil liberties, elections).
    # The queryset is published and fetched into `df_inst_elect_econ_full_bl`.
    # Column order below is kept stable because it determines the column
    # order of the fetched dataframe.
    qs = (
        Queryset("protest_paper_elect_econ_full_bl", "priogrid_month")

        # ------------------------------------------------------------------
        # EXTENDED BASELINE
        # ------------------------------------------------------------------

        # Target variable: missing values replaced, then dummy for value >= 1.
        .with_column(
            Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Time lag 0 of the target variable (same transform chain as the target).
        .with_column(
            Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Time lag 0 of fatalities, log-transformed.
        .with_column(
            Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.fill()
        )

        # Decay of time since the dummy was last 1, parameter 12.
        .with_column(
            Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Decay of time since the dummy was last 1, parameter 24.
        .with_column(
            Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(24)
            .transform.missing.fill()
        )

        # Spatial lag of the dummy.
        .with_column(
            Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.missing.fill()
        )

        # Decay of the spatial lag: the lagged value is re-thresholded at >= 1
        # before the time-since / decay transforms are applied.
        .with_column(
            Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Moving average of logged fatalities with a window of 24.
        # NOTE(review): the column name says "12" but the window passed is 24;
        # the name is left untouched so downstream feature lists keep working.
        # Confirm which window is intended.
        .with_column(
            Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.replace_na()
            .transform.temporal.moving_average(24)
            .transform.missing.fill()
        )

        # ------------------------------------------------------------------
        # ECONOMIC DEVELOPMENT, country level
        # ------------------------------------------------------------------
        .with_column(
            Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
            .transform.missing.fill()
        )
        .with_column(
            Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
            .transform.missing.fill()
        )
        # Unemployment (WDI indicator SL.UEM.TOTL.ZS). This must read its own
        # source column: pointing it at `wdi_ny_gdp_pcap_kd_zg` would silently
        # duplicate GDP-per-capita growth under the unemployment name.
        .with_column(
            Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
            .transform.missing.fill()
        )

        # ------------------------------------------------------------------
        # ECONOMIC DEVELOPMENT, sub-national level (priogrid_year table)
        # ------------------------------------------------------------------
        .with_column(
            Column("pgd_gcp_mer", from_table="priogrid_year", from_column="gcp_mer")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_imr_mean", from_table="priogrid_year", from_column="imr_mean")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_urban_ih", from_table="priogrid_year", from_column="urban_ih")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_agri_ih", from_table="priogrid_year", from_column="agri_ih")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        # Population is the only sub-national column that is also extrapolated.
        .with_column(
            Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
            .transform.missing.fill()
            .transform.missing.extrapolate()
            .transform.missing.replace_na()
        )

        # ------------------------------------------------------------------
        # POLITICAL INSTITUTIONS
        # ------------------------------------------------------------------

        # Electoral democracy, lagged 12 steps.
        .with_column(
            Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
            .transform.missing.fill()
            .transform.temporal.tlag(12)
            .transform.missing.fill()
        )

        # Civil liberties, lagged 12 steps.
        .with_column(
            Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
            .transform.missing.fill()
            .transform.temporal.tlag(12)
            .transform.missing.fill()
        )

        # NOTE(review): this column is read from `tbl_734eevdem_v12_cy`, not
        # from the `vdem_v12_cy` table used for the other V-Dem columns above.
        # Confirm that this table name is the intended source.
        .with_column(
            Column("vdem_v2clrgunev_tlag12", from_table="tbl_734eevdem_v12_cy", from_column="vdem_v12_v2clrgunev")
            .transform.missing.fill()
            .transform.temporal.tlag(12)
            .transform.missing.fill()
        )

        # Elections.
        .with_column(
            Column("lastelection", from_table="reign_cm", from_column="lastelection")
            .transform.missing.replace_na()
        )
        .with_column(
            Column("anticipation", from_table="reign_cm", from_column="anticipation")
            .transform.missing.replace_na()
        )

        .with_theme("protest_paper")
        .describe("""Protest political instiutions III  (electoral democracy + civil liberties + elections) and economic development (full) model including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
    )

    # Publish the queryset definition and fetch the resulting dataframe.
    df_inst_elect_econ_full_bl = qs.publish().fetch()

    # Summary: number of columns, range of index level 0, unique values of level 1.
    print(f"A dataset with {len(df_inst_elect_econ_full_bl.columns)} columns, with "
          f"data between t = {min(df_inst_elect_econ_full_bl.index.get_level_values(0))} "
          f"and {max(df_inst_elect_econ_full_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_inst_elect_econ_full_bl.index.get_level_values(1)))} units)"
         )

##### Baseline + Economic development, country level + Political institutions model (deviation)

In [None]:
if update_queries: 
    qs = (Queryset("protest_paper_devi_econ_national_bl", "priogrid_month")

        # target variable
        .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of target variable
        .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of fatalities
        .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.ops.ln()
             .transform.missing.fill()
            )

        # Decay function
        ## 12 months 
        .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )

         ## 24 months 
        .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(24)
             .transform.missing.fill()
            )

        # Spatial lag function
        .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                     .transform.missing.fill()
                )

          # Decay of spatial lag
        .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.temporal.time_since()
                 .transform.temporal.decay(12)
                 .transform.missing.fill()
                )

          # Moving average over 24 months
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ### ECONOMIC DEVELOPMENT, Country level
          .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                             .transform.missing.fill()
                            )
          .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                             .transform.missing.fill()
                            )
          .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                             .transform.missing.fill()
                      )

          # POL. INSTIUTIONS
          ## DEVIATION FEATURES

          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()     
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()   
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()   
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()     
                 .transform.ops.ln()
                )


          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )


          .with_theme("protest_paper")
          .describe("""Protest political instiutions IV  (deviation) and economic development (national) model including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )
    df_inst_devi_econ_nat_bl = qs.publish().fetch()

    print(f"A dataset with {len(df_inst_devi_econ_nat_bl.columns)} columns, with "
          f"data between t = {min(df_inst_devi_econ_nat_bl.index.get_level_values(0))} "
          f"and {max(df_inst_devi_econ_nat_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_inst_devi_econ_nat_bl.index.get_level_values(1)))} units)"
         )

##### Baseline + Economic development, full + Political institutions model (deviation)

In [None]:
if update_queries:
    # Queryset combining the extended baseline, economic development
    # (country and sub-national level) and the political-institution
    # "deviation" features, at the priogrid-month level of analysis.
    # Column order is kept as-is because it may determine the column order
    # of the fetched dataframe.
    qs = (Queryset("protest_paper_devi_econ_full_bl", "priogrid_month")

          # ---- EXTENDED BASELINE ----

          # Target variable: dummy from the GED state-based column
          # (NA replaced, then thresholded at >= 1).
          .with_column(Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.bool.gte(1)
                       )

          # Time lag 0 of the target variable (same definition, used as a feature).
          .with_column(Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.bool.gte(1)
                       )

          # Time lag 0 of (logged) fatalities.
          # NOTE(review): unlike the other columns, ln() is applied here without
          # a preceding replace_na(); kept as in the published queryset.
          .with_column(Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.ops.ln()
                       .transform.missing.fill()
                       )

          # Decay of time since the dummy was last 1, decay parameter 12.
          .with_column(Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.bool.gte(1)
                       .transform.temporal.time_since()
                       .transform.temporal.decay(12)
                       .transform.missing.fill()
                       )

          # Same, decay parameter 24.
          .with_column(Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.bool.gte(1)
                       .transform.temporal.time_since()
                       .transform.temporal.decay(24)
                       .transform.missing.fill()
                       )

          # Spatial lag of the dummy.
          .with_column(Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.bool.gte(1)
                       .transform.spatial.lag(1, 1, 0, 0)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          # Decay of time since the spatial lag of the dummy was last >= 1.
          .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.bool.gte(1)
                       .transform.spatial.lag(1, 1, 0, 0)
                       .transform.missing.replace_na()
                       .transform.bool.gte(1)
                       .transform.temporal.time_since()
                       .transform.temporal.decay(12)
                       .transform.missing.fill()
                       )

          # Moving average with a window of 24 time steps.
          # NOTE: the column name says "12" but the window passed is 24; the
          # name is left unchanged because later cells may refer to it.
          .with_column(Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.ops.ln()
                       .transform.missing.replace_na()
                       .transform.temporal.moving_average(24)
                       .transform.missing.fill()
                       )

          # ---- ECONOMIC DEVELOPMENT, country level ----
          .with_column(Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
                       .transform.missing.fill()
                       )
          .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
                       .transform.missing.fill()
                       )
          # The source column must match the feature name. Reading
          # "wdi_ny_gdp_pcap_kd_zg" here would only duplicate the GDP-growth
          # column above under the unemployment feature's name.
          # NOTE(review): confirm "wdi_sl_uem_totl_zs" exists in wdi_cy
          # (`viewser tables show wdi_cy`).
          .with_column(Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
                       .transform.missing.fill()
                       )

          # ---- ECONOMIC DEVELOPMENT, sub-national level ----
          .with_column(Column("pgd_gcp_mer", from_table="priogrid_year", from_column="gcp_mer")
                       .transform.missing.fill()
                       .transform.missing.replace_na()
                       )
          .with_column(Column("pgd_imr_mean", from_table="priogrid_year", from_column="imr_mean")
                       .transform.missing.fill()
                       .transform.missing.replace_na()
                       )
          .with_column(Column("pgd_urban_ih", from_table="priogrid_year", from_column="urban_ih")
                       .transform.missing.fill()
                       .transform.missing.replace_na()
                       )
          .with_column(Column("pgd_agri_ih", from_table="priogrid_year", from_column="agri_ih")
                       .transform.missing.fill()
                       .transform.missing.replace_na()
                       )
          # Population column; added exactly once (a second identical
          # definition at the end of the chain was redundant).
          .with_column(Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
                       .transform.missing.fill()
                       .transform.missing.extrapolate()
                       .transform.missing.replace_na()
                       )

          # ---- POLITICAL INSTITUTIONS, deviation features ----

          # Contemporaneous ACLED counts (c1 -> "prpe", c3 -> "prex").
          .with_column(Column("acled_prpe_count", from_table="acled2_pgm", from_column="acled_c1_count")
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          .with_column(Column("acled_prex_count", from_table="acled2_pgm", from_column="acled_c3_count")
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          # V-Dem indices, lagged by 12 time steps.
          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_polyarchy")
                       .transform.missing.fill()
                       .transform.temporal.tlag(12)
                       .transform.missing.fill()
                       )

          .with_column(Column("vdem_v2x_civlib_tlag12", from_table="vdem_v12_cy", from_column="vdem_v12_v2x_civlib")
                       .transform.missing.fill()
                       .transform.temporal.tlag(12)
                       .transform.missing.fill()
                       )

          # One-step time lags of the ACLED and GED counts.
          .with_column(Column("acled_prpe_count_tlag1", from_table="acled2_pgm", from_column="acled_c1_count")
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          .with_column(Column("acled_prex_count_tlag1", from_table="acled2_pgm", from_column="acled_c3_count")
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          .with_column(Column("geb_sb_best_tlag1", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          .with_column(Column("geb_os_best_tlag1", from_table="ged2_pgm", from_column="ged_os_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          # Spatial lag followed by a one-step time lag.
          .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.spatial.lag(1, 1, 0, 0)
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table="ged2_pgm", from_column="ged_os_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.spatial.lag(1, 1, 0, 0)
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table="acled2_pgm", from_column="acled_c1_count")
                       .transform.missing.replace_na()
                       .transform.spatial.lag(1, 1, 0, 0)
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table="acled2_pgm", from_column="acled_c3_count")
                       .transform.missing.replace_na()
                       .transform.spatial.lag(1, 1, 0, 0)
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       )

          # Logged versions of the one-step time lags.
          .with_column(Column("ln_acled_prpe_count_tlag1", from_table="acled2_pgm", from_column="acled_c1_count")
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       .transform.ops.ln()
                       )

          .with_column(Column("ln_acled_prex_count_tlag1", from_table="acled2_pgm", from_column="acled_c3_count")
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       .transform.ops.ln()
                       )

          .with_column(Column("ln_geb_sb_best_tlag1", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       .transform.ops.ln()
                       )

          .with_column(Column("ln_geb_os_best_tlag1", from_table="ged2_pgm", from_column="ged_os_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       .transform.ops.ln()
                       )

          # Logged versions of the spatially and temporally lagged counts.
          .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.spatial.lag(1, 1, 0, 0)
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       .transform.ops.ln()
                       )

          .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table="ged2_pgm", from_column="ged_os_best_sum_nokgi")
                       .transform.missing.replace_na()
                       .transform.spatial.lag(1, 1, 0, 0)
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       .transform.ops.ln()
                       )

          # Note the different spatial-lag arguments (1, 2, 0, 0) for ACLED.
          .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table="acled2_pgm", from_column="acled_c1_count")
                       .transform.missing.replace_na()
                       .transform.spatial.lag(1, 2, 0, 0)
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       .transform.ops.ln()
                       )

          .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table="acled2_pgm", from_column="acled_c3_count")
                       .transform.missing.replace_na()
                       .transform.spatial.lag(1, 2, 0, 0)
                       .transform.missing.replace_na()
                       .transform.temporal.tlag(1)
                       .transform.missing.replace_na()
                       .transform.missing.fill()
                       .transform.ops.ln()
                       )

          .with_theme("protest_paper")
          .describe("""Protest political instiutions IV  (deviation) and economic development (full) model including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
          )

    # Publish the queryset definition and fetch the resulting dataframe.
    df_inst_devi_econ_full_bl = qs.publish().fetch()

    # Summary: number of columns, range of index level 0 (time) and number
    # of distinct values on index level 1 (units).
    print(f"A dataset with {len(df_inst_devi_econ_full_bl.columns)} columns, with "
          f"data between t = {min(df_inst_devi_econ_full_bl.index.get_level_values(0))} "
          f"and {max(df_inst_devi_econ_full_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_inst_devi_econ_full_bl.index.get_level_values(1)))} units)"
         )

### Extended baseline + Protest models

In [None]:
# Show the acled2_pgm and acled2_cm tables via the viewser command-line tool
# (presumably to look up the ACLED column names used in the querysets below — confirm).
!viewser tables show acled2_pgm
!viewser tables show acled2_cm

#### Baseline + Naive protest model

In [None]:
if update_queries:
    # Extended baseline + naive protest model, priogrid-month level.
    # The queryset is assembled one column per statement; the order of the
    # statements is the order in which the columns are added.
    qs = Queryset("protest_paper_pr_naive_bl", "priogrid_month")

    # ---- Extended baseline ----

    # Dependent variable: dummy from the GED state-based column
    # (NA replaced, then thresholded at >= 1).
    qs = qs.with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Same dummy at time lag 0, used as a feature.
    qs = qs.with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Logged fatalities at time lag 0 (ln first, then fill).
    qs = qs.with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay of time since the dummy was last 1, decay parameter 12.
    qs = qs.with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Same, decay parameter 24.
    qs = qs.with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag of the dummy.
    qs = qs.with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay of time since the spatial lag of the dummy was last >= 1.
    qs = qs.with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average of logged fatalities with a window of 24 time steps
    # (the column name says "12"; the window passed is 24).
    qs = qs.with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    # ---- Protest features (all protest events, acled_pr_count) ----

    # Decay of time since the protest dummy was last 1, decay parameter 6.
    qs = qs.with_column(
        Column("decay_ts_6_acled_pr_dummy", from_table="acled2_pgm", from_column="acled_pr_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # Same for the spatial lag (arguments 1, 2, 0, 0) of the protest dummy.
    qs = qs.with_column(
        Column("decay_ts_6_splag_1_2_acled_pr_dummy", from_table="acled2_pgm", from_column="acled_pr_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # Raw protest count with NA replaced.
    qs = qs.with_column(
        Column("acled_pr_count", from_table="acled2_pgm", from_column="acled_pr_count")
        .transform.missing.replace_na()
    )

    # Population column from priogrid_year: fill, extrapolate, then replace NA.
    qs = qs.with_column(
        Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

    # Theme and description metadata attached to the queryset.
    qs = qs.with_theme("protest_paper")
    qs = qs.describe("""Naive protest model including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    # Publish the queryset definition and fetch the resulting dataframe.
    df_pr_naive_bl = qs.publish().fetch()

    # Summary: number of columns, range of index level 0 (time) and number
    # of distinct values on index level 1 (units).
    print(
        f"A dataset with {len(df_pr_naive_bl.columns)} columns, with "
        f"data between t = {min(df_pr_naive_bl.index.get_level_values(0))} "
        f"and {max(df_pr_naive_bl.index.get_level_values(0))}. "
        f"({len(np.unique(df_pr_naive_bl.index.get_level_values(1)))} units)"
    )

#### Baseline + Local dynamic model

selected_categories = ['ex','pe','in','ri']

    for protest in selected_categories: 
        #dynamic_local.append(f"decay_6_ts_acled_dummy_pr{protest}")
        dynamic_local.append(f"ln_tlag_0_acled_pop_pr{protest}")
        dynamic_local.append(f"ln_cumsum_3_acled_pop_pr{protest}")
        #dynamic_local.append(f"decay_6_ts_splag_1_2_acled_dummy_pr{protest}")
        dynamic_local.append(f"ln_tlag_0_splag_1_2_acled_pop_pr{protest}")
        dynamic_local.append(f"ln_cumsum_3_splag_1_2_acled_pop_pr{protest}")
        dynamic_local.append(f"ln_min_dist_3_acled_count_pr{protest}")

    dynamic_local_bl = baseline_model + dynamic_local

In [None]:
if update_queries:
    # Extended baseline + local dynamic protest model, priogrid-month level.
    # The queryset is assembled one column per statement; the order of the
    # statements is the order in which the columns are added.
    qs = Queryset("protest_paper_pr_dynamic_local_bl", "priogrid_month")

    # ---- Extended baseline ----

    # Dependent variable: dummy from the GED state-based column
    # (NA replaced, then thresholded at >= 1).
    qs = qs.with_column(
        Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Same dummy at time lag 0, used as a feature.
    qs = qs.with_column(
        Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
    )

    # Logged fatalities at time lag 0 (ln first, then fill).
    qs = qs.with_column(
        Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.fill()
    )

    # Decay of time since the dummy was last 1, decay parameter 12.
    qs = qs.with_column(
        Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Same, decay parameter 24.
    qs = qs.with_column(
        Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(24)
        .transform.missing.fill()
    )

    # Spatial lag of the dummy.
    qs = qs.with_column(
        Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.missing.fill()
    )

    # Decay of time since the spatial lag of the dummy was last >= 1.
    qs = qs.with_column(
        Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 1, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(12)
        .transform.missing.fill()
    )

    # Moving average of logged fatalities with a window of 24 time steps
    # (the column name says "12"; the window passed is 24).
    qs = qs.with_column(
        Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
        .transform.ops.ln()
        .transform.missing.replace_na()
        .transform.temporal.moving_average(24)
        .transform.missing.fill()
    )

    # ---- Protest features by category ----
    # Source columns used below: acled_c3_count -> "prex", acled_c1_count ->
    # "prpe", acled_c2_count -> "prin", acled_c5_count -> "prri".

    # Decay (parameter 6) of time since each category dummy was last 1.
    qs = qs.with_column(
        Column("decay_ts_6_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    qs = qs.with_column(
        Column("decay_ts_6_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    qs = qs.with_column(
        Column("decay_ts_6_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    qs = qs.with_column(
        Column("decay_ts_6_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # Same decay, computed on the spatial lag (arguments 1, 2, 0, 0) of each
    # category dummy. Note the order here: prpe, prex, prin, prri.
    qs = qs.with_column(
        Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    qs = qs.with_column(
        Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    qs = qs.with_column(
        Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    qs = qs.with_column(
        Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.spatial.lag(1, 2, 0, 0)
        .transform.missing.replace_na()
        .transform.bool.gte(1)
        .transform.temporal.time_since()
        .transform.temporal.decay(6)
        .transform.missing.fill()
    )

    # Raw per-category counts with NA replaced.
    # "prex": protest with excessive violence against protesters.
    qs = qs.with_column(
        Column("acled_prex_count", from_table="acled2_pgm", from_column="acled_c3_count")
        .transform.missing.replace_na()
    )

    # "prpe": peaceful protest.
    qs = qs.with_column(
        Column("acled_prpe_count", from_table="acled2_pgm", from_column="acled_c1_count")
        .transform.missing.replace_na()
    )

    # "prin": protest with intervention.
    qs = qs.with_column(
        Column("acled_prin_count", from_table="acled2_pgm", from_column="acled_c2_count")
        .transform.missing.replace_na()
    )

    # "prri": protest with riots.
    qs = qs.with_column(
        Column("acled_prri_count", from_table="acled2_pgm", from_column="acled_c5_count")
        .transform.missing.replace_na()
    )

    # Population column from priogrid_year: fill, extrapolate, then replace NA.
    qs = qs.with_column(
        Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
        .transform.missing.fill()
        .transform.missing.extrapolate()
        .transform.missing.replace_na()
    )

    # Theme and description metadata attached to the queryset.
    qs = qs.with_theme("protest_paper")
    qs = qs.describe("""Local dynamic protest model including extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    # Publish the queryset definition and fetch the resulting dataframe.
    df_pr_dynamic_loc_bl = qs.publish().fetch()

    # Summary: number of columns, range of index level 0 (time) and number
    # of distinct values on index level 1 (units).
    print(
        f"A dataset with {len(df_pr_dynamic_loc_bl.columns)} columns, with "
        f"data between t = {min(df_pr_dynamic_loc_bl.index.get_level_values(0))} "
        f"and {max(df_pr_dynamic_loc_bl.index.get_level_values(0))}. "
        f"({len(np.unique(df_pr_dynamic_loc_bl.index.get_level_values(1)))} units)"
    )

#### Baseline + National dynamic model

    dynamic_national = []

    for protest in selected_categories: 
        dynamic_national.append(f"country_decay_6_ts_acled_dummy_pr{protest}")
        dynamic_national.append(f"ln_country_tlag_0_acled_pop_pr{protest}")
        dynamic_national.append(f"ln_country_cumsum_3_acled_pop_pr{protest}")

    print(dynamic_national)

In [None]:
# Shell escape to the viewser command-line tool: show the `wdi_cy` table,
# which the query sets in this notebook read `wdi_sp_pop_totl` from.
# NOTE(review): presumably this lists the columns available in that table;
# it needs viewser installed and database access - confirm.
!viewser tables show wdi_cy

In [None]:
if update_queries:
    # Query set "protest_paper_pr_dynamic_national_bl" at priogrid-month level:
    # conflict-history baseline + local (pgm) protest features + national (cm)
    # protest features + population. Columns are added in the order listed.
    #
    # ACLED protest categories, as annotated in this notebook:
    #   prex <- acled_c3_count: protest with excessive violence against
    #           protesters (interaction codes: 16, 26, 36, 46, 56, 68)
    #   prpe <- acled_c1_count: peaceful protest (interaction codes: 60, 66, 67)
    #   prin <- acled_c2_count: protest with intervention
    #           NOTE(review): the notebook annotates this with the same
    #           interaction codes as prex - confirm the intended codes.
    #   prri <- acled_c5_count: protest with riots (inter 1 or 2 has 5)

    # ---- builders for the transform chains that recur below ----------------
    def _col(name, table, source):
        """Start a column called `name` read from `table`.`source`."""
        return Column(name, from_table=table, from_column=source)

    def _ged(name):
        """Column read from ged2_pgm.ged_sb_best_sum_nokgi."""
        return _col(name, "ged2_pgm", "ged_sb_best_sum_nokgi")

    def _no_na(col):
        """Append missing.replace_na()."""
        return col.transform.missing.replace_na()

    def _dummy(col):
        """Append replace_na() followed by bool.gte(1)."""
        return _no_na(col).transform.bool.gte(1)

    def _decay(col, k):
        """Append time_since() -> decay(k) -> missing.fill()."""
        _since = col.transform.temporal.time_since()
        _decayed = _since.transform.temporal.decay(k)
        return _decayed.transform.missing.fill()

    def _splag(col, a, b):
        """Append spatial.lag(a, b, 0, 0)."""
        return col.transform.spatial.lag(a, b, 0, 0)

    def _pop(col):
        """Append missing.fill() -> extrapolate() -> replace_na()."""
        _filled = col.transform.missing.fill()
        return _no_na(_filled.transform.missing.extrapolate())

    _columns = [
        # ---- conflict history (extended baseline) --------------------------
        # target variable
        _dummy(_ged("ged_sb_dummy_dep")),
        # timelag 0 of target variable
        _dummy(_ged("ged_sb_dummy")),
        # timelag 0 of (logged) fatalities
        _ged("ln_ged_sb").transform.ops.ln().transform.missing.fill(),
        # decay of time since last conflict event, decay arguments 12 and 24
        _decay(_dummy(_ged("decay_ts_12_ged_sb_dummy")), 12),
        _decay(_dummy(_ged("decay_ts_24_ged_sb_dummy")), 24),
        # spatial lag of the conflict dummy
        _no_na(_splag(_dummy(_ged("splag_1_1_ged_sb_dummy")), 1, 1))
        .transform.missing.fill(),
        # decay of the (re-dichotomised) spatial lag
        _decay(
            _dummy(_splag(_dummy(_ged("decay_ts_12_splag_1_1_ged_sb_dummy")), 1, 1)),
            12,
        ),
        # moving average of logged fatalities
        # NOTE(review): the column name says 12 but the window passed is 24;
        # the name is kept unchanged because it is part of the published data.
        _no_na(_ged("mov_avg_12_ged_best_sb").transform.ops.ln())
        .transform.temporal.moving_average(24)
        .transform.missing.fill(),

        # ---- PROTEST FEATURES - dynamic local (pgm) ------------------------
        _decay(_dummy(_col("decay_ts_6_acled_prex_dummy", "acled2_pgm", "acled_c3_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prpe_dummy", "acled2_pgm", "acled_c1_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prin_dummy", "acled2_pgm", "acled_c2_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prri_dummy", "acled2_pgm", "acled_c5_count")), 6),
        # same decay applied to the re-dichotomised spatial lag (1, 2, 0, 0)
        _decay(
            _dummy(_splag(_dummy(
                _col("decay_ts_6_splag_1_2_acled_prpe_dummy", "acled2_pgm", "acled_c1_count")
            ), 1, 2)),
            6,
        ),
        _decay(
            _dummy(_splag(_dummy(
                _col("decay_ts_6_splag_1_2_acled_prex_dummy", "acled2_pgm", "acled_c3_count")
            ), 1, 2)),
            6,
        ),
        _decay(
            _dummy(_splag(_dummy(
                _col("decay_ts_6_splag_1_2_acled_prin_dummy", "acled2_pgm", "acled_c2_count")
            ), 1, 2)),
            6,
        ),
        _decay(
            _dummy(_splag(_dummy(
                _col("decay_ts_6_splag_1_2_acled_prri_dummy", "acled2_pgm", "acled_c5_count")
            ), 1, 2)),
            6,
        ),
        # raw event counts per protest category
        _no_na(_col("acled_prex_count", "acled2_pgm", "acled_c3_count")),
        _no_na(_col("acled_prpe_count", "acled2_pgm", "acled_c1_count")),
        _no_na(_col("acled_prin_count", "acled2_pgm", "acled_c2_count")),
        _no_na(_col("acled_prri_count", "acled2_pgm", "acled_c5_count")),

        # ---- PROTEST FEATURES - dynamic national (cm) ----------------------
        _decay(_dummy(_col("decay_ts_6_acled_prex_dummy_cm", "acled2_cm", "acled_c3_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prpe_dummy_cm", "acled2_cm", "acled_c1_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prin_dummy_cm", "acled2_cm", "acled_c2_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prri_dummy_cm", "acled2_cm", "acled_c5_count")), 6),
        # raw event counts per protest category
        _no_na(_col("acled_prex_count_cm", "acled2_cm", "acled_c3_count")),
        _no_na(_col("acled_prpe_count_cm", "acled2_cm", "acled_c1_count")),
        _no_na(_col("acled_prin_count_cm", "acled2_cm", "acled_c2_count")),
        _no_na(_col("acled_prri_count_cm", "acled2_cm", "acled_c5_count")),

        # ---- population ----------------------------------------------------
        _pop(_col("wdi_sp_pop_totl", "wdi_cy", "wdi_sp_pop_totl")),
        _pop(_col("pgd_pop_gpw_sum", "priogrid_year", "pop_gpw_sum")),
    ]

    # Assemble the query set one column at a time, then attach theme and
    # description.
    qs = Queryset("protest_paper_pr_dynamic_national_bl", "priogrid_month")
    for _column in _columns:
        qs = qs.with_column(_column)
    qs = qs.with_theme("protest_paper")
    qs = qs.describe("""National dynamic protest model including local dynamic model and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    # Publish the definition and fetch the resulting data.
    df_pr_dynamic_nat_bl = qs.publish().fetch()

    # Short summary of what came back (index level 0 is reported as t,
    # index level 1 as units).
    _t_values = df_pr_dynamic_nat_bl.index.get_level_values(0)
    _unit_values = df_pr_dynamic_nat_bl.index.get_level_values(1)
    _n_columns = len(df_pr_dynamic_nat_bl.columns)
    _n_units = len(np.unique(_unit_values))
    print(
        f"A dataset with {_n_columns} columns, with "
        f"data between t = {min(_t_values)} "
        f"and {max(_t_values)}. "
        f"({_n_units} units)"
    )

### Full models: extended baseline + protest models + political institutions + economic models

#### Full protest model + political institutions

##### Full protest model + political institutions model I

In [None]:
if update_queries:
    # Query set "protest_paper_pr_elecdemo_bl" at priogrid-month level:
    # conflict-history baseline + local (pgm) protest features + national (cm)
    # protest features + population + one political-institutions column
    # (V-Dem polyarchy). Columns are added in the order listed.
    #
    # ACLED protest categories, as annotated in this notebook:
    #   prex <- acled_c3_count: protest with excessive violence against
    #           protesters (interaction codes: 16, 26, 36, 46, 56, 68)
    #   prpe <- acled_c1_count: peaceful protest (interaction codes: 60, 66, 67)
    #   prin <- acled_c2_count: protest with intervention
    #           NOTE(review): the notebook annotates this with the same
    #           interaction codes as prex - confirm the intended codes.
    #   prri <- acled_c5_count: protest with riots (inter 1 or 2 has 5)

    # ---- builders for the transform chains that recur below ----------------
    def _col(name, table, source):
        """Start a column called `name` read from `table`.`source`."""
        return Column(name, from_table=table, from_column=source)

    def _ged(name):
        """Column read from ged2_pgm.ged_sb_best_sum_nokgi."""
        return _col(name, "ged2_pgm", "ged_sb_best_sum_nokgi")

    def _no_na(col):
        """Append missing.replace_na()."""
        return col.transform.missing.replace_na()

    def _dummy(col):
        """Append replace_na() followed by bool.gte(1)."""
        return _no_na(col).transform.bool.gte(1)

    def _decay(col, k):
        """Append time_since() -> decay(k) -> missing.fill()."""
        _since = col.transform.temporal.time_since()
        _decayed = _since.transform.temporal.decay(k)
        return _decayed.transform.missing.fill()

    def _splag(col, a, b):
        """Append spatial.lag(a, b, 0, 0)."""
        return col.transform.spatial.lag(a, b, 0, 0)

    def _pop(col):
        """Append missing.fill() -> extrapolate() -> replace_na()."""
        _filled = col.transform.missing.fill()
        return _no_na(_filled.transform.missing.extrapolate())

    _columns = [
        # ---- conflict history (extended baseline) --------------------------
        # target variable
        _dummy(_ged("ged_sb_dummy_dep")),
        # timelag 0 of target variable
        _dummy(_ged("ged_sb_dummy")),
        # timelag 0 of (logged) fatalities
        _ged("ln_ged_sb").transform.ops.ln().transform.missing.fill(),
        # decay of time since last conflict event, decay arguments 12 and 24
        _decay(_dummy(_ged("decay_ts_12_ged_sb_dummy")), 12),
        _decay(_dummy(_ged("decay_ts_24_ged_sb_dummy")), 24),
        # spatial lag of the conflict dummy
        _no_na(_splag(_dummy(_ged("splag_1_1_ged_sb_dummy")), 1, 1))
        .transform.missing.fill(),
        # decay of the (re-dichotomised) spatial lag
        _decay(
            _dummy(_splag(_dummy(_ged("decay_ts_12_splag_1_1_ged_sb_dummy")), 1, 1)),
            12,
        ),
        # moving average of logged fatalities
        # NOTE(review): the column name says 12 but the window passed is 24;
        # the name is kept unchanged because it is part of the published data.
        _no_na(_ged("mov_avg_12_ged_best_sb").transform.ops.ln())
        .transform.temporal.moving_average(24)
        .transform.missing.fill(),

        # ---- PROTEST FEATURES - dynamic local (pgm) ------------------------
        _decay(_dummy(_col("decay_ts_6_acled_prex_dummy", "acled2_pgm", "acled_c3_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prpe_dummy", "acled2_pgm", "acled_c1_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prin_dummy", "acled2_pgm", "acled_c2_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prri_dummy", "acled2_pgm", "acled_c5_count")), 6),
        # same decay applied to the re-dichotomised spatial lag (1, 2, 0, 0)
        _decay(
            _dummy(_splag(_dummy(
                _col("decay_ts_6_splag_1_2_acled_prpe_dummy", "acled2_pgm", "acled_c1_count")
            ), 1, 2)),
            6,
        ),
        _decay(
            _dummy(_splag(_dummy(
                _col("decay_ts_6_splag_1_2_acled_prex_dummy", "acled2_pgm", "acled_c3_count")
            ), 1, 2)),
            6,
        ),
        _decay(
            _dummy(_splag(_dummy(
                _col("decay_ts_6_splag_1_2_acled_prin_dummy", "acled2_pgm", "acled_c2_count")
            ), 1, 2)),
            6,
        ),
        _decay(
            _dummy(_splag(_dummy(
                _col("decay_ts_6_splag_1_2_acled_prri_dummy", "acled2_pgm", "acled_c5_count")
            ), 1, 2)),
            6,
        ),
        # raw event counts per protest category
        _no_na(_col("acled_prex_count", "acled2_pgm", "acled_c3_count")),
        _no_na(_col("acled_prpe_count", "acled2_pgm", "acled_c1_count")),
        _no_na(_col("acled_prin_count", "acled2_pgm", "acled_c2_count")),
        _no_na(_col("acled_prri_count", "acled2_pgm", "acled_c5_count")),

        # ---- PROTEST FEATURES - dynamic national (cm) ----------------------
        _decay(_dummy(_col("decay_ts_6_acled_prex_dummy_cm", "acled2_cm", "acled_c3_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prpe_dummy_cm", "acled2_cm", "acled_c1_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prin_dummy_cm", "acled2_cm", "acled_c2_count")), 6),
        _decay(_dummy(_col("decay_ts_6_acled_prri_dummy_cm", "acled2_cm", "acled_c5_count")), 6),
        # raw event counts per protest category
        _no_na(_col("acled_prex_count_cm", "acled2_cm", "acled_c3_count")),
        _no_na(_col("acled_prpe_count_cm", "acled2_cm", "acled_c1_count")),
        _no_na(_col("acled_prin_count_cm", "acled2_cm", "acled_c2_count")),
        _no_na(_col("acled_prri_count_cm", "acled2_cm", "acled_c5_count")),

        # ---- population ----------------------------------------------------
        _pop(_col("wdi_sp_pop_totl", "wdi_cy", "wdi_sp_pop_totl")),
        _pop(_col("pgd_pop_gpw_sum", "priogrid_year", "pop_gpw_sum")),

        # ---- political institutions ----------------------------------------
        # V-Dem polyarchy: fill, time-lag with argument 12, fill again
        _col("vdem_v2x_polyarchy_tlag12", "vdem_v12_cy", "vdem_v12_v2x_polyarchy")
        .transform.missing.fill()
        .transform.temporal.tlag(12)
        .transform.missing.fill(),
    ]

    # Assemble the query set one column at a time, then attach theme and
    # description.
    qs = Queryset("protest_paper_pr_elecdemo_bl", "priogrid_month")
    for _column in _columns:
        qs = qs.with_column(_column)
    qs = qs.with_theme("protest_paper")
    qs = qs.describe("""National dynamic protest model including local dynamic model, electoral democracy model and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    # Publish the definition and fetch the resulting data.
    df_pr_elecdemo_bl = qs.publish().fetch()

    # Short summary of what came back (index level 0 is reported as t,
    # index level 1 as units).
    _t_values = df_pr_elecdemo_bl.index.get_level_values(0)
    _unit_values = df_pr_elecdemo_bl.index.get_level_values(1)
    _n_columns = len(df_pr_elecdemo_bl.columns)
    _n_units = len(np.unique(_unit_values))
    print(
        f"A dataset with {_n_columns} columns, with "
        f"data between t = {min(_t_values)} "
        f"and {max(_t_values)}. "
        f"({_n_units} units)"
    )

##### Full protest model + political institutions model II

In [None]:
if update_queries: 
    qs = (Queryset("protest_paper_pr_civlib_bl", "priogrid_month")

        # target variable
        .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of target variable
        .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of fatalities
        .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.ops.ln()
             .transform.missing.fill()
            )

        # Decay function
        ## 12 months 
        .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )

         ## 24 months 
        .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(24)
             .transform.missing.fill()
            )

        # Spatial lag function
        .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                     .transform.missing.fill()
                )

          # Decay of spatial lag
        .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.temporal.time_since()
                 .transform.temporal.decay(12)
                 .transform.missing.fill()
                )

          # Moving average over 24 months
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ## PROTEST FEATURES - Dynamic Local

          .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          ## PROTEST FEATURES - Dynamic national

          .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          # population
          .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          # POL. INSTIUTIONS
          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          # Civil liberties
          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          #.with_column(Column("vdem_v2clrgunev_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v2clrgunev")
                             #.transform.missing.fill()
                             #.transform.temporal.tlag(12)
                             #.transform.missing.fill()
                      #)

          .with_column(Column("vdem_v2clrgunev_tlag12", from_table = "tbl_734eevdem_v12_cy", from_column = "vdem_v12_v2clrgunev")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                      )

          .with_theme("protest_paper")
          .describe("""National dynamic protest model including local dynamic model, civil liberties and elecdemo model and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    df_pr_civlib_bl = qs.publish().fetch()

    print(f"A dataset with {len(df_pr_civlib_bl.columns)} columns, with "
          f"data between t = {min(df_pr_civlib_bl.index.get_level_values(0))} "
          f"and {max(df_pr_civlib_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_civlib_bl.index.get_level_values(1)))} units)"
         )

##### Full protest model + political institutions model III

In [None]:
# Query set "protest_paper_pr_elect_bl" at the "priogrid_month" level.
# Per its own description string below: national + local dynamic protest features,
# election, civil liberties and electoral democracy variables, plus the extended
# conflict-history baseline.
# The whole cell only runs when `update_queries` (defined elsewhere in the notebook) is truthy;
# otherwise neither `qs` nor `df_pr_elect_bl` is (re)assigned here.
if update_queries: 
    qs = (Queryset("protest_paper_pr_elect_bl", "priogrid_month")

        # target variable
        # Chain: replace NAs, then .bool.gte(1) -- presumably a ">= 1" indicator
        # (the column names call it a dummy); confirm against the transform library.
        .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of target variable
        # Same source column and same transform chain as "ged_sb_dummy_dep" above,
        # stored under a second name.
        .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of fatalities
        # ln() is applied first and missing values are filled afterwards
        # (no replace_na() in this chain, unlike the dummies above).
        .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.ops.ln()
             .transform.missing.fill()
            )

        # Decay function
        # Chain for both decay columns: dummy -> time_since() -> decay(k) -> fill().
        ## 12 months (decay parameter 12)
        .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )

         ## 24 months (decay parameter 24)
        .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(24)
             .transform.missing.fill()
            )

        # Spatial lag function
        # NOTE(review): replace_na() is immediately followed by fill(); the second call
        # looks redundant (and is indented differently) -- confirm it is intentional.
        .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                     .transform.missing.fill()
                )

          # Decay of spatial lag
          # The spatially lagged dummy is thresholded again with gte(1) before
          # time_since() and decay(12) are applied.
        .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.temporal.time_since()
                 .transform.temporal.decay(12)
                 .transform.missing.fill()
                )

          # Moving average over 24 months
          # NOTE(review): the column is named "mov_avg_12_..." but the window passed to
          # moving_average() is 24 -- name and window disagree; confirm which is intended.
          # The name is kept as-is here because later code may select the column by this name.
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ## PROTEST FEATURES - Dynamic Local
          # Source table: acled2_pgm. Column-name suffix -> source column used below:
          #   prpe -> acled_c1_count, prin -> acled_c2_count,
          #   prex -> acled_c3_count, prri -> acled_c5_count.
          # Each decay column: dummy -> time_since() -> decay(6) -> fill().

          .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # Spatially lagged versions of the four protest dummies: spatial.lag(1,2,0,0),
          # re-thresholded with gte(1), then time_since() and decay(6).
          .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          # Raw local protest counts (only NAs replaced, no further transforms).
          # NOTE(review): the "excessive violence" and "intervention" comments below list the
          # same interaction codes (16, 26, 36, 46, 56, 68); one of them is probably a
          # copy-paste -- verify against the ACLED coding used to build acled_c2/acled_c3.
          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          ## PROTEST FEATURES - Dynamic national
          # Same source columns and decay chain as the local block, but read from
          # acled2_cm and stored with a "_cm" suffix. No spatial-lag variants here.

          .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # Raw national protest counts (only NAs replaced).
          # NOTE(review): same duplicated interaction-code list as in the local block;
          # verify the codes for "excessive violence" vs. "intervention".
          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          # population
          # Missing-value handling in three steps: fill(), extrapolate(), then replace_na()
          # for anything still missing.
          .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          # POL. INSTITUTIONS
          # All V-Dem columns below: fill(), tlag(12), fill() again after the lag.
          # (tlag(12) is presumably a 12-month lag at this monthly level -- confirm.)
          # Electoral democracy.
          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          # Civil liberties
          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          # Earlier (disabled) definition of vdem_v2clrgunev_tlag12 reading from "vdem_v12_cy";
          # superseded by the active definition right below.
          #.with_column(Column("vdem_v2clrgunev_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v2clrgunev")
                             #.transform.missing.fill()
                             #.transform.temporal.tlag(12)
                             #.transform.missing.fill()
                      #)

          # NOTE(review): this column reads from "tbl_734eevdem_v12_cy" while the other V-Dem
          # columns use "vdem_v12_cy" -- confirm this table name is the intended, stable source.
          .with_column(Column("vdem_v2clrgunev_tlag12", from_table = "tbl_734eevdem_v12_cy", from_column = "vdem_v12_v2clrgunev")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                      )

          # Elections.
          # Two columns from reign_cm, only NAs replaced.
          .with_column(Column("lastelection", from_table = "reign_cm", from_column = "lastelection")
                             .transform.missing.replace_na()
                            )
          .with_column(Column("anticipation", from_table = "reign_cm", from_column = "anticipation")
                             .transform.missing.replace_na()
                            )

          # Theme label and free-text description attached to the query set.
          .with_theme("protest_paper")
          .describe("""National dynamic protest model including local dynamic model, election, civil liberties and electoral democracy model and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    # Publish the query set definition and fetch the resulting data
    # (used below as a DataFrame-like object with .columns and a multi-level index).
    df_pr_elect_bl = qs.publish().fetch()

    # Sanity-check summary: number of columns, min/max of index level 0 (reported as t)
    # and the number of distinct values in index level 1 (reported as units).
    print(f"A dataset with {len(df_pr_elect_bl.columns)} columns, with "
          f"data between t = {min(df_pr_elect_bl.index.get_level_values(0))} "
          f"and {max(df_pr_elect_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_elect_bl.index.get_level_values(1)))} units)"
         )

##### Full protest model + political institutions model IV

In [None]:
if update_queries: 
    qs = (Queryset("protest_paper_pr_devi_bl", "priogrid_month")

        # target variable
        .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of target variable
        .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of fatalities
        .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.ops.ln()
             .transform.missing.fill()
            )

        # Decay function
        ## 12 months 
        .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )

         ## 24 months 
        .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(24)
             .transform.missing.fill()
            )

        # Spatial lag function
        .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                     .transform.missing.fill()
                )

          # Decay of spatial lag
        .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.temporal.time_since()
                 .transform.temporal.decay(12)
                 .transform.missing.fill()
                )

          # Moving average over 24 months
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ## PROTEST FEATURES - Dynamic Local

          .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          ## PROTEST FEATURES - Dynamic national

          .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          # population
          .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          # POL. INSTITUTIONS
          # Deviation
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()     
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()   
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()   
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()     
                 .transform.ops.ln()
                )

          .with_theme("protest_paper")
          .describe("""National dynamic protest model including local dynamic model, deviation model and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    df_pr_devi_bl = qs.publish().fetch()

    print(f"A dataset with {len(df_pr_devi_bl.columns)} columns, with "
          f"data between t = {min(df_pr_devi_bl.index.get_level_values(0))} "
          f"and {max(df_pr_devi_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_devi_bl.index.get_level_values(1)))} units)"
         )

In [None]:
if update_queries:
    # Build, publish and fetch the first half of the "pr_devi_bl" feature set
    # (queryset "protest_paper_pr_devi_bl_01", priogrid-month level):
    # conflict-history baseline features, local and national protest
    # features, and population. The recurring transform pipelines are wrapped
    # in small helpers; columns are added in the order they are listed.

    _GED = "ged2_pgm"
    _SB = "ged_sb_best_sum_nokgi"
    _ACLED_PGM = "acled2_pgm"
    _ACLED_CM = "acled2_cm"

    def _raw(name, table, column):
        # Untransformed database column.
        return Column(name, from_table=table, from_column=column)

    def _count(name, table, column):
        # Raw column followed by missing.replace_na().
        return _raw(name, table, column).transform.missing.replace_na()

    def _dummy(name, table, column):
        # replace_na -> bool.gte(1).
        return _count(name, table, column).transform.bool.gte(1)

    def _decayed(col, months):
        # Appends time_since -> decay(months) -> fill to an existing chain.
        return (col.transform.temporal.time_since()
                   .transform.temporal.decay(months)
                   .transform.missing.fill())

    def _neighbour_dummy(name, table, column, lag_args):
        # Dummy -> spatial.lag(*lag_args) -> replace_na.
        return (_dummy(name, table, column)
                .transform.spatial.lag(*lag_args)
                .transform.missing.replace_na())

    def _neighbour_decay(name, table, column, lag_args, months):
        # Spatially lagged dummy, re-thresholded with gte(1), then decayed.
        lagged = _neighbour_dummy(name, table, column, lag_args)
        return _decayed(lagged.transform.bool.gte(1), months)

    def _population(name, table, column):
        # fill -> extrapolate -> replace_na.
        return (_raw(name, table, column)
                .transform.missing.fill()
                .transform.missing.extrapolate()
                .transform.missing.replace_na())

    qs = Queryset("protest_paper_pr_devi_bl_01", "priogrid_month")

    _features = [
        # Target variable
        _dummy("ged_sb_dummy_dep", _GED, _SB),

        # Time lag 0 of the target variable
        _dummy("ged_sb_dummy", _GED, _SB),

        # Time lag 0 of fatalities (ln is applied before fill)
        _raw("ln_ged_sb", _GED, _SB).transform.ops.ln().transform.missing.fill(),

        # Decay function, 12 and 24 months
        _decayed(_dummy("decay_ts_12_ged_sb_dummy", _GED, _SB), 12),
        _decayed(_dummy("decay_ts_24_ged_sb_dummy", _GED, _SB), 24),

        # Spatial lag function
        (_neighbour_dummy("splag_1_1_ged_sb_dummy", _GED, _SB, (1, 1, 0, 0))
         .transform.missing.fill()),

        # Decay of spatial lag
        _neighbour_decay("decay_ts_12_splag_1_1_ged_sb_dummy", _GED, _SB, (1, 1, 0, 0), 12),

        # Moving average of ln fatalities.
        # NOTE(review): the column name says "12" but the window passed to
        # moving_average is 24 -- confirm which is intended. Name left
        # unchanged because other code may reference it.
        (_raw("mov_avg_12_ged_best_sb", _GED, _SB)
         .transform.ops.ln()
         .transform.missing.replace_na()
         .transform.temporal.moving_average(24)
         .transform.missing.fill()),

        ## PROTEST FEATURES - Dynamic local
        _decayed(_dummy("decay_ts_6_acled_prex_dummy", _ACLED_PGM, "acled_c3_count"), 6),
        _decayed(_dummy("decay_ts_6_acled_prpe_dummy", _ACLED_PGM, "acled_c1_count"), 6),
        _decayed(_dummy("decay_ts_6_acled_prin_dummy", _ACLED_PGM, "acled_c2_count"), 6),
        _decayed(_dummy("decay_ts_6_acled_prri_dummy", _ACLED_PGM, "acled_c5_count"), 6),

        _neighbour_decay("decay_ts_6_splag_1_2_acled_prpe_dummy", _ACLED_PGM, "acled_c1_count", (1, 2, 0, 0), 6),
        _neighbour_decay("decay_ts_6_splag_1_2_acled_prex_dummy", _ACLED_PGM, "acled_c3_count", (1, 2, 0, 0), 6),
        _neighbour_decay("decay_ts_6_splag_1_2_acled_prin_dummy", _ACLED_PGM, "acled_c2_count", (1, 2, 0, 0), 6),
        _neighbour_decay("decay_ts_6_splag_1_2_acled_prri_dummy", _ACLED_PGM, "acled_c5_count", (1, 2, 0, 0), 6),

        # Raw local counts.
        # prex: protest with excessive violence against protesters
        #       (interaction codes: 16, 26, 36, 46, 56, 68)
        # prpe: peaceful protest (interaction codes: 60, 66, or 67)
        # prin: protest with intervention
        #       NOTE(review): the original comment listed the same interaction
        #       codes as for prex -- confirm the correct codes.
        # prri: protest with riots (inter 1 or 2 has 5)
        _count("acled_prex_count", _ACLED_PGM, "acled_c3_count"),
        _count("acled_prpe_count", _ACLED_PGM, "acled_c1_count"),
        _count("acled_prin_count", _ACLED_PGM, "acled_c2_count"),
        _count("acled_prri_count", _ACLED_PGM, "acled_c5_count"),

        ## PROTEST FEATURES - Dynamic national
        _decayed(_dummy("decay_ts_6_acled_prex_dummy_cm", _ACLED_CM, "acled_c3_count"), 6),
        _decayed(_dummy("decay_ts_6_acled_prpe_dummy_cm", _ACLED_CM, "acled_c1_count"), 6),
        _decayed(_dummy("decay_ts_6_acled_prin_dummy_cm", _ACLED_CM, "acled_c2_count"), 6),
        _decayed(_dummy("decay_ts_6_acled_prri_dummy_cm", _ACLED_CM, "acled_c5_count"), 6),

        # Raw national counts (same four protest categories as above)
        _count("acled_prex_count_cm", _ACLED_CM, "acled_c3_count"),
        _count("acled_prpe_count_cm", _ACLED_CM, "acled_c1_count"),
        _count("acled_prin_count_cm", _ACLED_CM, "acled_c2_count"),
        _count("acled_prri_count_cm", _ACLED_CM, "acled_c5_count"),

        # Population
        _population("wdi_sp_pop_totl", "wdi_cy", "wdi_sp_pop_totl"),
        _population("pgd_pop_gpw_sum", "priogrid_year", "pop_gpw_sum"),
    ]

    for _feature in _features:
        qs = qs.with_column(_feature)

    # NOTE(review): "haf" in the description looks like a typo for "half";
    # the text is kept verbatim because it is sent along with the queryset.
    qs = qs.with_theme("protest_paper").describe("""First haf of query: national dynamic protest model including local dynamic model, deviation model and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    df_pr_devi_bl_01 = qs.publish().fetch()

    # Summary of the fetched frame, read from its two index levels.
    _t_index = df_pr_devi_bl_01.index.get_level_values(0)
    _unit_index = df_pr_devi_bl_01.index.get_level_values(1)
    print(f"A dataset with {len(df_pr_devi_bl_01.columns)} columns, with "
          f"data between t = {min(_t_index)} and {max(_t_index)}. "
          f"({len(np.unique(_unit_index))} units)")

In [None]:
if update_queries:
    # Build, publish and fetch the second half of the "pr_devi_bl" feature set
    # (queryset "protest_paper_pr_devi_bl_02", priogrid-month level):
    # political-institution and deviation features. The recurring transform
    # pipelines are wrapped in small helpers; columns are added in the order
    # they are listed.

    _GED = "ged2_pgm"
    _SB = "ged_sb_best_sum_nokgi"
    _OS = "ged_os_best_sum_nokgi"
    _ACLED = "acled2_pgm"
    _PRPE = "acled_c1_count"
    _PREX = "acled_c3_count"

    def _count(name, table, column):
        # Raw column followed by missing.replace_na().
        return (Column(name, from_table=table, from_column=column)
                .transform.missing.replace_na())

    def _lag1(col):
        # Appends tlag(1) -> replace_na -> fill to an existing chain.
        return (col.transform.temporal.tlag(1)
                   .transform.missing.replace_na()
                   .transform.missing.fill())

    def _lagged_count(name, table, column):
        # replace_na -> tlag(1) -> replace_na -> fill.
        return _lag1(_count(name, table, column))

    def _lagged_neighbour_count(name, table, column, lag_args):
        # replace_na -> spatial.lag(*lag_args) -> replace_na -> tlag(1)
        # -> replace_na -> fill.
        neighbours = (_count(name, table, column)
                      .transform.spatial.lag(*lag_args)
                      .transform.missing.replace_na())
        return _lag1(neighbours)

    def _vdem_lag12(name, column):
        # fill -> tlag(12) -> fill on a vdem_v12_cy column.
        return (Column(name, from_table="vdem_v12_cy", from_column=column)
                .transform.missing.fill()
                .transform.temporal.tlag(12)
                .transform.missing.fill())

    qs = Queryset("protest_paper_pr_devi_bl_02", "priogrid_month")

    _features = [
        # POL. INSTITUTIONS
        # Deviation
        # These two names are also defined in "protest_paper_pr_devi_bl_01",
        # there without the trailing fill().
        _count("acled_prpe_count", _ACLED, _PRPE).transform.missing.fill(),
        _count("acled_prex_count", _ACLED, _PREX).transform.missing.fill(),

        _vdem_lag12("vdem_v2x_polyarchy_tlag12", "vdem_v12_v2x_polyarchy"),
        _vdem_lag12("vdem_v2x_civlib_tlag12", "vdem_v12_v2x_civlib"),

        # Counts lagged by one time step.
        # NOTE(review): "geb" in the names below is presumably a typo for
        # "ged"; kept verbatim because other code may reference these names.
        _lagged_count("acled_prpe_count_tlag1", _ACLED, _PRPE),
        _lagged_count("acled_prex_count_tlag1", _ACLED, _PREX),
        _lagged_count("geb_sb_best_tlag1", _GED, _SB),
        _lagged_count("geb_os_best_tlag1", _GED, _OS),

        # Spatially lagged counts, lagged by one time step.
        # NOTE(review): "splag_1_1_ged_os_dummy_tlag1" has no bool.gte(1)
        # step, so it is built like the other counts despite "dummy" in its
        # name; name kept verbatim.
        _lagged_neighbour_count("splag_1_1_ged_sb_best_tlag1", _GED, _SB, (1, 1, 0, 0)),
        _lagged_neighbour_count("splag_1_1_ged_os_dummy_tlag1", _GED, _OS, (1, 1, 0, 0)),
        _lagged_neighbour_count("splag_1_1_acled_prpe_count_tlag1", _ACLED, _PRPE, (1, 1, 0, 0)),
        _lagged_neighbour_count("splag_1_1_acled_prex_count_tlag1", _ACLED, _PREX, (1, 1, 0, 0)),

        # ln of the lagged counts
        _lagged_count("ln_acled_prpe_count_tlag1", _ACLED, _PRPE).transform.ops.ln(),
        _lagged_count("ln_acled_prex_count_tlag1", _ACLED, _PREX).transform.ops.ln(),
        _lagged_count("ln_geb_sb_best_tlag1", _GED, _SB).transform.ops.ln(),
        _lagged_count("ln_geb_os_best_tlag1", _GED, _OS).transform.ops.ln(),

        # ln of the spatially lagged, time-lagged counts
        # (conflict columns use lag(1,1,0,0), protest columns lag(1,2,0,0))
        (_lagged_neighbour_count("ln_splag_1_1_ged_sb_best_tlag1", _GED, _SB, (1, 1, 0, 0))
         .transform.ops.ln()),
        (_lagged_neighbour_count("ln_splag_1_1_ged_os_best_tlag1", _GED, _OS, (1, 1, 0, 0))
         .transform.ops.ln()),
        (_lagged_neighbour_count("ln_splag_1_2_acled_prpe_count_tlag1", _ACLED, _PRPE, (1, 2, 0, 0))
         .transform.ops.ln()),
        (_lagged_neighbour_count("ln_splag_1_2_acled_prex_count_tlag1", _ACLED, _PREX, (1, 2, 0, 0))
         .transform.ops.ln()),
    ]

    for _feature in _features:
        qs = qs.with_column(_feature)

    qs = qs.with_theme("protest_paper").describe("""Second half of query: National dynamic protest model including local dynamic model, deviation model and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    df_pr_devi_bl_02 = qs.publish().fetch()

    # Summary of the fetched frame, read from its two index levels.
    _t_index = df_pr_devi_bl_02.index.get_level_values(0)
    _unit_index = df_pr_devi_bl_02.index.get_level_values(1)
    print(f"A dataset with {len(df_pr_devi_bl_02.columns)} columns, with "
          f"data between t = {min(_t_index)} and {max(_t_index)}. "
          f"({len(np.unique(_unit_index))} units)")

#### Full protest model + economic development

##### Full protest model + economic development, country level

In [None]:
if update_queries:
    # Queryset "protest_paper_pr_econ_national_bl" (priogrid-month level):
    # conflict-history baseline + local and national dynamic protest features
    # + population + country-level economic development indicators.
    # The queryset is published and fetched into `df_pr_econ_national_bl`.
    qs = (
        Queryset("protest_paper_pr_econ_national_bl", "priogrid_month")

        # ------------------------------------------------------------------
        # Target and conflict-history baseline (ged2_pgm)
        # ------------------------------------------------------------------

        # target variable
        .with_column(
            Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # timelag 0 of target variable
        .with_column(
            Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # timelag 0 of fatalities
        .with_column(
            Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.fill()
        )

        # Decay function
        ## 12 months
        .with_column(
            Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        ## 24 months
        .with_column(
            Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(24)
            .transform.missing.fill()
        )

        # Spatial lag function
        .with_column(
            Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.missing.fill()
        )

        # Decay of spatial lag
        .with_column(
            Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Moving average over 24 months
        # NOTE(review): the column name says "12" but the window is 24 months;
        # the name is kept unchanged because later cells may select it by name.
        .with_column(
            Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.replace_na()
            .transform.temporal.moving_average(24)
            .transform.missing.fill()
        )

        # ------------------------------------------------------------------
        ## PROTEST FEATURES - Dynamic Local (acled2_pgm)
        # ------------------------------------------------------------------

        .with_column(
            Column("decay_ts_6_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )

        # Decay of the spatially lagged protest dummies
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )

        # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prex_count", from_table="acled2_pgm", from_column="acled_c3_count")
            .transform.missing.replace_na()
        )
        # peaceful protest (interaction codes: 60, 66, or 67.)
        .with_column(
            Column("acled_prpe_count", from_table="acled2_pgm", from_column="acled_c1_count")
            .transform.missing.replace_na()
        )
        # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prin_count", from_table="acled2_pgm", from_column="acled_c2_count")
            .transform.missing.replace_na()
        )
        # protest with riots (interaction codes: inter 1 or 2 has 5)
        .with_column(
            Column("acled_prri_count", from_table="acled2_pgm", from_column="acled_c5_count")
            .transform.missing.replace_na()
        )

        # ------------------------------------------------------------------
        ## PROTEST FEATURES - Dynamic national (acled2_cm)
        # ------------------------------------------------------------------

        .with_column(
            Column("decay_ts_6_acled_prex_dummy_cm", from_table="acled2_cm", from_column="acled_c3_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prpe_dummy_cm", from_table="acled2_cm", from_column="acled_c1_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prin_dummy_cm", from_table="acled2_cm", from_column="acled_c2_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prri_dummy_cm", from_table="acled2_cm", from_column="acled_c5_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )

        # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prex_count_cm", from_table="acled2_cm", from_column="acled_c3_count")
            .transform.missing.replace_na()
        )
        # peaceful protest (interaction codes: 60, 66, or 67.)
        .with_column(
            Column("acled_prpe_count_cm", from_table="acled2_cm", from_column="acled_c1_count")
            .transform.missing.replace_na()
        )
        # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prin_count_cm", from_table="acled2_cm", from_column="acled_c2_count")
            .transform.missing.replace_na()
        )
        # protest with riots (interaction codes: inter 1 or 2 has 5)
        .with_column(
            Column("acled_prri_count_cm", from_table="acled2_cm", from_column="acled_c5_count")
            .transform.missing.replace_na()
        )

        # ------------------------------------------------------------------
        # population
        # ------------------------------------------------------------------
        .with_column(
            Column("wdi_sp_pop_totl", from_table="wdi_cy", from_column="wdi_sp_pop_totl")
            .transform.missing.fill()
            .transform.missing.extrapolate()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
            .transform.missing.fill()
            .transform.missing.extrapolate()
            .transform.missing.replace_na()
        )

        # ------------------------------------------------------------------
        ### ECONOMIC DEVELOPMENT, Country level (wdi_cy)
        # ------------------------------------------------------------------
        .with_column(
            Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
            .transform.missing.fill()
        )
        .with_column(
            Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
            .transform.missing.fill()
        )
        # Fixed copy-paste error: this column previously read
        # "wdi_ny_gdp_pcap_kd_zg", making it a duplicate of the column above
        # instead of the indicator its name refers to.
        .with_column(
            Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
            .transform.missing.fill()
        )

        .with_theme("protest_paper")
        .describe("""National dynamic protest model including local dynamic model, economic delveopment (natonal) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
    )

    # Publish the queryset definition and fetch the resulting data.
    df_pr_econ_national_bl = qs.publish().fetch()

    # Summary: number of columns, range of index level 0 and number of
    # distinct values in index level 1.
    print(f"A dataset with {len(df_pr_econ_national_bl.columns)} columns, with "
          f"data between t = {min(df_pr_econ_national_bl.index.get_level_values(0))} "
          f"and {max(df_pr_econ_national_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_econ_national_bl.index.get_level_values(1)))} units)"
         )

##### Full protest model + economic development, full (country and subnational level)

In [None]:
if update_queries:
    # Queryset "protest_paper_pr_econ_full_bl" (priogrid-month level):
    # conflict-history baseline + local and national dynamic protest features
    # + population + country-level and sub-national economic development.
    # The queryset is published and fetched into `df_pr_econ_full_bl`.
    qs = (
        Queryset("protest_paper_pr_econ_full_bl", "priogrid_month")

        # ------------------------------------------------------------------
        # Target and conflict-history baseline (ged2_pgm)
        # ------------------------------------------------------------------

        # target variable
        .with_column(
            Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # timelag 0 of target variable
        .with_column(
            Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # timelag 0 of fatalities
        .with_column(
            Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.fill()
        )

        # Decay function
        ## 12 months
        .with_column(
            Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        ## 24 months
        .with_column(
            Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(24)
            .transform.missing.fill()
        )

        # Spatial lag function
        .with_column(
            Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.missing.fill()
        )

        # Decay of spatial lag
        .with_column(
            Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Moving average over 24 months
        # NOTE(review): the column name says "12" but the window is 24 months;
        # the name is kept unchanged because later cells may select it by name.
        .with_column(
            Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.replace_na()
            .transform.temporal.moving_average(24)
            .transform.missing.fill()
        )

        # ------------------------------------------------------------------
        ## PROTEST FEATURES - Dynamic Local (acled2_pgm)
        # ------------------------------------------------------------------

        .with_column(
            Column("decay_ts_6_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )

        # Decay of the spatially lagged protest dummies
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )

        # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prex_count", from_table="acled2_pgm", from_column="acled_c3_count")
            .transform.missing.replace_na()
        )
        # peaceful protest (interaction codes: 60, 66, or 67.)
        .with_column(
            Column("acled_prpe_count", from_table="acled2_pgm", from_column="acled_c1_count")
            .transform.missing.replace_na()
        )
        # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prin_count", from_table="acled2_pgm", from_column="acled_c2_count")
            .transform.missing.replace_na()
        )
        # protest with riots (interaction codes: inter 1 or 2 has 5)
        .with_column(
            Column("acled_prri_count", from_table="acled2_pgm", from_column="acled_c5_count")
            .transform.missing.replace_na()
        )

        # ------------------------------------------------------------------
        ## PROTEST FEATURES - Dynamic national (acled2_cm)
        # ------------------------------------------------------------------

        .with_column(
            Column("decay_ts_6_acled_prex_dummy_cm", from_table="acled2_cm", from_column="acled_c3_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prpe_dummy_cm", from_table="acled2_cm", from_column="acled_c1_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prin_dummy_cm", from_table="acled2_cm", from_column="acled_c2_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prri_dummy_cm", from_table="acled2_cm", from_column="acled_c5_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )

        # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prex_count_cm", from_table="acled2_cm", from_column="acled_c3_count")
            .transform.missing.replace_na()
        )
        # peaceful protest (interaction codes: 60, 66, or 67.)
        .with_column(
            Column("acled_prpe_count_cm", from_table="acled2_cm", from_column="acled_c1_count")
            .transform.missing.replace_na()
        )
        # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prin_count_cm", from_table="acled2_cm", from_column="acled_c2_count")
            .transform.missing.replace_na()
        )
        # protest with riots (interaction codes: inter 1 or 2 has 5)
        .with_column(
            Column("acled_prri_count_cm", from_table="acled2_cm", from_column="acled_c5_count")
            .transform.missing.replace_na()
        )

        # ------------------------------------------------------------------
        # population
        # ------------------------------------------------------------------
        .with_column(
            Column("wdi_sp_pop_totl", from_table="wdi_cy", from_column="wdi_sp_pop_totl")
            .transform.missing.fill()
            .transform.missing.extrapolate()
            .transform.missing.replace_na()
        )

        # ------------------------------------------------------------------
        ### ECONOMIC DEVELOPMENT, Country level (wdi_cy)
        # ------------------------------------------------------------------
        .with_column(
            Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
            .transform.missing.fill()
        )
        .with_column(
            Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
            .transform.missing.fill()
        )
        # Fixed copy-paste error: this column previously read
        # "wdi_ny_gdp_pcap_kd_zg", making it a duplicate of the column above
        # instead of the indicator its name refers to.
        .with_column(
            Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
            .transform.missing.fill()
        )

        # ------------------------------------------------------------------
        ### ECONOMIC DEVELOPMENT, Sub-national level (priogrid_year)
        # ------------------------------------------------------------------
        .with_column(
            Column("pgd_gcp_mer", from_table="priogrid_year", from_column="gcp_mer")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_imr_mean", from_table="priogrid_year", from_column="imr_mean")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_urban_ih", from_table="priogrid_year", from_column="urban_ih")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_agri_ih", from_table="priogrid_year", from_column="agri_ih")
            .transform.missing.fill()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
            .transform.missing.fill()
            .transform.missing.extrapolate()
            .transform.missing.replace_na()
        )

        .with_theme("protest_paper")
        .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national and subnational) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
    )

    # Publish the queryset definition and fetch the resulting data.
    df_pr_econ_full_bl = qs.publish().fetch()

    # Summary: number of columns, range of index level 0 and number of
    # distinct values in index level 1.
    print(f"A dataset with {len(df_pr_econ_full_bl.columns)} columns, with "
          f"data between t = {min(df_pr_econ_full_bl.index.get_level_values(0))} "
          f"and {max(df_pr_econ_full_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_econ_full_bl.index.get_level_values(1)))} units)"
         )

#### Full protest model + economic development + political institutions

##### Full protest model + political institutions III + economic development (country level)

In [None]:
if update_queries: 
    qs = (Queryset("protest_paper_pr_elect_econ_national_bl", "priogrid_month")

        # target variable
        .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of target variable
        .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of fatalities
        .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.ops.ln()
             .transform.missing.fill()
            )

        # Decay function
        ## 12 months 
        .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )

         ## 24 months 
        .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(24)
             .transform.missing.fill()
            )

        # Spatial lag function
        .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                     .transform.missing.fill()
                )

          # Decay of spatial lag
        .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.temporal.time_since()
                 .transform.temporal.decay(12)
                 .transform.missing.fill()
                )

          # Moving average over 24 months
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ## PROTEST FEATURES - Dynamic Local

          .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          ## PROTEST FEATURES - Dynamic national

          .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          # population
          .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          ### ECONOMIC DEVELOPMENT, Country level
          .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                             .transform.missing.fill()
                            )
          .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                             .transform.missing.fill()
                            )
          .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                             .transform.missing.fill()
                            )

          # POL. INSTIUTIONS
          # Electoral democracy.
          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          # Civil liberties
          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          #.with_column(Column("vdem_v2clrgunev_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v2clrgunev")
                             #.transform.missing.fill()
                             #.transform.temporal.tlag(12)
                             #.transform.missing.fill()
                      #)

          .with_column(Column("vdem_v2clrgunev_tlag12", from_table = "tbl_734eevdem_v12_cy", from_column = "vdem_v12_v2clrgunev")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                      )

          # Elections.
          .with_column(Column("lastelection", from_table = "reign_cm", from_column = "lastelection")
                             .transform.missing.replace_na()
                            )
          .with_column(Column("anticipation", from_table = "reign_cm", from_column = "anticipation")
                             .transform.missing.replace_na()
                            )


          .with_theme("protest_paper")
          .describe("""National dynamic protest model including local dynamic model, economic delveopment (national), political institutions III (full) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    df_pr_elect_econ_national_bl = qs.publish().fetch()

    print(f"A dataset with {len(df_pr_elect_econ_national_bl.columns)} columns, with "
          f"data between t = {min(df_pr_elect_econ_national_bl.index.get_level_values(0))} "
          f"and {max(df_pr_elect_econ_national_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_elect_econ_national_bl.index.get_level_values(1)))} units)"
         )

##### Full protest model + political institutions III + full economic development (country and subnational level)

In [None]:
if update_queries:
    # Queryset for the full protest model at the priogrid-month level:
    # conflict history (extended baseline), local (pgm) and national (cm)
    # protest features, population, economic development at country and
    # sub-national level, political institutions and election timing.
    # The queryset is published to the database and fetched right away.
    qs = (Queryset("protest_paper_pr_elect_econ_full_bl", "priogrid_month")

          # target variable
          .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                            )

          # timelag 0 of target variable
          .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                            )

          # timelag 0 of fatalities
          .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.fill()
                            )

          # Decay function
          ## 12 months
          .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(12)
                             .transform.missing.fill()
                            )

          ## 24 months
          .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(24)
                             .transform.missing.fill()
                            )

          # Spatial lag function
          .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,1,0,0)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          # Decay of spatial lag
          .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,1,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(12)
                             .transform.missing.fill()
                            )

          # Moving average over 24 months
          # NOTE(review): the column name says "12" but the window below is 24;
          # the name is left untouched because other code may refer to it.
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ## PROTEST FEATURES - Dynamic Local

          .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          # NOTE(review): these codes are identical to the ones listed for
          # "excessive violence" above - possibly a copy-paste; confirm.
          .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          ## PROTEST FEATURES - Dynamic national

          .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          # NOTE(review): same codes as "excessive violence" above; confirm.
          .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          # population
          .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          ### ECONOMIC DEVELOPMENT, Country level
          .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                             .transform.missing.fill()
                            )
          .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                             .transform.missing.fill()
                            )
          # Unemployment must come from its own source column; reading
          # "wdi_ny_gdp_pcap_kd_zg" here would make this feature an exact copy
          # of the GDP per capita growth column defined just above.
          # NOTE(review): confirm "wdi_sl_uem_totl_zs" is available in wdi_cy,
          # and align the other protest_paper querysets with this mapping.
          .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_sl_uem_totl_zs")
                             .transform.missing.fill()
                            )

          ### ECONOMIC DEVELOPMENT, Sub-national level
          .with_column(Column("pgd_gcp_mer", from_table = "priogrid_year", from_column = "gcp_mer")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_imr_mean", from_table = "priogrid_year", from_column = "imr_mean")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_urban_ih", from_table = "priogrid_year", from_column = "urban_ih")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_agri_ih", from_table = "priogrid_year", from_column = "agri_ih")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          # POL. INSTITUTIONS
          # Electoral democracy.
          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          # Civil liberties
          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          # Earlier definition of the column below, kept for reference:
          #.with_column(Column("vdem_v2clrgunev_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v2clrgunev")
                             #.transform.missing.fill()
                             #.transform.temporal.tlag(12)
                             #.transform.missing.fill()
                      #)

          # NOTE(review): the table name "tbl_734eevdem_v12_cy" differs from the
          # "vdem_v12_cy" table used for the other V-Dem columns; confirm it is
          # the intended source.
          .with_column(Column("vdem_v2clrgunev_tlag12", from_table = "tbl_734eevdem_v12_cy", from_column = "vdem_v12_v2clrgunev")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          # Elections.
          .with_column(Column("lastelection", from_table = "reign_cm", from_column = "lastelection")
                             .transform.missing.replace_na()
                            )
          .with_column(Column("anticipation", from_table = "reign_cm", from_column = "anticipation")
                             .transform.missing.replace_na()
                            )


          .with_theme("protest_paper")
          .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national, sub-national), political institutions (full) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    # Publish the queryset definition and download the resulting data frame.
    df_pr_elect_econ_full_bl = qs.publish().fetch()

    # Summary: number of columns, range of index level 0 and number of unique
    # values in index level 1.
    print(f"A dataset with {len(df_pr_elect_econ_full_bl.columns)} columns, with "
          f"data between t = {min(df_pr_elect_econ_full_bl.index.get_level_values(0))} "
          f"and {max(df_pr_elect_econ_full_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_elect_econ_full_bl.index.get_level_values(1)))} units)"
         )

##### Full protest model + political institutions IV + economic development (country level)

In [None]:
if update_queries: 
    qs = (Queryset("protest_paper_pr_devi_econ_national_bl", "priogrid_month")

        # target variable
        .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of target variable
        .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of fatalities
        .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.ops.ln()
             .transform.missing.fill()
            )

        # Decay function
        ## 12 months 
        .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )

         ## 24 months 
        .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(24)
             .transform.missing.fill()
            )

        # Spatial lag function
        .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                     .transform.missing.fill()
                )

          # Decay of spatial lag
        .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.temporal.time_since()
                 .transform.temporal.decay(12)
                 .transform.missing.fill()
                )

          # Moving average over 24 months
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ## PROTEST FEATURES - Dynamic Local

          .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          ## PROTEST FEATURES - Dynamic national

          .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          # population
          .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          ### ECONOMIC DEVELOPMENT, Country level
          .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                             .transform.missing.fill()
                            )
          .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                             .transform.missing.fill()
                            )
          .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                             .transform.missing.fill()
                            )

          # POL. INSTIUTIONS
          # Deviation
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()      
                )

          .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()     
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()   
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()   
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()     
                 .transform.ops.ln()
                )




          .with_theme("protest_paper")
          .describe("""National dynamic protest model including local dynamic model, economic delveopment (national), political institutions IV (deviation) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    df_pr_devi_econ_national_bl = qs.publish().fetch()

    print(f"A dataset with {len(df_pr_devi_econ_national_bl.columns)} columns, with "
          f"data between t = {min(df_pr_devi_econ_national_bl.index.get_level_values(0))} "
          f"and {max(df_pr_devi_econ_national_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_devi_econ_national_bl.index.get_level_values(1)))} units)"
         )

In [None]:
if update_queries:
    # First half of the "pr_devi_econ_national_bl" queryset (see the describe()
    # text below), defined at the priogrid_month level. It holds the target,
    # the conflict-history baseline, local (pgm) and national (cm) protest
    # features, population and national economic indicators. The queryset is
    # published, fetched into df_pr_devi_econ_national_bl_01 and summarised.
    qs = (
        Queryset("protest_paper_pr_devi_econ_national_bl_01", "priogrid_month")

        ## TARGET AND CONFLICT HISTORY

        # Target variable: dummy for ged_sb_best_sum_nokgi >= 1.
        .with_column(
            Column("ged_sb_dummy_dep", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Timelag 0 of the target variable (same definition as the target).
        .with_column(
            Column("ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
        )

        # Timelag 0 of fatalities, logged.
        # NOTE(review): unlike the other columns, no replace_na() runs before
        # ln() here -- confirm this is intended.
        .with_column(
            Column("ln_ged_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.fill()
        )

        # Decay of time since the dummy was last 1, decay(12).
        .with_column(
            Column("decay_ts_12_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Same as above with decay(24).
        .with_column(
            Column("decay_ts_24_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(24)
            .transform.missing.fill()
        )

        # Spatial lag of the dummy.
        .with_column(
            Column("splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.missing.fill()
        )

        # Decay of the spatial lag: the lag is dichotomised again (>= 1)
        # before time_since()/decay(12) are applied.
        .with_column(
            Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 1, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(12)
            .transform.missing.fill()
        )

        # Moving average of logged fatalities.
        # NOTE: the window is 24 although the column name says "12"; the name
        # is left unchanged so existing references to it keep working.
        .with_column(
            Column("mov_avg_12_ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi")
            .transform.ops.ln()
            .transform.missing.replace_na()
            .transform.temporal.moving_average(24)
            .transform.missing.fill()
        )

        ## PROTEST FEATURES - Dynamic Local
        # Source columns: acled_c3_count -> "prex", acled_c1_count -> "prpe",
        # acled_c2_count -> "prin", acled_c5_count -> "prri".

        # Decay (6) of time since the last month with at least one event.
        .with_column(
            Column("decay_ts_6_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )

        # Same decay applied to the re-dichotomised spatial lag (1, 2, 0, 0).
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table="acled2_pgm", from_column="acled_c1_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table="acled2_pgm", from_column="acled_c3_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table="acled2_pgm", from_column="acled_c2_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table="acled2_pgm", from_column="acled_c5_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.spatial.lag(1, 2, 0, 0)
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )

        # Raw event counts, missing values replaced.
        # NOTE(review): the "excessive violence" and "intervention" comments
        # below list identical interaction codes; one of the two lists is
        # probably a copy-paste leftover -- verify against the ACLED coding.

        # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prex_count", from_table="acled2_pgm", from_column="acled_c3_count")
            .transform.missing.replace_na()
        )
        # peaceful protest (interaction codes: 60, 66, or 67.)
        .with_column(
            Column("acled_prpe_count", from_table="acled2_pgm", from_column="acled_c1_count")
            .transform.missing.replace_na()
        )
        # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prin_count", from_table="acled2_pgm", from_column="acled_c2_count")
            .transform.missing.replace_na()
        )
        # protest with riots (interaction codes: inter 1 or 2 has 5)
        .with_column(
            Column("acled_prri_count", from_table="acled2_pgm", from_column="acled_c5_count")
            .transform.missing.replace_na()
        )

        ## PROTEST FEATURES - Dynamic national
        # Same features as the local ones, read from acled2_cm.

        .with_column(
            Column("decay_ts_6_acled_prex_dummy_cm", from_table="acled2_cm", from_column="acled_c3_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prpe_dummy_cm", from_table="acled2_cm", from_column="acled_c1_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prin_dummy_cm", from_table="acled2_cm", from_column="acled_c2_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )
        .with_column(
            Column("decay_ts_6_acled_prri_dummy_cm", from_table="acled2_cm", from_column="acled_c5_count")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            .transform.temporal.time_since()
            .transform.temporal.decay(6)
            .transform.missing.fill()
        )

        # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prex_count_cm", from_table="acled2_cm", from_column="acled_c3_count")
            .transform.missing.replace_na()
        )
        # peaceful protest (interaction codes: 60, 66, or 67.)
        .with_column(
            Column("acled_prpe_count_cm", from_table="acled2_cm", from_column="acled_c1_count")
            .transform.missing.replace_na()
        )
        # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
        .with_column(
            Column("acled_prin_count_cm", from_table="acled2_cm", from_column="acled_c2_count")
            .transform.missing.replace_na()
        )
        # protest with riots (interaction codes: inter 1 or 2 has 5)
        .with_column(
            Column("acled_prri_count_cm", from_table="acled2_cm", from_column="acled_c5_count")
            .transform.missing.replace_na()
        )

        ## POPULATION
        .with_column(
            Column("wdi_sp_pop_totl", from_table="wdi_cy", from_column="wdi_sp_pop_totl")
            .transform.missing.fill()
            .transform.missing.extrapolate()
            .transform.missing.replace_na()
        )
        .with_column(
            Column("pgd_pop_gpw_sum", from_table="priogrid_year", from_column="pop_gpw_sum")
            .transform.missing.fill()
            .transform.missing.extrapolate()
            .transform.missing.replace_na()
        )

        ### ECONOMIC DEVELOPMENT, Country level
        .with_column(
            Column("wdi_ny_gdp_pcap_kd", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd")
            .transform.missing.fill()
        )
        .with_column(
            Column("wdi_ny_gdp_pcap_kd_zg", from_table="wdi_cy", from_column="wdi_ny_gdp_pcap_kd_zg")
            .transform.missing.fill()
        )
        # The source column must match the output name: reading
        # wdi_ny_gdp_pcap_kd_zg here would only duplicate the GDP growth
        # feature defined right above.
        # NOTE(review): confirm that wdi_sl_uem_totl_zs is available in wdi_cy.
        .with_column(
            Column("wdi_sl_uem_totl_zs", from_table="wdi_cy", from_column="wdi_sl_uem_totl_zs")
            .transform.missing.fill()
        )

        .with_theme("protest_paper")
        .describe("""First half of query: National dynamic protest model including local dynamic model, economic delveopment (national), political institutions IV (deviation) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
    )

    # Publish the queryset definition and fetch the resulting data.
    df_pr_devi_econ_national_bl_01 = qs.publish().fetch()

    # Summary: number of columns, range of index level 0 and number of
    # distinct values in index level 1.
    print(f"A dataset with {len(df_pr_devi_econ_national_bl_01.columns)} columns, with "
          f"data between t = {min(df_pr_devi_econ_national_bl_01.index.get_level_values(0))} "
          f"and {max(df_pr_devi_econ_national_bl_01.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_devi_econ_national_bl_01.index.get_level_values(1)))} units)"
         )

In [None]:
if update_queries:

    def _build_pr_devi_econ_national_bl_02():
        """Assemble the second half of the national/deviation query set.

        Returns the priogrid-month Queryset holding the political-institutions
        "deviation" inputs and the extended-baseline lags. Columns are added
        in a fixed order; every column is one of five transform recipes, each
        captured by a helper below.
        """
        prpe_src = "acled_c1_count"           # source of the *_prpe_* columns
        prex_src = "acled_c3_count"           # source of the *_prex_* columns
        sb_src = "ged_sb_best_sum_nokgi"
        os_src = "ged_os_best_sum_nokgi"

        def acled(name, source):
            """Start a column reading `source` from the acled2_pgm table."""
            return Column(name, from_table="acled2_pgm", from_column=source)

        def ged(name, source):
            """Start a column reading `source` from the ged2_pgm table."""
            return Column(name, from_table="ged2_pgm", from_column=source)

        def vdem_tlag12(name, source):
            """V-Dem column: fill -> 12-step temporal lag -> fill."""
            start = Column(name, from_table="vdem_v12_cy", from_column=source)
            lagged = start.transform.missing.fill().transform.temporal.tlag(12)
            return lagged.transform.missing.fill()

        def filled(column):
            """replace_na -> fill."""
            return column.transform.missing.replace_na().transform.missing.fill()

        def tlag1(column):
            """replace_na -> tlag(1) -> replace_na -> fill."""
            lagged = column.transform.missing.replace_na().transform.temporal.tlag(1)
            return lagged.transform.missing.replace_na().transform.missing.fill()

        def splag_tlag1(column, second_arg):
            """replace_na -> spatial.lag(1, second_arg, 0, 0) -> the tlag1 recipe."""
            spatial = (column.transform.missing.replace_na()
                             .transform.spatial.lag(1, second_arg, 0, 0))
            return tlag1(spatial)

        def ln(column):
            """Append the ln transform as the last step."""
            return column.transform.ops.ln()

        query = Queryset("protest_paper_pr_devi_econ_national_bl_02", "priogrid_month")

        ordered_columns = [
            # Political institutions (deviation): current protest counts
            filled(acled("acled_prpe_count", prpe_src)),
            filled(acled("acled_prex_count", prex_src)),

            # V-Dem indices, lagged 12 steps
            vdem_tlag12("vdem_v2x_polyarchy_tlag12", "vdem_v12_v2x_polyarchy"),
            vdem_tlag12("vdem_v2x_civlib_tlag12", "vdem_v12_v2x_civlib"),

            # One-step temporal lags
            tlag1(acled("acled_prpe_count_tlag1", prpe_src)),
            tlag1(acled("acled_prex_count_tlag1", prex_src)),
            tlag1(ged("geb_sb_best_tlag1", sb_src)),
            tlag1(ged("geb_os_best_tlag1", os_src)),

            # Spatial lag (1,1,0,0) followed by a one-step temporal lag
            splag_tlag1(ged("splag_1_1_ged_sb_best_tlag1", sb_src), 1),
            splag_tlag1(ged("splag_1_1_ged_os_dummy_tlag1", os_src), 1),
            splag_tlag1(acled("splag_1_1_acled_prpe_count_tlag1", prpe_src), 1),
            splag_tlag1(acled("splag_1_1_acled_prex_count_tlag1", prex_src), 1),

            # ln of the one-step temporal lags
            ln(tlag1(acled("ln_acled_prpe_count_tlag1", prpe_src))),
            ln(tlag1(acled("ln_acled_prex_count_tlag1", prex_src))),
            ln(tlag1(ged("ln_geb_sb_best_tlag1", sb_src))),
            ln(tlag1(ged("ln_geb_os_best_tlag1", os_src))),

            # ln of the spatially and temporally lagged series
            ln(splag_tlag1(ged("ln_splag_1_1_ged_sb_best_tlag1", sb_src), 1)),
            ln(splag_tlag1(ged("ln_splag_1_1_ged_os_best_tlag1", os_src), 1)),
            ln(splag_tlag1(acled("ln_splag_1_2_acled_prpe_count_tlag1", prpe_src), 2)),
            ln(splag_tlag1(acled("ln_splag_1_2_acled_prex_count_tlag1", prex_src), 2)),
        ]

        for column in ordered_columns:
            query = query.with_column(column)

        query = query.with_theme("protest_paper")
        return query.describe("""Second half of query: National dynamic protest model including local dynamic model, economic delveopment (national), political institutions IV (deviation) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)

    qs = _build_pr_devi_econ_national_bl_02()

    # Publish the definition, then download the resulting data.
    df_pr_devi_econ_national_bl_02 = qs.publish().fetch()

    # Summary: column count, range of index level 0, unique values in index level 1.
    print(
        "A dataset with {} columns, with data between t = {} and {}. ({} units)".format(
            len(df_pr_devi_econ_national_bl_02.columns),
            min(df_pr_devi_econ_national_bl_02.index.get_level_values(0)),
            max(df_pr_devi_econ_national_bl_02.index.get_level_values(0)),
            len(np.unique(df_pr_devi_econ_national_bl_02.index.get_level_values(1))),
        )
    )

##### Full protest model + political institutions III + full economic development (country and sub-national level)

In [None]:
if update_queries:
    # Query set "protest_paper_pr_devi_econ_full_bl" at priogrid-month level.
    # It combines, in this order: the conflict target and its history features,
    # local (pgm) and national (cm) protest features, population, country-level
    # and sub-national economic features, and the political-institutions
    # "deviation" inputs with the extended-baseline lags.
    # The definition is published and then fetched into
    # `df_pr_devi_econ_full_bl`; a short summary of the result is printed.
    qs = (Queryset("protest_paper_pr_devi_econ_full_bl", "priogrid_month")

        # target variable
        .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of target variable
        .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # timelag 0 of fatalities
        .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.ops.ln()
             .transform.missing.fill()
            )

        # Decay function
        ## 12 months
        .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )

         ## 24 months
        .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(24)
             .transform.missing.fill()
            )

        # Spatial lag function
        .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          # Decay of spatial lag
        .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.temporal.time_since()
                 .transform.temporal.decay(12)
                 .transform.missing.fill()
                )

          # Moving average over 24 months
          # NOTE(review): the column name says "12" but the window passed is 24;
          # the name is kept as-is because other code may select the column by name.
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ## PROTEST FEATURES - Dynamic Local

          .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention
          # NOTE(review): the original comment listed the same interaction codes as
          # "excessive violence" above (16, 26, 36, 46, 56, 68) - confirm the
          # correct codes for this category.
          .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          ## PROTEST FEATURES - Dynamic national

          .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # protest with excessive violence against protester (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention
          # NOTE(review): interaction codes to be confirmed (the original comment
          # repeated the "excessive violence" codes).
          .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          # population
          .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          ### ECONOMIC DEVELOPMENT, Country level
          .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                             .transform.missing.fill()
                            )
          .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                             .transform.missing.fill()
                            )
          # Reads the series the column is named after (WDI SL.UEM.TOTL.ZS,
          # unemployment). It previously read "wdi_ny_gdp_pcap_kd_zg", which made
          # this feature an exact copy of the GDP-growth column directly above.
          # NOTE(review): confirm "wdi_sl_uem_totl_zs" is available in wdi_cy, and
          # apply the same correction to the other query sets in this notebook that
          # repeat this definition; values of this feature change relative to data
          # fetched with the old definition.
          .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_sl_uem_totl_zs")
                             .transform.missing.fill()
                            )

          ### ECONOMIC DEVELOPMENT, Sub-national level
          .with_column(Column("pgd_gcp_mer", from_table = "priogrid_year", from_column = "gcp_mer")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_imr_mean", from_table = "priogrid_year", from_column = "imr_mean")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_urban_ih", from_table = "priogrid_year", from_column = "urban_ih")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_agri_ih", from_table = "priogrid_year", from_column = "agri_ih")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          # POL. INSTITUTIONS
          # Deviation
          # NOTE(review): "acled_prpe_count" and "acled_prex_count" were already
          # added above (local protest features) with replace_na() only; here they
          # are added again with an extra fill(). Confirm how viewser resolves two
          # columns with the same name and drop whichever definition is not intended.
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          # The "geb_" prefix (rather than "ged_") is kept as-is because other code
          # may select these columns by name.
          .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          # NOTE(review): named "..._dummy_..." but no bool.gte(1) step is applied,
          # so this carries the lagged sum like its "sb_best" sibling above.
          .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          # ln versions of the lagged series above (ln is applied as the last step)
          .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )


          .with_theme("protest_paper")
          .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national, sub-national), political institutions IV (deviation) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    # Publish the definition, then download the resulting data.
    df_pr_devi_econ_full_bl = qs.publish().fetch()

    # Summary: column count, range of index level 0, unique values in index level 1.
    print(f"A dataset with {len(df_pr_devi_econ_full_bl.columns)} columns, with "
          f"data between t = {min(df_pr_devi_econ_full_bl.index.get_level_values(0))} "
          f"and {max(df_pr_devi_econ_full_bl.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_devi_econ_full_bl.index.get_level_values(1)))} units)"
         )

In [None]:
# Build, publish and fetch part 01 of the "pr_devi_econ_full_bl" query set at
# the "priogrid_month" level of analysis. It contains the target variable,
# features derived from ged_sb_best_sum_nokgi (table ged2_pgm) and the protest
# features derived from the ACLED count columns (tables acled2_pgm, acled2_cm).
# The cell only runs when `update_queries` is truthy.
if update_queries: 
    qs = (Queryset("protest_paper_pr_devi_econ_full_bl_01", "priogrid_month")

        # Target variable: missing values are replaced, then the count is
        # turned into a dummy (>= 1).
        .with_column(Column("ged_sb_dummy_dep", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # Timelag 0 of target variable (same definition as ged_sb_dummy_dep).
        .with_column(Column("ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
            .transform.missing.replace_na()
            .transform.bool.gte(1)
            )

        # Timelag 0 of fatalities (log transform applied before filling).
        .with_column(Column("ln_ged_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.ops.ln()
             .transform.missing.fill()
            )

        # Decay function of the time since the dummy was last 1.
        ## 12 months
        .with_column(Column("decay_ts_12_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(12)
             .transform.missing.fill()
            )

         ## 24 months
        .with_column(Column("decay_ts_24_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
             .transform.missing.replace_na()
             .transform.bool.gte(1)
             .transform.temporal.time_since()
             .transform.temporal.decay(24)
             .transform.missing.fill()
            )

        # Spatial lag function applied to the dummy.
        # NOTE(review): replace_na() is directly followed by fill(); the second
        # call looks redundant - confirm.
        .with_column(Column("splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                     .transform.missing.fill()
                )

          # Decay of spatial lag: the lagged value is turned into a dummy again
          # before time_since/decay are applied.
        .with_column(Column("decay_ts_12_splag_1_1_ged_sb_dummy", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.bool.gte(1)
                 .transform.temporal.time_since()
                 .transform.temporal.decay(12)
                 .transform.missing.fill()
                )

          # Moving average over 24 months of the logged variable.
          # NOTE(review): the column is named "mov_avg_12_..." but the window
          # passed to moving_average is 24 - confirm which one is intended.
          .with_column(Column("mov_avg_12_ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                             .transform.ops.ln()
                             .transform.missing.replace_na()
                             .transform.temporal.moving_average(24)
                             .transform.missing.fill()
                            )

          ## PROTEST FEATURES - Dynamic Local
          ## Mapping used throughout this cell: prex <- acled_c3_count,
          ## prpe <- acled_c1_count, prin <- acled_c2_count, prri <- acled_c5_count.

          # Decay (6 months) of time since the last event, per protest category.
          .with_column(Column("decay_ts_6_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # Same decay features, computed on the spatial lag (1,2,0,0) of the
          # event dummy.
          .with_column(Column("decay_ts_6_splag_1_2_acled_prpe_dummy", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prex_dummy", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prin_dummy", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_splag_1_2_acled_prri_dummy", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.spatial.lag(1,2,0,0)
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          # Raw counts per category (missing values replaced).
          # protest with excessive violence against protesters (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          # NOTE(review): the interaction codes listed here are identical to the
          # ones listed for "prex" above - looks like a copy-paste; confirm.
          .with_column(Column("acled_prin_count", from_table = "acled2_pgm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count", from_table = "acled2_pgm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )

          ## PROTEST FEATURES - Dynamic national
          ## Same features as above, read from the acled2_cm table ("_cm" suffix).

          .with_column(Column("decay_ts_6_acled_prex_dummy_cm", from_table = "acled2_cm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          .with_column(Column("decay_ts_6_acled_prpe_dummy_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prin_dummy_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )
          .with_column(Column("decay_ts_6_acled_prri_dummy_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                             .transform.bool.gte(1)
                             .transform.temporal.time_since()
                             .transform.temporal.decay(6)
                             .transform.missing.fill()
                            )

          # protest with excessive violence against protesters (interaction codes: 16, 26, 36, 46, 56, 68.)
          .with_column(Column("acled_prex_count_cm", from_table = "acled2_cm", from_column = "acled_c3_count") 
                             .transform.missing.replace_na()
                            )
          # peaceful protest (interaction codes: 60, 66, or 67.)
          .with_column(Column("acled_prpe_count_cm", from_table = "acled2_cm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                            )
          # protest with intervention (interaction codes: 16, 26, 36, 46, 56, 68.)
          # NOTE(review): same interaction codes as listed for "prex" - confirm.
          .with_column(Column("acled_prin_count_cm", from_table = "acled2_cm", from_column = "acled_c2_count")
                             .transform.missing.replace_na()
                            )
          # protest with riots (interaction codes: inter 1 or 2 has 5)
          .with_column(Column("acled_prri_count_cm", from_table = "acled2_cm", from_column = "acled_c5_count")
                             .transform.missing.replace_na()
                            )




          .with_theme("protest_paper")
          .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national, sub-national), political institutions IV (deviation) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    # Publish the query set definition and fetch the resulting data.
    df_pr_devi_econ_full_bl_01 = qs.publish().fetch()

    # Summary: number of columns, range of index level 0 and number of unique
    # values in index level 1.
    print(f"A dataset with {len(df_pr_devi_econ_full_bl_01.columns)} columns, with "
          f"data between t = {min(df_pr_devi_econ_full_bl_01.index.get_level_values(0))} "
          f"and {max(df_pr_devi_econ_full_bl_01.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_devi_econ_full_bl_01.index.get_level_values(1)))} units)"
         )

In [None]:
# Build, publish and fetch part 02 of the "pr_devi_econ_full_bl" query set at
# the "priogrid_month" level of analysis: population, economic development
# (country and sub-national level), V-Dem indicators and time-lagged
# protest/conflict counts. The cell only runs when `update_queries` is truthy.
if update_queries:
    qs = (Queryset("protest_paper_pr_devi_econ_full_bl_02", "priogrid_month")


          # population
          .with_column(Column("wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          ### ECONOMIC DEVELOPMENT, Country level
          .with_column(Column("wdi_ny_gdp_pcap_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd")
                             .transform.missing.fill()
                            )
          .with_column(Column("wdi_ny_gdp_pcap_kd_zg", from_table = "wdi_cy", from_column = "wdi_ny_gdp_pcap_kd_zg")
                             .transform.missing.fill()
                            )
          # Unemployment indicator. The source column previously pointed at
          # "wdi_ny_gdp_pcap_kd_zg" (copy-paste of the column above), so this
          # feature silently duplicated GDP per capita growth.
          # NOTE(review): this changes the fetched data relative to earlier
          # runs of the published queryset.
          .with_column(Column("wdi_sl_uem_totl_zs", from_table = "wdi_cy", from_column = "wdi_sl_uem_totl_zs")
                             .transform.missing.fill()
                            )

          ### ECONOMIC DEVELOPMENT, Sub-national level
          .with_column(Column("pgd_gcp_mer", from_table = "priogrid_year", from_column = "gcp_mer")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_imr_mean", from_table = "priogrid_year", from_column = "imr_mean")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_urban_ih", from_table = "priogrid_year", from_column = "urban_ih")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_agri_ih", from_table = "priogrid_year", from_column = "agri_ih")
                             .transform.missing.fill()
                             .transform.missing.replace_na()
                            )
          .with_column(Column("pgd_pop_gpw_sum", from_table = "priogrid_year", from_column = "pop_gpw_sum")
                             .transform.missing.fill()
                             .transform.missing.extrapolate()
                             .transform.missing.replace_na()
                            )

          # POL. INSTITUTIONS
          # Deviation
          .with_column(Column("acled_prpe_count", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          # V-Dem indicators, lagged by 12 time steps.
          .with_column(Column("vdem_v2x_polyarchy_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_polyarchy")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          .with_column(Column("vdem_v2x_civlib_tlag12", from_table = "vdem_v12_cy", from_column = "vdem_v12_v2x_civlib")
                             .transform.missing.fill()
                             .transform.temporal.tlag(12)
                             .transform.missing.fill()
                            )

          # Counts lagged by one time step.
          .with_column(Column("acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          .with_column(Column("acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                            )

          # NOTE(review): the "geb_" prefix (rather than "ged_") is kept as-is
          # because the column names may be referenced elsewhere.
          .with_column(Column("geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          # Spatial lags, lagged by one time step.
          .with_column(Column("splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          # NOTE(review): named "..._dummy_..." although no bool transform is
          # applied; name kept unchanged.
          .with_column(Column("splag_1_1_ged_os_dummy_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("splag_1_1_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          .with_column(Column("splag_1_1_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                )

          # Log-transformed versions of the lagged counts.
          .with_column(Column("ln_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                             .transform.missing.replace_na()
                             .transform.temporal.tlag(1)
                             .transform.missing.replace_na()
                             .transform.missing.fill()
                             .transform.ops.ln()
                            )

          .with_column(Column("ln_geb_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_geb_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          # Log-transformed versions of the lagged spatial lags.
          .with_column(Column("ln_splag_1_1_ged_sb_best_tlag1", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_1_ged_os_best_tlag1", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,1,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prpe_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c1_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )

          .with_column(Column("ln_splag_1_2_acled_prex_count_tlag1", from_table = "acled2_pgm", from_column = "acled_c3_count")
                 .transform.missing.replace_na()
                 .transform.spatial.lag(1,2,0,0)
                 .transform.missing.replace_na()
                 .transform.temporal.tlag(1)
                 .transform.missing.replace_na()
                 .transform.missing.fill()
                 .transform.ops.ln()
                )


          .with_theme("protest_paper")
          .describe("""National dynamic protest model including local dynamic model, full economic delveopment (national, sub-national), political institutions IV (deviation) and extended baseline variables, pgm level

                Predicting armed conflict (dummy) using protest data, extended baseline

                """)
        )

    # Publish the query set definition and fetch the resulting data.
    df_pr_devi_econ_full_bl_02 = qs.publish().fetch()

    # Summary: number of columns, range of index level 0 and number of unique
    # values in index level 1.
    print(f"A dataset with {len(df_pr_devi_econ_full_bl_02.columns)} columns, with "
          f"data between t = {min(df_pr_devi_econ_full_bl_02.index.get_level_values(0))} "
          f"and {max(df_pr_devi_econ_full_bl_02.index.get_level_values(0))}. "
          f"({len(np.unique(df_pr_devi_econ_full_bl_02.index.get_level_values(1)))} units)"
         )

## Fetch querysets<a class="anchor" id="loadq"></a>

In [None]:
# Identifiers passed to organize.FetchData in the cells below.
fetchid_01, fetchid_02 = 'protest_paper_incidence_01', 'protest_paper_incidence_02'
fetchid_03_01, fetchid_03_02 = (
    'protest_paper_incidence_03_01',
    'protest_paper_incidence_03_02',
)

In [None]:
# Fetch the datasets for fetchid_01 via organize.FetchData.
# Fetching breaks down if not split into multiple parts, hence one call per id.
datasets_01 = organize.FetchData(fetchid_01)

In [None]:
# Fetch the datasets for fetchid_02 via organize.FetchData.
# Fetching breaks down if not split into multiple parts, hence one call per id.
datasets_02 = organize.FetchData(fetchid_02)

In [None]:
# Fetch the datasets for fetchid_03_01 via organize.FetchData.
# Fetching breaks down if not split into multiple parts, hence one call per id.
datasets_03_01 = organize.FetchData(fetchid_03_01)

In [None]:
# Fetch the datasets for fetchid_03_02 via organize.FetchData.
# Fetching breaks down if not split into multiple parts, hence one call per id.
datasets_03_02 = organize.FetchData(fetchid_03_02)

In [None]:
# Merge the first entry of the 03_01 fetch with the first entry of the 03_02
# fetch; the merged result keeps the name of the 03_01 entry.
datasets_03 = [
    organize.MergeQueries(
        datasets_03_01[0]['df'],
        datasets_03_02[0]['df'],
        datasets_03_01[0]['Name'],
    )
]

In [None]:
# Merge the entries at index 2 of the 03_01 and 03_02 fetches and append the result.
# NOTE(review): the entries at index 1 are never merged - confirm this is intentional.
datasets_03.append(organize.MergeQueries(datasets_03_01[2]['df'],datasets_03_02[2]['df'],datasets_03_01[2]['Name']))

In [None]:
# Combine the lists of dataset dictionaries into a single list.
datasets = [*datasets_01, *datasets_02, *datasets_03]

### Limiting geographical and temporal scope

In [None]:
# Fetch the ids used for the geographical crop and apply the crop to every dataset.
df_pg = organize.fetch_africa_ids()

# NOTE(review): the return value of crop_africa is discarded, so it presumably
# modifies the dataset dictionary in place - confirm against the helper.
for df in datasets:
    organize.crop_africa(df,df_pg)

In [None]:
# Limit the temporal scope of every dataset; 200 and 500 are passed to
# organize.crop_months as the bounds (presumably first/last month id - confirm).
for df in datasets:
    print(df['Name'])
    df['df'] = organize.crop_months(df['df'],200,500)

In [None]:
# Reindex every dataset with organize.reindex_df and store the result back
# in the dataset dictionary.
for df in datasets:
    print(df['Name'])
    df['df'] = organize.reindex_df(df['df'])

In [None]:
# Check for NaNs: prints True for a dataset if any cell in it is missing.
for df in datasets:
    print(df['df'].isna().any().any())

### Applying transformations <a class="anchor" id="applytransforms"></a>

#### Full economic development, country level and subnational level

Apply the transformation to all models that include the full economic development features.

In [None]:
# Source column and name of the derived (population-normalised) column.
input_var, transf_var = 'pgd_gcp_mer', 'pgd_gcp_mer_pc'

In [None]:
# Transform only the first 'econ_full' dataset; every later 'econ_full'
# dataset is collected in apply_to_dfs so the result can be copied over.
applied_to_df = []
apply_to_dfs = []

first = True
for df in datasets:
    if 'econ_full' not in df['Name']:
        continue

    print(df['Name'])

    if not first:
        apply_to_dfs.append(df)
        continue

    applied_to_df.append(df)

    # Normalise by population (10000 is passed through to divide_by_pop).
    df['df'][transf_var] = transforms.divide_by_pop(df['df'], input_var, 10000)

    # Natural log of the normalised variable.
    df['df'][f'ln_{transf_var}'] = ln(df['df'][transf_var])

    # Sanity check: the logged column must differ from its input.
    if df['df'][f'ln_{transf_var}'].equals(df['df'][transf_var]):
        print('Warning, check log transformation')

    # Keep only the logged variable. The population variable stays in the
    # frame for now because later transformations still need it.
    print('n cols before:', len(df['df'].columns))
    df['df'] = df['df'].drop(columns=[input_var, transf_var])
    print('n cols after:', len(df['df'].columns))

    first = False

In [None]:
# Copy the logged per-capita column from the transformed dataset to the
# remaining 'econ_full' datasets (column-wise concatenation).
for df in apply_to_dfs:
    print(df['Name'])
    df['df'] = pd.concat(
        (df['df'], applied_to_df[0]['df']['ln_pgd_gcp_mer_pc']),
        axis='columns',
    )

#### Protest models

In [None]:
# Category infixes used to build the acled_pr{...} column names: the naive
# model has no infix, the other models use one infix per protest category.
pr_naive_cat, pr_categories = [''], ['ri', 'in', 'ex', 'pe']

##### Applying transforms to naive and dynamic local models

In [None]:
# Normalise the protest counts by population and take the natural log.
# 'naive_bl' datasets use the naive category list; of the remaining protest
# datasets only the first is transformed, the rest are collected.
applied_to_df = []
apply_to_dfs = []

first = True

for df in datasets:
    if 'pr' not in df['Name']:
        continue

    if 'naive_bl' in df['Name']:
        categories = pr_naive_cat
    elif first:
        applied_to_df.append(df)
        categories = pr_categories
        first = False
    else:
        print(df['Name'])
        apply_to_dfs.append(df)
        continue

    frame = df['df']
    for pr in categories:
        pop_col = f'acled_pr{pr}_pop'
        ln_col = f'ln_acled_pr{pr}_pop_tlag0'

        # Normalise.
        print('normalise variable')
        print(df['Name'])
        frame[pop_col] = transforms.divide_by_pop(frame, f'acled_pr{pr}_count', 10000)

        # Nat log.
        frame[ln_col] = ln(frame[pop_col])

        # Sanity check: the logged column must differ from its input.
        if frame[ln_col].equals(frame[pop_col]):
            print('Warning, check log transformation')

In [None]:
# Cumulative (moving) sum over three months, normalised by population, plus
# nat. log. 'naive_bl' datasets use the naive category list; of the remaining
# protest datasets only the first is transformed, the rest are collected.
applied_to_df = []
apply_to_dfs = []

first = True

for df in datasets:
    if 'pr' not in df['Name']:
        continue
    print(df['Name'])

    if 'naive_bl' in df['Name']:
        categories = pr_naive_cat
    elif first:
        applied_to_df.append(df)
        categories = pr_categories
        first = False
    else:
        print(df['Name'])
        apply_to_dfs.append(df)
        continue

    for pr in categories:

        # Cumulative sum
        print('compute cumulative sum')
        df['df'][f'cumsum_3_acled_pr{pr}_count'] = transforms.moving_sum(s=df['df'][f'acled_pr{pr}_count'], time=3)
        # Compare element-wise first, then reduce. The previous expression
        # `.any()<0 == True` was a chained comparison that always evaluated
        # to False, so negative values could never be reported.
        print('Negative values after applying cumulative sum:', (df['df'][f'cumsum_3_acled_pr{pr}_count'] < 0).any())

        # Normalise.
        print('normalise variable')
        df['df'][f'cumsum_3_acled_pr{pr}_pop'] = transforms.divide_by_pop(df['df'],f'cumsum_3_acled_pr{pr}_count',10000)

        # Nat log.
        df['df'][f'ln_cumsum_3_acled_pr{pr}_pop'] = ln(df['df'][f'cumsum_3_acled_pr{pr}_pop'])
        # Sanity check: the logged column must differ from its input.
        if df['df'][f'ln_cumsum_3_acled_pr{pr}_pop'].equals(df['df'][f'cumsum_3_acled_pr{pr}_pop']):
            print('Warning, check log transformation')
        else:
            print('Ok')
print('DONE')

In [None]:
# Spatial lag of the population-normalised protest counts, plus nat. log.
# 'naive_bl' datasets use the naive category list; of the remaining protest
# datasets only the first is transformed, the rest are collected.
applied_to_df = []
apply_to_dfs = []

first = True

for df in datasets:
    if 'pr' not in df['Name']:
        continue
    print(df['Name'])

    if 'naive_bl' in df['Name']:
        categories = pr_naive_cat
    elif first:
        applied_to_df.append(df)
        categories = pr_categories
        first = False
    else:
        print(df['Name'])
        apply_to_dfs.append(df)
        continue

    frame = df['df']
    for pr in categories:
        splag_col = f'splag_1_2_acled_pr{pr}_pop'
        ln_col = f'ln_splag_1_2_acled_pr{pr}_pop_tlag0'

        # Splag; missing values in the result are set to 0.
        print('take splag of normalised variable')
        lagged = spl.get_splag4d(pd.DataFrame(frame[f'acled_pr{pr}_pop']), True, 1, 2, 0, 0)
        frame[splag_col] = lagged.fillna(0)

        # Nat log, followed by a check that the column actually changed.
        frame[ln_col] = ln(frame[splag_col])
        if frame[ln_col].equals(frame[splag_col]):
            print('Warning, check log transformation')
        else:
            print('Ok')
print('DONE')

In [None]:
# Spatial lag of the population-normalised three-month sums, plus nat. log.
# 'naive_bl' datasets use the naive category list; of the remaining protest
# datasets only the first is transformed, the rest are collected.
applied_to_df = []
apply_to_dfs = []

first = True

for df in datasets:
    if 'pr' not in df['Name']:
        continue
    print(df['Name'])

    if 'naive_bl' in df['Name']:
        categories = pr_naive_cat
    elif first:
        applied_to_df.append(df)
        categories = pr_categories
        first = False
    else:
        print(df['Name'])
        apply_to_dfs.append(df)
        continue

    frame = df['df']
    for pr in categories:
        splag_col = f'splag_1_2_cumsum_3_acled_pr{pr}_pop'
        ln_col = f'ln_splag_1_2_cumsum_3_acled_pr{pr}_pop'

        # Splag; missing values in the result are set to 0.
        print('take splag of normalised variable')
        lagged = spl.get_splag4d(pd.DataFrame(frame[f'cumsum_3_acled_pr{pr}_pop']), True, 1, 2, 0, 0)
        frame[splag_col] = lagged.fillna(0)

        # Nat log, followed by a check that the column actually changed.
        frame[ln_col] = ln(frame[splag_col])
        if frame[ln_col].equals(frame[splag_col]):
            print('Warning, check log transformation')
        else:
            print('Ok')
print('DONE')

In [None]:
# Fetch gdf
# `fetch_gdf` comes from the local module imported as `organize`; the result
# is concatenated column-wise onto every dataset further down.
gdf = organize.fetch_gdf()

In [None]:
# Make sure indices are equal
# Only the index of datasets[1] is compared against gdf, and the boolean on
# the last line is merely displayed as the cell output; nothing is enforced.
idx1 = gdf.index 
idx2 = datasets[1]['df'].index
idx1.equals(idx2)

In [None]:
# Append the gdf columns to every dataset (column-wise concat, aligned on index).
for df in datasets:
    print(df['Name'])
    with_geometry = pd.concat([df['df'], gdf], axis=1)
    df['df'] = with_geometry

In [None]:
# Minimum distance to closest protest event over three months.
#
# The naive baseline is transformed over `pr_naive_cat`. Of the remaining
# protest datasets only the first one is transformed (over `pr_categories`);
# the others are collected in `apply_to_dfs` so the finished columns can be
# concatenated onto them in a later cell.
applied_to_df = []
apply_to_dfs = []

first = True

for df in datasets:
    model_name = df['Name']
    if 'pr' not in model_name:
        continue
    print(model_name)

    is_naive = 'naive_bl' in model_name
    if not is_naive and not first:
        # Already computed on the first protest dataset: just queue this one.
        print(model_name)
        apply_to_dfs.append(df)
        continue

    if is_naive:
        categories = pr_naive_cat
    else:
        applied_to_df.append(df)
        categories = pr_categories

    frame = df['df']
    for pr in categories:
        count_col = f'acled_pr{pr}_count'
        dummy_col = f'acled_pr{pr}_dummy'
        dist_col = f'dist_acled_pr{pr}_dummy'
        min_dist_col = f'min_dist_acled_pr{pr}_dummy'
        ln_col = f'ln_min_dist_3_acled_pr{pr}'

        # Get dummy variable.
        print('get dummy')
        frame[dummy_col] = greater_or_equal(frame[count_col], 1)

        # Compute distance.
        print('compute distance')
        frame[dist_col] = transforms.distance_to_event(df=frame, col=dummy_col, k=1, fill_value=99)

        # Get minimum distance over three months.
        print('get minimum distance')
        frame[min_dist_col] = transforms.moving_min(s=frame[dist_col], t=3)

        # Nat log, with a sanity check that the log actually changed the values.
        frame[ln_col] = ln(frame[min_dist_col])
        if frame[ln_col].equals(frame[min_dist_col]):
            print('Warning, check log transformation')
        else:
            print('Ok')

    if not is_naive:
        first = False
print('DONE')

##### Adding transforms to remaining protest models

In [None]:
# Names of the transformed protest columns that get copied from the first
# protest dataset onto the remaining ones (seven per protest category).
feats_concat_pr = []
for pr in pr_categories:
    feats_concat_pr.extend([
        f'decay_ts_6_acled_pr{pr}_dummy',
        f'ln_acled_pr{pr}_pop_tlag0',
        f'ln_cumsum_3_acled_pr{pr}_pop',
        f'decay_ts_6_splag_1_2_acled_pr{pr}_dummy',
        f'ln_splag_1_2_acled_pr{pr}_pop_tlag0',
        f'ln_splag_1_2_cumsum_3_acled_pr{pr}_pop',
        f'ln_min_dist_3_acled_pr{pr}',
    ])
feats_concat_pr

In [None]:
# Copy the transformed columns from the first protest dataset onto each of
# the queued datasets (column-wise concat, aligned on index).
for df in apply_to_dfs:
    print(df['Name'])
    shared_feats = applied_to_df[0]['df'][feats_concat_pr]
    df['df'] = pd.concat([df['df'], shared_feats], axis=1)

#### All models besides local dynamic and naive protest model

##### Applying transforms to national models

In [None]:
# Population-normalised protest counts plus natural log, for every protest
# dataset except the naive and local dynamic baselines. Only the first
# matching dataset is transformed; the rest are queued in `apply_to_dfs`.
applied_to_df = []
apply_to_dfs = []

first = True

for df in datasets:
    model_name = df['Name']
    if 'pr' not in model_name:
        continue
    if model_name in ['pr_naive_bl', 'pr_dynamic_loc_bl']:
        continue
    if not first:
        apply_to_dfs.append(df)
        continue

    applied_to_df.append(df)
    print(model_name)

    frame = df['df']
    for pr in pr_categories:
        print(pr)
        pop_col = f'acled_pr{pr}_pop_cm'
        ln_col = f'ln_acled_pr{pr}_pop_cm_tlag0'

        # Normalise.
        print('normalise variable')
        frame[pop_col] = transforms.divide_by_pop_cm(frame, f'acled_pr{pr}_count', 10000)

        # Nat log, with a sanity check that the log actually changed the values.
        frame[ln_col] = ln(frame[pop_col])
        if frame[ln_col].equals(frame[pop_col]):
            print('Warning, check log transformation')
        else:
            print('OK')

    first = False

In [None]:
# Three-month cumulative sum, normalised by population and logged, for every
# protest dataset except the naive and local dynamic baselines. Only the
# first matching dataset is transformed; the rest are queued in
# `apply_to_dfs` so the columns can be concatenated onto them afterwards.
#
# Fixes relative to the previous version of this cell:
#   * everything after the first line was indented one level too deep, which
#     made the cell fail with an IndentationError;
#   * the negative-value check was written as `.any()<0 == True`, a chained
#     comparison that is always False. It now tests the values themselves.
applied_to_df = []
apply_to_dfs = []

first = True

for df in datasets:
    if 'pr' in df['Name']:
        if df['Name'] not in ['pr_naive_bl','pr_dynamic_loc_bl']:
            if first:
                applied_to_df.append(df)
                print(df['Name'])

                for pr in pr_categories:
                    print(pr)

                    # Cumulative sum
                    print('compute cumulative sum')
                    df['df'][f'cumsum_3_acled_pr{pr}_count_cm'] = transforms.moving_sum(s=df['df'][f'acled_pr{pr}_count_cm'], time=3)
                    # True if any value of the new column is below zero.
                    print('Negative values after applying cumulative sum:', bool((df['df'][f'cumsum_3_acled_pr{pr}_count_cm'] < 0).any()))

                    # Normalise.
                    print('normalise variable')
                    df['df'][f'cumsum_3_acled_pr{pr}_pop_cm'] = transforms.divide_by_pop_cm(df['df'],f'cumsum_3_acled_pr{pr}_count_cm',10000)

                    # Nat log, with a sanity check that the log changed the values.
                    df['df'][f'ln_cumsum_3_acled_pr{pr}_pop_cm'] = ln(df['df'][f'cumsum_3_acled_pr{pr}_pop_cm'])
                    if df['df'][f'ln_cumsum_3_acled_pr{pr}_pop_cm'].equals(df['df'][f'cumsum_3_acled_pr{pr}_pop_cm']):
                        print('Warning, check log transformation')
                    else:
                        print('OK')

                first = False

            else:
                apply_to_dfs.append(df)

##### Adding transforms to remaining protest models

In [None]:
# Names of the country-level protest columns that get copied from the first
# transformed dataset onto the remaining ones (three per protest category).
feats_concat_pr = []
for pr in pr_categories:
    feats_concat_pr.extend([
        f'decay_ts_6_acled_pr{pr}_dummy_cm',
        f'ln_acled_pr{pr}_pop_cm_tlag0',
        f'ln_cumsum_3_acled_pr{pr}_pop_cm',
    ])
feats_concat_pr

In [None]:
# Copy the transformed columns from the first dataset onto each of the queued
# datasets (column-wise concat, aligned on index).
for df in apply_to_dfs:
    print(df['Name'])
    shared_feats = applied_to_df[0]['df'][feats_concat_pr]
    df['df'] = pd.concat([df['df'], shared_feats], axis=1)

#### Political institutions models (IV)

As discussed in the theoretical section,
protests are more common and widely accepted as political behavior in democracies than
in non-democracies. To capture this, we include the residuals from a fitted negative
binomial regression model with the count of protest with excessive violence events as the
dependent variables as a proxy for an unexpected amount of protests.

Steps:
- estimate a model with peaceful protests as DV 
- save residuals from model, i.e. the remaining variation in peaceful protests that cannot be explained by the variables included in our regression
- estimate a second model with protests with excessive violence as DV
- save fitted values from model

In [None]:
# Split the 'devi' datasets: the first one is used to estimate the models,
# the remaining ones receive the resulting columns later on.
applied_to_df = []
apply_to_dfs = []

first = True

for df in datasets:
    if 'devi' not in df['Name']:
        continue
    if first:
        applied_to_df.append(df)
        first = False
        continue
    apply_to_dfs.append(df)

print(applied_to_df[0]['Name'])

for dfname in apply_to_dfs:
    print(dfname['Name'])

# Same object as the dataset's frame, so columns added to it below are added
# to that dataset as well.
df_inst_devi_bl_raw = applied_to_df[0]['df']
df_inst_devi_bl_raw

#### Estimating peaceful protests - extracting residuals

In [None]:
# Set up regression expressions
expr_peace = """acled_prpe_count ~ 
vdem_v2x_polyarchy_tlag12 + 
vdem_v2x_civlib_tlag12 + 
ln_acled_prpe_count_tlag1 + 
ln_acled_prex_count_tlag1 +
ln_geb_sb_best_tlag1 +
ln_geb_os_best_tlag1 +
ln_splag_1_1_ged_sb_best_tlag1 +
ln_splag_1_1_ged_os_best_tlag1 +
ln_splag_1_2_acled_prpe_count_tlag1 +
ln_splag_1_2_acled_prex_count_tlag1 +
pgd_pop_gpw_sum 
"""

In [None]:
# Fit poission.
y_deviant, X_deviant = dmatrices(expr_peace, df_inst_devi_bl_raw.loc[205:444], return_type='dataframe')
poi_results = sm.GLM(y_deviant, X_deviant, family=sm.families.Poisson()).fit()
print(poi_results.summary())

In [None]:
# Check for overdispersion (peaceful protests).
# Imported here because this cell runs before the later cell that imports it.
import statsmodels.formula.api as smf


def ct_response(row):
    """Calculate response observation for Cameron-Trivedi dispersion test.

    Parameters
    ----------
    row : mapping with keys 'acled_prpe_count' (observed count) and
        'bev_mu' (fitted Poisson mean).

    Returns
    -------
    ((y - m)**2 - y) / m for the observed count y and fitted mean m.
    """
    y = row['acled_prpe_count']
    m = row['bev_mu']
    return ((y - m)**2 - y) / m

# Work on a copy so the helper columns do not end up in the dataset itself.
ct_data = df_inst_devi_bl_raw.loc[205:444].copy()
ct_data['bev_mu'] = poi_results.mu
ct_data['ct_resp'] = ct_data.apply(ct_response, axis=1)

# Linear regression of auxiliary formula
ct_results = smf.ols('ct_resp ~ bev_mu - 1', ct_data).fit()
# Construct confidence interval for alpha, the coefficient of bev_mu
# Overdispersion corresponds to alpha > 0 
alpha_ci95 = ct_results.conf_int(0.05).loc['bev_mu']
print('\nC-T dispersion test: alpha = {:5.3f}, 95% CI = ({:5.3f}, {:5.3f})'
    .format(ct_results.params[0], alpha_ci95.loc[0], alpha_ci95.loc[1]))

In [None]:
# Fit NB.
y_deviant, X_deviant = dmatrices(expr_peace, df_inst_devi_bl_raw.loc[205:444], return_type='dataframe')
nb_results = sm.GLM(y_deviant, X_deviant, family=sm.families.NegativeBinomial(alpha=ct_results.params[0])).fit()
print(nb_results.summary())

In [None]:
# Likelihood Ratio test whether Poission or NB is better suited. 
from scipy import stats
stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)

def lrtest(llmin, llmax):
    lr = 2 * (llmax - llmin)
    p = stats.chisqprob(lr, 1) # llmax has 1 dof more than llmin
    return lr, p

llf = poi_results.llf
llflitter = nb_results.llf

# Suggest that nb is better fitted
lr, p = lrtest(llf, llflitter)
print('LR test, p value: {:.2f}, {:.4f}'.format(lr, p))

In [None]:
# Save residuals.
df_inst_devi_bl_raw['inst_resid_prpe'] = nb_results.resid_response
df_inst_devi_bl_raw['inst_resid_prpe'] = df_inst_devi_bl_raw['inst_resid_prpe'].fillna(0)

In [None]:
# Write to text file.
with open('prpe_nb_summary.txt', 'w') as fh:
    fh.write(nb_results.summary().as_text())

#### Estimating protests with excessive violence - extracting fitted values

In [None]:
# Set up regression expressions
expr_exvio = """acled_prex_count ~ 
vdem_v2x_polyarchy_tlag12 + 
vdem_v2x_civlib_tlag12 + 
ln_acled_prpe_count_tlag1 + 
ln_acled_prex_count_tlag1 +
ln_geb_sb_best_tlag1 +
ln_geb_os_best_tlag1 +
ln_splag_1_1_ged_sb_best_tlag1 +
ln_splag_1_1_ged_os_best_tlag1 +
ln_splag_1_2_acled_prpe_count_tlag1 +
ln_splag_1_2_acled_prex_count_tlag1 +
pgd_pop_gpw_sum 
"""

In [None]:
# Fit poission to get alpha.
y_deviant, X_deviant = dmatrices(expr_exvio, df_inst_devi_bl_raw.loc[205:444], return_type='dataframe')
poi_results2 = sm.GLM(y_deviant, X_deviant, family=sm.families.Poisson()).fit()
print(poi_results2.summary())

In [None]:
# Check for overdispersion
import statsmodels.formula.api as smf
def ct_response(row):
    "Calculate response observation for Cameron-Trivedi dispersion test"
    y = row['acled_prex_count']
    m = row['bev_mu']
    return ((y - m)**2 - y) / m

ct_data = df_inst_devi_bl_raw.loc[205:444].copy()
ct_data['bev_mu'] = poi_results2.mu
ct_data['ct_resp'] = ct_data.apply(ct_response, axis=1)

# Linear regression of auxiliary formula
ct_results = smf.ols('ct_resp ~ bev_mu - 1', ct_data).fit()
# Construct confidence interval for alpha, the coefficient of bev_mu
# Overdispersion corresponds to alpha > 0 
alpha_ci95 = ct_results.conf_int(0.05).loc['bev_mu']
print('\nC-T dispersion test: alpha = {:5.3f}, 95% CI = ({:5.3f}, {:5.3f})'
        .format(ct_results.params[0], alpha_ci95.loc[0], alpha_ci95.loc[1]))

In [None]:
# Fit negative binomial.
y_deviant, X_deviant = dmatrices(expr_exvio, df_inst_devi_bl_raw.loc[205:444], return_type='dataframe')
nb2_results = sm.GLM(y_deviant, X_deviant,family=sm.families.NegativeBinomial(alpha=ct_results.params[0])).fit()
print(nb2_results.summary())

In [None]:
# Save fitted values.
df_inst_devi_bl_raw['inst_yhat_prex'] = nb2_results.fittedvalues
df_inst_devi_bl_raw['inst_yhat_prex'] = df_inst_devi_bl_raw['inst_yhat_prex'].fillna(0)

In [None]:
# Write to text file.
with open('prex_nb_summary.txt', 'w') as fh:
    fh.write(nb2_results.summary().as_text())

#### Specification of deviation model: Baseline + residuals and fitted values

In [None]:
# Add the fitted values and residuals estimated on the first 'devi' dataset
# to the remaining 'devi' datasets (column-wise concat, aligned on index).
for df in apply_to_dfs:
    # Print the dataset's name (previously the literal string 'Name').
    print(df['Name'])
    if 'devi' in df['Name']:
        print('n cols before:', len(df['df'].columns))
        df['df'] = pd.concat([df['df'],df_inst_devi_bl_raw['inst_yhat_prex'],df_inst_devi_bl_raw['inst_resid_prpe']],axis=1)
        print('n cols after:', len(df['df'].columns))

### Descriptives

In [None]:
# Descriptive statistics for dependent variable.
save_table = False
add_protest = False

descr_start = [205,445]
descr_end = 480
cols = ['ged_sb_dummy_dep']

# Write to tex.
if save_table:
    for time in descr_start:
        tex = datasets[0]['df'].loc[time:descr_end][cols].describe().round(3).to_latex(index=True)
        now = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
        meta = f"""
        %Output created by protest_paper.ipynb.
        %Descriptive Statistics.
        %Produced on {now}.
        \\
        """
        tex = meta + tex
        path_out = os.path.join(output_paths['descriptives'], f"descr_ged_dummy_sb_{time}_{descr_end}.txt")
        with open(path_out, "w") as f:
            f.write(tex)
        print(f"Written to {path_out}.")

for time in descr_start:
    print(datasets[0]['df'].loc[time:descr_end][cols].describe().round(3))

## Filter relevant features <a class="anchor" id="filterfeats"></a>

In [None]:
# Load the feature lists; the cells below treat the result as a mapping from
# dataset name to the list of columns to keep.
with open('featlist_protest_paper.yaml', mode='r') as file:
    full_featlist = yaml.safe_load(file)

In [None]:
# Restrict every dataset to the columns listed for it in the feature list.
for df in datasets:
    for feats, colname in full_featlist.items():
        if df['Name'] != feats:
            continue
        print(df['Name'])
        print('n cols before:', len(df['df'].columns))
        df['df'] = df['df'][colname]
        print('n cols after:', len(df['df'].columns))

In [None]:
# Drop duplicated columns
for df in datasets:
    df['df'] = organize.getDuplicateColumns(df['df'])

In [None]:
# Check that each dataset has exactly as many columns as its feature list.
for df in datasets:
    for feats, colname in full_featlist.items():
        if df['Name'] != feats:
            continue
        print(df['Name'])
        n_expected = len(colname)
        print('Matching length:', n_expected==len(df['df'].columns))

In [None]:
# Report any dataset that still contains duplicated column names.
for df in datasets:
    has_duplicates = df['df'].columns.duplicated().any()
    if has_duplicates:
        print('Duplicates detected')

## Export data <a class="anchor" id="savedf"></a>

In [None]:
# Save all the features
save_dict_data = True
if save_dict_data:
    with open(os.path.join(output_paths['data'], f"data_dict_{run_outcome}.p"), 'wb') as fp:
        pickle.dump(datasets, fp, protocol=pickle.HIGHEST_PROTOCOL)