# Explore and download predictor (input) data used in `fatalities001`

**Prerequisites: Access to the VIEWS internal database**

This notebook allows you to query and extract a subset of the predictor data that feed into the VIEWS model each month, aggregated to the VIEWS levels of analysis. 

No other data transforms are applied. 

In [None]:
# VIEWS 3
from viewser.operations import fetch
from viewser import Queryset, Column
from views_mapper2.label_writer import *

# Specify the output folder

In [None]:
import os
# Home directory of the current user; the output-folder options below are built from it.
home = os.path.expanduser("~")

#### Option 1: Save to your desktop

In [None]:
# Output folder: the Desktop directory inside the user's home directory
# (the trailing slash lets file names be appended directly).
desktop = f"{home}/Desktop/"

#### Option 2: Save to the VIEWS Dropbox

In [None]:
# Mydropbox = home + '/Dropbox (ViEWS)/ViEWS/' # (Uncomment to save to Dropbox, then use `Mydropbox` instead of `desktop` in the `.to_csv` calls below)

# Fetch data 


### Select time period

Select the time period of interest below using VIEWS `month_ids`. 

In [None]:
# First and last `month_id` of the period to extract; both bounds are
# inclusive in the subsetting cells further down.
StartOfHistory = 121 # 121 = Jan 1990
# Update as needed. NOTE(review): counting 12 ids per year from 121 = Jan 1990,
# 519 would be Mar 2023 -- confirm with ViewsMonth before relying on it.
EndOfHistory = 519

### Identify `month_ids` (if needed)

In [None]:
# Define a helper that turns a VIEWS `month_id` into a "year/month" label

def vid2date(i):
    """Return the date of a VIEWS ``month_id`` as a ``'year/month'`` string.

    Parameters
    ----------
    i : int
        VIEWS month identifier, passed unchanged to ``ViewsMonth``.

    Returns
    -------
    str
        ``str(year) + '/' + str(month)`` of the corresponding ``ViewsMonth``.

    Notes
    -----
    ``ViewsMonth`` is not imported by name in this notebook; it is presumably
    made available by ``from views_mapper2.label_writer import *`` -- TODO confirm.
    """
    views_month = ViewsMonth(i)
    # The previous version returned the undefined name `monthb`, which raised
    # a NameError on every call.
    return str(views_month.year) + '/' + str(views_month.month)

In [None]:
# Choose a year and month below to print the corresponding `month_id`

print(ViewsMonth.from_year_month(year=2023, month=3))

## Country-month level (*cm*)

In [None]:
# Define the country-month queryset. It is kept under its own name so that
# `data_cm` only ever refers to the fetched table, never to the Queryset
# object (previously the same name was bound to both in turn).
queryset_cm = (
    Queryset("fatalities001_API_predictors_cm", "country_month")

    # Identifiers
    .with_column(Column('year', from_table = 'month', from_column = 'year_id'))
    .with_column(Column('month', from_table = 'month', from_column = 'month'))
    .with_column(Column('isoab', from_table = 'country', from_column = 'isoab'))
    .with_column(Column('country', from_table = 'country', from_column = 'name'))
    .with_column(Column('gwcode', from_table = 'country', from_column = 'gwcode'))

    # UCDP GED (sb / os / ns variants of `best_sum_nokgi`)
    .with_column(Column("ucdp_ged_best_sb", from_table = "ged2_cm", from_column = "ged_sb_best_sum_nokgi"))
    .with_column(Column("ucdp_ged_best_os", from_table = "ged2_cm", from_column = "ged_os_best_sum_nokgi"))
    .with_column(Column("ucdp_ged_best_ns", from_table = "ged2_cm", from_column = "ged_ns_best_sum_nokgi"))

    # ACLED: `*_fat` columns
    .with_column(Column("acled_sb_fat", from_table = "acled2_cm", from_column = "acled_sb_fat"))
    .with_column(Column("acled_os_fat", from_table = "acled2_cm", from_column = "acled_os_fat"))
    .with_column(Column("acled_ns_fat", from_table = "acled2_cm", from_column = "acled_ns_fat"))

    .with_column(Column("acled_prx_fat", from_table = "acled2_cm", from_column = "acled_prx_fat"))
    .with_column(Column("acled_bat_gov_fat", from_table = "acled2_cm", from_column = "acled_bat_gov_fat"))
    .with_column(Column("acled_bat_reb_fat", from_table = "acled2_cm", from_column = "acled_bat_reb_fat"))

    # ACLED: `*_count` columns
    .with_column(Column("acled_sb_count", from_table = "acled2_cm", from_column = "acled_sb_count"))
    .with_column(Column("acled_os_count", from_table = "acled2_cm", from_column = "acled_os_count"))
    .with_column(Column("acled_ns_count", from_table = "acled2_cm", from_column = "acled_ns_count"))

    .with_column(Column("acled_prx_count", from_table = "acled2_cm", from_column = "acled_prx_count"))
    .with_column(Column("acled_pr_count", from_table = "acled2_cm", from_column = "acled_pr_count"))

    # V-Dem v. 11 (output names carry the `vdem_v11_` prefix)
    .with_column(Column("vdem_v11_v2x_rule", from_table = "vdem_v11_cy", from_column = "vdem_v2x_rule"))
    .with_column(Column("vdem_v11_v2x_gender", from_table = "vdem_v11_cy", from_column = "vdem_v2x_gender"))
    .with_column(Column("vdem_v11_v2xcl_acjst", from_table = "vdem_v11_cy", from_column = "vdem_v2xcl_acjst"))
    .with_column(Column("vdem_v11_v2xeg_eqdr", from_table = "vdem_v11_cy", from_column = "vdem_v2xeg_eqdr"))
    .with_column(Column("vdem_v11_v2x_libdem", from_table = "vdem_v11_cy", from_column = "vdem_v2x_libdem"))

    # WDI (output names carry the `wb_` prefix)
    .with_column(Column("wb_wdi_sp_pop_totl", from_table = "wdi_cy", from_column = "wdi_sp_pop_totl"))
    .with_column(Column("wb_wdi_sp_dyn_imrt_in", from_table = "wdi_cy", from_column = "wdi_sp_dyn_imrt_in"))
    .with_column(Column("wb_wdi_se_enr_prim_fm_zs", from_table = "wdi_cy", from_column = "wdi_se_enr_prim_fm_zs"))
    .with_column(Column("wb_wdi_ny_gdp_mktp_kd", from_table = "wdi_cy", from_column = "wdi_ny_gdp_mktp_kd"))
)

# Publish the queryset and fetch its data (requires access to the VIEWS
# internal database, as stated at the top of the notebook).
data_cm = queryset_cm.publish().fetch()

# Summarise the fetched country-month data. The message reads the index
# positionally: level 0 is reported as the time span, level 1 as the units.
# pandas Index methods are used instead of built-in min/max and np.unique:
# they avoid a Python-level pass over the index and do not depend on `np`,
# which this notebook never imports by name.
months_cm = data_cm.index.get_level_values(0)
units_cm = data_cm.index.get_level_values(1)

print(f"A dataset with {len(data_cm.columns)} columns, with "
      f"data between t {months_cm.min()} "
      f"and {months_cm.max()}. "
      f"({units_cm.nunique()} units)"
     )

In [None]:
# Keep only the rows whose month_id lies inside the selected window
# (both bounds inclusive).
cm_subset = data_cm.query(
    f'{StartOfHistory} <= month_id and month_id <= {EndOfHistory}'
)

cm_subset  # show the subset as the cell output

#### Download as .csv

In [None]:
# Write the country-month subset into the output folder; the file name
# records the selected month_id window.
cm_subset.to_csv(
    path_or_buf=desktop + f'API_predictors_cm_{StartOfHistory}-{EndOfHistory}_fatalities001.csv'
)

## PRIO-GRID-month (*pgm*) data

In [None]:
# Define the PRIO-GRID-month queryset. It is kept under its own name so that
# `data_pgm` only ever refers to the fetched table, never to the Queryset
# object (previously the same name was bound to both in turn).
queryset_pgm = (
    Queryset("fatalities001_API_predictors_pgm", "priogrid_month")

    # Identifiers
    .with_column(Column('year', from_table = 'month', from_column = 'year_id'))
    .with_column(Column('month', from_table = 'month', from_column = 'month'))
    .with_column(Column('isoab', from_table = 'country', from_column = 'isoab'))
    .with_column(Column('country', from_table = 'country', from_column = 'name'))
    .with_column(Column('gwcode', from_table = 'country', from_column = 'gwcode'))

    # UCDP GED (sb / os / ns variants of `best_sum_nokgi`)
    .with_column(Column("ucdp_ged_best_sum_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi"))
    .with_column(Column("ucdp_ged_best_sum_os", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi"))
    .with_column(Column("ucdp_ged_best_sum_ns", from_table = "ged2_pgm", from_column = "ged_ns_best_sum_nokgi"))

    # NOTE: ucdp_ged_best_count is not included in fatalities001
    # NOTE(review): only the os and ns count columns are requested here; there
    # is no sb counterpart -- confirm that this is intentional.
    .with_column(Column("ucdp_ged_best_count_os", from_table = "ged2_pgm", from_column = "ged_os_best_count_nokgi"))
    .with_column(Column("ucdp_ged_best_count_ns", from_table = "ged2_pgm", from_column = "ged_ns_best_count_nokgi"))
)

# Publish the queryset and fetch its data (requires access to the VIEWS
# internal database, as stated at the top of the notebook).
data_pgm = queryset_pgm.publish().fetch()

# Summarise the fetched PRIO-GRID-month data. The message reads the index
# positionally: level 0 is reported as the time span, level 1 as the units.
# pandas Index methods are used instead of built-in min/max and np.unique:
# they avoid a Python-level pass over the index and do not depend on `np`,
# which this notebook never imports by name.
months_pgm = data_pgm.index.get_level_values(0)
units_pgm = data_pgm.index.get_level_values(1)

print(f"A dataset with {len(data_pgm.columns)} columns, with "
      f"data between t {months_pgm.min()} "
      f"and {months_pgm.max()}. "
      f"({units_pgm.nunique()} units)"
     )

In [None]:
# Restrict the PRIO-GRID-month data to the selected month_id window
# (both bounds inclusive).
pgm_subset = data_pgm.query(
    f'{StartOfHistory} <= month_id and month_id <= {EndOfHistory}'
)

pgm_subset  # show the subset as the cell output

#### Download as .csv

In [None]:
# Write the PRIO-GRID-month subset into the output folder; the file name
# records the selected month_id window.
pgm_subset.to_csv(
    path_or_buf=desktop + f'API_predictors_pgm_{StartOfHistory}-{EndOfHistory}_fatalities001.csv'
)

In [None]:
# Completion marker: reaching this cell means every cell above ran without raising.
print("All done")