# Explore and download predictor (input) data used in `fatalities002`

**Last updated: 17 July 2023**

This notebook allows you to query and extract a subset of the predictor data that feed into the VIEWS model each month, aggregated to the VIEWS levels of analysis. 

No other data transforms are applied. 

The queryset specified here matches the predictor data made available via the VIEWS API, where it is updated each month along with the release of our latest conflict predictions. 

**NOTE: Requires a certificate/access to the VIEWS database.**

In [None]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn import linear_model
# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
from views_mapper2.label_writer import *

In [None]:
 # !viewser tables list

In [None]:
 # !viewser tables show 'country'      

In [None]:
# !viewser queryset list

# Specify the output folder

In [None]:
import os
# Home directory of the current user ("~" expanded); base path for the output folders below.
home = os.path.expanduser("~")

#### Option 1: Save to your desktop

In [None]:
# Output folder: the "Desktop" directory inside the user's home directory.
# The trailing slash is kept so file names can be appended directly.
desktop = f"{home}/Desktop/"

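#### Option 2: Save to the VIEWS Dropbox

Uncomment the lines below, then replace `desktop` with `Monthly_updates` in the two `to_csv` export cells further down (they write to `desktop` by default).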

In [None]:
# Mydropbox = home + '/Dropbox (ViEWS)/ViEWS/' # (Toggle on to save to Dropbox)
# Monthly_updates = Mydropbox + 'DataReleases/MonthlyUpdates/Data/Features/' # (Toggle on to save to Dropbox)

# Fetch data 


### Select Development ID

In [None]:
# Development ID used as the prefix of the exported CSV file names.
# Note: the queryset names further down are hard-coded and do not read this variable.
dev_id = 'fatalities002' # Change as needed

### Select time period

Select the time period of interest below using VIEWS `month_ids`. 

In [None]:
# Inclusive bounds (VIEWS month_ids) applied when subsetting the fetched data below.
StartOfHistory = 121 # 121 = Jan 1990
EndOfHistory = 521 # Update as needed (521 is 400 months after 121, i.e. May 2023)

### Identify `month_ids` (if needed)

In [None]:
# Helper: convert a VIEWS `month_id` into a readable "year/month" label

def vid2date(i):
    """Return the calendar date of VIEWS month ``i`` as a 'year/month' string.

    Parameters
    ----------
    i
        A month id accepted by ``ViewsMonth`` (e.g. 121, which this notebook
        notes is Jan 1990).

    Returns
    -------
    str
        ``'<year>/<month>'`` built from ``ViewsMonth(i).year`` and
        ``ViewsMonth(i).month``; no zero-padding is applied to the month.
    """
    # NOTE(review): `ViewsMonth` is not imported by name in this notebook;
    # presumably it arrives via `from views_mapper2.label_writer import *` -- confirm.
    views_month = ViewsMonth(i)
    year = str(views_month.year)
    month = str(views_month.month)
    # Fixed: the original returned the undefined name `monthb`, which raised
    # NameError on every call.
    return year + '/' + month

In [None]:
# Edit `year` and `month` below to print the corresponding `month_id`.
print(
    ViewsMonth.from_year_month(
        year=2019,
        month=5,
    )
)

## Country-month level (*cm*)

In [None]:
# Column specification for the country-month queryset.
# Each entry is (output column name, source table, source column); columns are
# added to the queryset in the order listed here.
cm_column_specs = [
    # Identifiers
    ('year', 'month', 'year_id'),
    ('month', 'month', 'month'),
    ('isoab', 'country', 'isoab'),
    ('country', 'country', 'name'),
    ('gwcode', 'country', 'gwcode'),

    # GED
    ("ucdp_ged_sb_best_sum", "ged2_cm", "ged_sb_best_sum_nokgi"),
    ("ucdp_ged_os_best_sum", "ged2_cm", "ged_os_best_sum_nokgi"),
    ("ucdp_ged_ns_best_sum", "ged2_cm", "ged_ns_best_sum_nokgi"),

    # Note: ACLED data has been left out while awaiting automated codebook
    # procedures that fall in line with the ACLED ToU (e.g. incl. date of access)

    # V-Dem v.12
    ("vdem_v12_v2x_rule", "vdem_v12_cy", "vdem_v12_v2x_rule"),
    ("vdem_v12_v2x_gender", "vdem_v12_cy", "vdem_v12_v2x_gender"),
    ("vdem_v12_v2x_civlib", "vdem_v12_cy", "vdem_v12_v2x_civlib"),
    ("vdem_v12_v2xcl_acjst", "vdem_v12_cy", "vdem_v12_v2xcl_acjst"),
    ("vdem_v12_v2xeg_eqdr", "vdem_v12_cy", "vdem_v12_v2xeg_eqdr"),
    ("vdem_v12_v2x_libdem", "vdem_v12_cy", "vdem_v12_v2x_libdem"),

    # WDI
    ("wdi_ny_gdp_pcap_kd", "wdi_cy", "wdi_ny_gdp_pcap_kd"),
    ("wdi_sp_dyn_le00_in", "wdi_cy", "wdi_sp_dyn_le00_in"),
    ("wdi_se_prm_nenr", "wdi_cy", "wdi_se_prm_nenr"),
    ("wdi_sp_pop_totl", "wdi_cy", "wdi_sp_pop_totl"),
    ("wdi_sp_dyn_imrt_in", "wdi_cy", "wdi_sp_dyn_imrt_in"),

    # FAO FAOSTAT
    ("fao_faostat_wheat_price", "faostat_pp_cm", "wheat_price"),
    ("fao_faostat_sugar_price", "faostat_pp_cm", "sugar_price"),
    ("fao_faostat_milk_price", "faostat_pp_cm", "milk_price"),
    ("fao_faostat_meat_price", "faostat_pp_cm", "meat_price"),

    ("fao_faostat_pct_pop_basicdrink", "faostat_fsec_cy", "pct_pop_basicdrink"),
    ("fao_faostat_pct_undernourished", "faostat_fsec_cy", "pct_undernourished"),
    ("fao_faostat_pcap_fsupply_var", "faostat_fsec_cy", "pcap_fsupply_var"),

    # FAO AQUASTAT
    ("fao_aquastat_renewable_pcap", "fao_aqua_cy", "renewable_pcap"),
    ("fao_aquastat_general_efficiency", "fao_aqua_cy", "general_efficiency"),
    ("fao_aquastat_services_efficiency", "fao_aqua_cy", "services_efficiency"),
]

# Build the queryset one column at a time (equivalent to one long
# `.with_column(...)` chain).
queryset_cm = Queryset("fatalities002_API_predictors_cm", "country_month")
for col_name, src_table, src_column in cm_column_specs:
    queryset_cm = queryset_cm.with_column(
        Column(col_name, from_table=src_table, from_column=src_column)
    )

# Publish the queryset definition, then fetch its data.
data_cm = queryset_cm.publish().fetch()

# Summary: column count, range of index level 0 (reported as "t") and the
# number of distinct values in index level 1 (reported as "units").
cm_time_values = data_cm.index.get_level_values(0)
cm_unit_values = data_cm.index.get_level_values(1)
cm_n_columns = len(data_cm.columns)
cm_n_units = len(np.unique(cm_unit_values))
print(
    f"A dataset with {cm_n_columns} columns, with "
    f"data between t {min(cm_time_values)} "
    f"and {max(cm_time_values)}. "
    f"({cm_n_units} units)"
)

In [None]:
# Keep only rows whose month_id lies within [StartOfHistory, EndOfHistory] (inclusive).
cm_month_window = f'month_id >= {StartOfHistory} & month_id <= {EndOfHistory}'
cm_subset = data_cm.query(cm_month_window)

cm_subset  # last expression: rendered as the cell output

#### Download as .csv

In [None]:
# Write the country-month subset into the `desktop` folder; the file name
# encodes the development ID and the selected month_id range.
cm_csv_path = desktop + f'{dev_id}_API_predictors_cm_{StartOfHistory}-{EndOfHistory}.csv'
cm_subset.to_csv(cm_csv_path)

## PRIO-GRID-month level (*pgm*)

In [None]:
# Column specification for the PRIO-GRID-month queryset.
# Each entry is (output column name, source table, source column); columns are
# added to the queryset in the order listed here.
pgm_column_specs = [
    # Identifiers
    ('year', 'month', 'year_id'),
    ('month', 'month', 'month'),
    ('isoab', 'country', 'isoab'),
    ('country', 'country', 'name'),
    ('gwcode', 'country', 'gwcode'),

    # UCDP GED
    ("ucdp_ged_sb_best_sum", "ged2_pgm", "ged_sb_best_sum_nokgi"),
    ("ucdp_ged_os_best_sum", "ged2_pgm", "ged_os_best_sum_nokgi"),
    ("ucdp_ged_ns_best_sum", "ged2_pgm", "ged_ns_best_sum_nokgi"),

    # NOTE(review): only the `os` and `ns` counts are requested (no `sb` count)
    # -- confirm this is intended.
    ("ucdp_ged_os_best_count", "ged2_pgm", "ged_os_best_count_nokgi"),
    ("ucdp_ged_ns_best_count", "ged2_pgm", "ged_ns_best_count_nokgi"),
]

# Build the queryset one column at a time (equivalent to one long
# `.with_column(...)` chain).
queryset_pgm = Queryset("fatalities002_API_predictors_pgm", "priogrid_month")
for col_name, src_table, src_column in pgm_column_specs:
    queryset_pgm = queryset_pgm.with_column(
        Column(col_name, from_table=src_table, from_column=src_column)
    )

# Publish the queryset definition, then fetch its data.
data_pgm = queryset_pgm.publish().fetch()

# Summary: column count, range of index level 0 (reported as "t") and the
# number of distinct values in index level 1 (reported as "units").
pgm_time_values = data_pgm.index.get_level_values(0)
pgm_unit_values = data_pgm.index.get_level_values(1)
pgm_n_columns = len(data_pgm.columns)
pgm_n_units = len(np.unique(pgm_unit_values))
print(
    f"A dataset with {pgm_n_columns} columns, with "
    f"data between t {min(pgm_time_values)} "
    f"and {max(pgm_time_values)}. "
    f"({pgm_n_units} units)"
)

In [None]:
# Keep only rows whose month_id lies within [StartOfHistory, EndOfHistory] (inclusive).
pgm_month_window = f'month_id >= {StartOfHistory} & month_id <= {EndOfHistory}'
pgm_subset = data_pgm.query(pgm_month_window)

pgm_subset  # last expression: rendered as the cell output

#### Download as .csv

In [None]:
# Write the PRIO-GRID-month subset into the `desktop` folder; the file name
# encodes the development ID and the selected month_id range.
pgm_csv_path = desktop + f'{dev_id}_API_predictors_pgm_{StartOfHistory}-{EndOfHistory}.csv'
pgm_subset.to_csv(pgm_csv_path)

In [None]:
# Completion marker printed when this last cell runs.
print("All done")