# Explore and download conflict data

This notebook allows you to explore and extract subsets of UCDP GED, UCDP Candidate, and ACLED data aggregated to the VIEWS levels of analysis. No other data transforms are applied.

**NOTE: Requires a certificate/access to the VIEWS database.**

In [None]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn import linear_model
# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
from views_mapper2.label_writer import *

In [None]:
# !viewser tables list

In [None]:
# !viewser tables show 'faostat_pp_cm'      ## will show all variables in that table

In [None]:
# !viewser queryset list

# Specify the output folder

In [None]:
import os
# Home directory of the current user; the output-folder paths in the following
# cells are built by appending to this string.
home = os.path.expanduser("~")

#### Option 1: Save to your desktop

In [None]:
# Output folder used by the .to_csv() cells further down: "<home>/Desktop".
desktop = f"{home}/Desktop"  # (Toggle on to save to desktop)

#### Option 2: Save to the VIEWS Dropbox

In [None]:
# Mydropbox = home + '/Dropbox (ViEWS)/ViEWS/' # (Toggle on to save to Dropbox)
# Monthly_updates = Mydropbox + 'DataReleases/MonthlyUpdates/' # (Toggle on to save to Dropbox)

# Find and specify time period and country of interest

## Find month_id of interest

In [None]:
def vid2date(i):
    """Convert a VIEWS month_id into a 'YYYY/M' label.

    month_ids are 1-based: 121 is January 1990 (see StartOfHistory), which
    puts month_id 1 at January 1980.

    Parameters
    ----------
    i : int
        VIEWS month_id.

    Returns
    -------
    str
        Year and month joined by '/', month not zero-padded, e.g. '1990/1'.
    """
    # Shift to a 0-based offset before splitting into years and months.
    # Without the shift every December (month_id divisible by 12) came out
    # as month 0 of the following year.
    years_since_1980, month_offset = divmod(i - 1, 12)
    return f"{1980 + years_since_1980}/{month_offset + 1}"

In [None]:
# Print month_ids for the chosen period
# Edit `year` and `month` to look up a different calendar month.
# NOTE(review): ViewsMonth is not imported by name in the import cell; presumably it
# comes in through the star import from views_mapper2.label_writer — confirm.

print(ViewsMonth.from_year_month(year=2022, month=7)) 

## Specify time period of interest

In [None]:
# Inclusive month_id bounds; every subset cell below filters with
# `month_id >= StartOfHistory and month_id <= EndOfHistory`, and the values are
# also embedded in the exported csv file names.
StartOfHistory=121 # Jan 1990, change as needed
EndOfHistory=512 # Change as needed (would be Aug 2022 if 121 is Jan 1990 — verify)

## Find country of interest

In [None]:
# Look up a country by name and date; the cell output is displayed by Jupyter.
# NOTE(review): presumably returns the VIEWS country_id valid on that date, usable in
# the optional `country_id == ...` filters below — confirm. The function is not
# imported by name; it presumably comes from the views_mapper2.label_writer star import.
name_date2cid('Kenya','2017-07-01')

# Fetch and download conflict data 


## Country-month level (*cm*)

### UCDP GED/UCDP Candidate

In [None]:
# Country-month UCDP GED data: define the queryset, publish it and fetch the
# result in one chain, so `qs_ged_cm` ends up holding the fetched table.
# No transforms are applied; in particular missing values are not filled
# (no `.transform.missing.fill()` on any column).
qs_ged_cm = (
    Queryset("ALM_cm_ged_data_no_transforms", "country_month")

    # identifiers
    .with_column(Column("year", from_table="month", from_column="year_id"))
    .with_column(Column("month", from_table="month", from_column="month"))
    .with_column(Column("country_name", from_table="country", from_column="name"))

    # non-logged target variable GED fatalities
    .with_column(Column("ged_best_sb", from_table="ged2_cm", from_column="ged_sb_best_sum_nokgi"))
    .with_column(Column("ged_best_os", from_table="ged2_cm", from_column="ged_os_best_sum_nokgi"))
    .with_column(Column("ged_best_ns", from_table="ged2_cm", from_column="ged_ns_best_sum_nokgi"))

    .publish()
    .fetch()
)

# Summary of what came back. Index level 0 is reported as time "t" and level 1
# as the unit (presumably month_id and country_id — confirm).
ged_cm_times = qs_ged_cm.index.get_level_values(0)
ged_cm_units = qs_ged_cm.index.get_level_values(1)
print(
    f"A dataset with {len(qs_ged_cm.columns)} columns, with "
    f"data between t {min(ged_cm_times)} "
    f"and {max(ged_cm_times)}. "
    f"({len(np.unique(ged_cm_units))} units)"
)

In [None]:
# Bare expression as the last line of the cell, so Jupyter renders the fetched table.
qs_ged_cm # displays the dataframe above

In [None]:
# Keep only the rows whose month_id lies between StartOfHistory and EndOfHistory
# (both bounds inclusive).
ged_cm_subset= qs_ged_cm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory}')
# Alternative (toggle on instead of the line above): same month range, restricted
# to a single country through its country_id.
#ged_cm_subset= qs_ged_cm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory} and country_id == 237')

ged_cm_subset # displays the subset

#### Download as .csv

In [None]:
# Write the subset to the `desktop` folder; the month_id range is part of the file name.
# Requires the "Option 1" cell that defines `desktop` to have been run.
ged_cm_subset.to_csv(desktop+f'/UCDP_cm_{StartOfHistory}-{EndOfHistory}.csv') # change name of csv
# Alternative file name for a single-country subset (toggle on if the country filter was used):
# ged_cm_subset.to_csv(desktop+f'/UCDP_cm_{StartOfHistory}-{EndOfHistory}_country.csv') # change name of csv

### ACLED

In [None]:
# Country-month ACLED data: define the queryset, publish it and fetch the
# result in one chain, so `qs_acled_cm` ends up holding the fetched table.
# No transforms are applied to any column.
qs_acled_cm = (
    Queryset("ALM_cm_acled_data_no_transforms", "country_month")

    # identifiers
    .with_column(Column("year", from_table="month", from_column="year_id"))
    .with_column(Column("month", from_table="month", from_column="month"))
    .with_column(Column("country_name", from_table="country", from_column="name"))

    # non-logged target variable ACLED fatalities
    .with_column(Column("acled_sb_fat", from_table="acled2_cm", from_column="acled_sb_fat"))
    .with_column(Column("acled_os_fat", from_table="acled2_cm", from_column="acled_os_fat"))
    .with_column(Column("acled_ns_fat", from_table="acled2_cm", from_column="acled_ns_fat"))

    .publish()
    .fetch()
)

# Summary of what came back. Index level 0 is reported as time "t" and level 1
# as the unit (presumably month_id and country_id — confirm).
acled_cm_times = qs_acled_cm.index.get_level_values(0)
acled_cm_units = qs_acled_cm.index.get_level_values(1)
print(
    f"A dataset with {len(qs_acled_cm.columns)} columns, with "
    f"data between t {min(acled_cm_times)} "
    f"and {max(acled_cm_times)}. "
    f"({len(np.unique(acled_cm_units))} units)"
)

In [None]:
# Keep only the rows whose month_id lies between StartOfHistory and EndOfHistory
# (both bounds inclusive).
acled_cm_subset= qs_acled_cm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory}')
# Alternative (toggle on instead of the line above): same month range, restricted
# to a single country through its country_id.
#acled_cm_subset= qs_acled_cm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory} and country_id == 237')

# Bare expression so Jupyter renders the subset.
acled_cm_subset

#### Download as .csv

In [None]:
# Write the subset to the `desktop` folder; the month_id range is part of the file name.
# Requires the "Option 1" cell that defines `desktop` to have been run.
acled_cm_subset.to_csv(desktop+f'/ACLED_cm_{StartOfHistory}-{EndOfHistory}.csv') # change name of csv file
# Alternative file name for a single-country subset (toggle on if the country filter was used).
# The placeholder is {EndOfHistory} followed by a literal "_country" suffix, as in the
# other export cells; a name `EndOfHistory_country` is not defined anywhere.
#acled_cm_subset.to_csv(desktop+f'/ACLED_cm_{StartOfHistory}-{EndOfHistory}_country.csv') # change name of csv file

## PRIO-GRID-month (*pgm*) data

### UCDP GED/UCDP Candidate

In [None]:
# PRIO-GRID-month UCDP GED data: define the queryset, publish it and fetch the
# result in one chain, so `qs_ged_pgm` ends up holding the fetched table.
# No transforms are applied to any column.
qs_ged_pgm = (
    Queryset("ALM_pgm_ged_data_no_transforms", "priogrid_month")

    # identifiers
    .with_column(Column("country_name", from_table="country", from_column="name"))
    .with_column(Column("year", from_table="month", from_column="year_id"))
    .with_column(Column("month", from_table="month", from_column="month"))

    # non-logged target variable GED fatalities
    .with_column(Column("ged_best_sb", from_table="ged2_pgm", from_column="ged_sb_best_sum_nokgi"))
    .with_column(Column("ged_best_os", from_table="ged2_pgm", from_column="ged_os_best_sum_nokgi"))
    .with_column(Column("ged_best_ns", from_table="ged2_pgm", from_column="ged_ns_best_sum_nokgi"))

    .publish()
    .fetch()
)

# Summary of what came back. Index level 0 is reported as time "t" and level 1
# as the unit (presumably month_id and the grid-cell id — confirm).
ged_pgm_times = qs_ged_pgm.index.get_level_values(0)
ged_pgm_units = qs_ged_pgm.index.get_level_values(1)
print(
    f"A dataset with {len(qs_ged_pgm.columns)} columns, with "
    f"data between t {min(ged_pgm_times)} "
    f"and {max(ged_pgm_times)}. "
    f"({len(np.unique(ged_pgm_units))} units)"
)

In [None]:
# Subset for given month range
# Keeps rows with StartOfHistory <= month_id <= EndOfHistory (both bounds inclusive).

ged_pgm_step1 = qs_ged_pgm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory}')

In [None]:
# Subset also for a given country
# Filters the month-range subset on the `country_name` column; change 'Kenya' to the
# country of interest. The name must match the value in that column exactly.

ged_pgm_step2 = ged_pgm_step1[ged_pgm_step1['country_name'] == 'Kenya']
# Bare expression so Jupyter renders the country subset.
ged_pgm_step2

#### Download as .csv

In [None]:
# Write the month-range subset (all countries) to the `desktop` folder.
# Requires the "Option 1" cell that defines `desktop` to have been run.
ged_pgm_step1.to_csv(desktop+f'/UCDP_pgm_{StartOfHistory}-{EndOfHistory}.csv')
# Alternative (toggle on): export the single-country subset instead.
# ged_pgm_step2.to_csv(desktop+f'/UCDP_pgm_{StartOfHistory}-{EndOfHistory}_country.csv')

### ACLED

In [None]:
# PRIO-GRID-month ACLED data: define the queryset, publish it and fetch the
# result. No transforms are applied to any column.
#
# The fatality columns are read from the grid-level table `acled2_pgm`, in line
# with the GED pgm queryset reading from `ged2_pgm`. This cell previously pointed
# at the country-month table `acled2_cm`, which does not match the
# "priogrid_month" level of analysis of this queryset.
# NOTE(review): confirm table and column names with `viewser tables show acled2_pgm`.
qs_acled_pgm = (
    Queryset("ALM_pgm_acled_data_no_transforms", "priogrid_month")

    # identifiers
    .with_column(Column('country_name', from_table = 'country', from_column = 'name'))
    .with_column(Column('year', from_table = 'month', from_column = 'year_id'))
    .with_column(Column('month', from_table = 'month', from_column = 'month'))

    # non-logged target variable ACLED fatalities
    .with_column(Column("acled_sb_fat", from_table = "acled2_pgm", from_column = "acled_sb_fat"))
    .with_column(Column("acled_os_fat", from_table = "acled2_pgm", from_column = "acled_os_fat"))
    .with_column(Column("acled_ns_fat", from_table = "acled2_pgm", from_column = "acled_ns_fat"))
)

# From here on `qs_acled_pgm` holds the fetched table rather than the Queryset object.
qs_acled_pgm = qs_acled_pgm.publish().fetch()

# Summary of what came back. Index level 0 is reported as time "t" and level 1
# as the unit (presumably month_id and the grid-cell id — confirm).
print(f"A dataset with {len(qs_acled_pgm.columns)} columns, with "
      f"data between t {min(qs_acled_pgm.index.get_level_values(0))} "
      f"and {max(qs_acled_pgm.index.get_level_values(0))}. "
      f"({len(np.unique(qs_acled_pgm.index.get_level_values(1)))} units)"
     )

In [None]:
# Subset for given month range
# Keeps rows with StartOfHistory <= month_id <= EndOfHistory (both bounds inclusive).

acled_pgm_step1 = qs_acled_pgm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory}')

In [None]:
# Subset also for a given country
# Filters the month-range subset on the `country_name` column; change 'Kenya' to the
# country of interest. The name must match the value in that column exactly.

acled_pgm_step2 = acled_pgm_step1[acled_pgm_step1['country_name'] == 'Kenya']
# Bare expression so Jupyter renders the country subset.
acled_pgm_step2

#### Download as .csv

In [None]:
# Write the month-range subset (all countries) to the `desktop` folder.
# Requires the "Option 1" cell that defines `desktop` to have been run.
acled_pgm_step1.to_csv(desktop+f'/ACLED_pgm_{StartOfHistory}-{EndOfHistory}.csv')
# Alternative (toggle on): export the single-country subset instead.
# acled_pgm_step2.to_csv(desktop+f'/ACLED_pgm_{StartOfHistory}-{EndOfHistory}_country.csv')


In [None]:
# Completion marker: shows that a full "Run All" reached the last cell.
print("All done")