# Explore and download conflict data

This notebook allows you explore and extract subsets of UCDP GED, UCDP Candidate, and ACLED data aggregated to the VIEWS levels of analysis. No other data transforms are applied. 

**NOTE: Requires a certificate/access to the VIEWS database.**

In [87]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn import linear_model
# Views 3
from viewser.operations import fetch
from viewser import Queryset, Column
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
import views_dataviz
from views_mapper2.label_writer import *

In [88]:
# !viewser tables list

In [89]:
# !viewser tables show 'faostat_pp_cm'      ## will show all variables in that table

In [90]:
# !viewser queryset list

# Specify the output folder

In [91]:
import os
home = os.path.expanduser("~")

#### Option 1: Save to your desktop

In [92]:
desktop = home+'/Desktop' # (Toggle on to save to desktop)

#### Option 2: Save to the VIEWS Dropbox

In [93]:
# Mydropbox = home + '/Dropbox (ViEWS)/ViEWS/' # (Toggle on to save to Dropbox)
# Monthly_updates = Mydropbox + 'DataReleases/MonthlyUpdates/' # (Toggle on to save to Dropbox)

# Find and specify time period and country of interest

## Find month_id of interest

In [94]:
def vid2date(i):
    year=str(ViewsMonth(i).year)
    month=str(ViewsMonth(i).month)
    return year+'/'+monthb

In [95]:
# Print month_ids for the chosen period

print(ViewsMonth.from_year_month(year=2022, month=7)) 

ViewsMonth(id=511) #=> year:2022, month:7


## Specify time period of interest

In [96]:
StartOfHistory=121 # Jan 1990, change as needed
EndOfHistory=512 # Change as needed

## Find country of interest

In [97]:
name_date2cid('Kenya','2017-07-01')

237

# Fetch and download conflict data 


## Country-month level (*cm*)

### UCDP GED/UCDP Candidate

In [98]:
qs_ged_cm = (Queryset("ALM_cm_ged_data_no_transforms", "country_month")

# identifiers
             
.with_column(Column('year', from_table = 'month', from_column = 'year_id'))
.with_column(Column('month', from_table = 'month', from_column = 'month'))
.with_column(Column('country_name', from_table = 'country', from_column = 'name'))

# non-logged target variable GED fatalities
         
.with_column(Column("ged_best_sb", from_table = "ged2_cm", from_column = "ged_sb_best_sum_nokgi")
             #.transform.missing.fill()
            )                   
.with_column(Column("ged_best_os", from_table = "ged2_cm", from_column = "ged_os_best_sum_nokgi")
             #.transform.missing.fill()
            )   
.with_column(Column("ged_best_ns", from_table = "ged2_cm", from_column = "ged_ns_best_sum_nokgi")
             #.transform.missing.fill()
            )  
              )
      
qs_ged_cm = qs_ged_cm.publish().fetch()

print(f"A dataset with {len(qs_ged_cm.columns)} columns, with "
      f"data between t {min(qs_ged_cm.index.get_level_values(0))} "
      f"and {max(qs_ged_cm.index.get_level_values(0))}. "
      f"({len(np.unique(qs_ged_cm.index.get_level_values(1)))} units)"
     )

 .    A dataset with 6 columns, with data between t 1 and 852. (213 units)


In [99]:
qs_ged_cm # displays the dataframe above

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,country_name,ged_best_sb,ged_best_os,ged_best_ns
month_id,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1,1980,1,Guyana,,,
1,2,1980,1,Suriname,,,
1,3,1980,1,Trinidad and Tobago,,,
1,4,1980,1,Venezuela,,,
1,5,1980,1,Samoa,,,
...,...,...,...,...,...,...,...
852,242,2050,12,Tanzania,,,
852,243,2050,12,Morocco,,,
852,244,2050,12,Mauritania,,,
852,245,2050,12,Sudan,,,


In [100]:
ged_cm_subset= qs_ged_cm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory}')
#ged_cm_subset= qs_ged_cm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory} and country_id == 237')

ged_cm_subset # displays the subset

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,country_name,ged_best_sb,ged_best_os,ged_best_ns
month_id,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
121,1,1990,1,Guyana,0.0,0.0,0.0
121,2,1990,1,Suriname,0.0,0.0,0.0
121,3,1990,1,Trinidad and Tobago,0.0,0.0,0.0
121,4,1990,1,Venezuela,0.0,12.0,0.0
121,5,1990,1,Samoa,0.0,0.0,0.0
...,...,...,...,...,...,...,...
512,242,2022,8,Tanzania,0.0,0.0,0.0
512,243,2022,8,Morocco,0.0,0.0,0.0
512,244,2022,8,Mauritania,0.0,0.0,0.0
512,245,2022,8,Sudan,7.0,23.0,9.0


#### Download as .csv

In [101]:
ged_cm_subset.to_csv(desktop+f'/UCDP_cm_{StartOfHistory}-{EndOfHistory}.csv') # change name of csv
# ged_cm_subset.to_csv(desktop+f'/UCDP_cm_{StartOfHistory}-{EndOfHistory}_country.csv') # change name of csv

### ACLED

In [102]:
qs_acled_cm = (Queryset("ALM_cm_acled_data_no_transforms", "country_month")

# identifiers
               
.with_column(Column('year', from_table = 'month', from_column = 'year_id'))
.with_column(Column('month', from_table = 'month', from_column = 'month'))
.with_column(Column('country_name', from_table = 'country', from_column = 'name'))        
         
# non-logged target variable ACLED fatalities
       
.with_column(Column("acled_sb_fat", from_table = "acled2_cm", from_column = "acled_sb_fat")) 
.with_column(Column("acled_os_fat", from_table = "acled2_cm", from_column = "acled_os_fat")) 
.with_column(Column("acled_ns_fat", from_table = "acled2_cm", from_column = "acled_ns_fat"))
        )
      
qs_acled_cm = qs_acled_cm.publish().fetch()

print(f"A dataset with {len(qs_acled_cm.columns)} columns, with "
      f"data between t {min(qs_acled_cm.index.get_level_values(0))} "
      f"and {max(qs_acled_cm.index.get_level_values(0))}. "
      f"({len(np.unique(qs_acled_cm.index.get_level_values(1)))} units)"
     )

 .    A dataset with 6 columns, with data between t 1 and 852. (213 units)


In [103]:
acled_cm_subset= qs_acled_cm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory}')
#acled_cm_subset= qs_acled_cm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory} and country_id == 237')

acled_cm_subset

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,country_name,acled_sb_fat,acled_os_fat,acled_ns_fat
month_id,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
121,1,1990,1,Guyana,,,
121,2,1990,1,Suriname,,,
121,3,1990,1,Trinidad and Tobago,,,
121,4,1990,1,Venezuela,,,
121,5,1990,1,Samoa,,,
...,...,...,...,...,...,...,...
512,242,2022,8,Tanzania,0.0,0.0,0.0
512,243,2022,8,Morocco,0.0,0.0,0.0
512,244,2022,8,Mauritania,0.0,0.0,0.0
512,245,2022,8,Sudan,11.0,31.0,41.0


#### Download as .csv

In [104]:
acled_cm_subset.to_csv(desktop+f'/ACLED_cm_{StartOfHistory}-{EndOfHistory}.csv') # change name of csv file
#acled_cm_subset.to_csv(desktop+f'/ACLED_cm_{StartOfHistory}-{EndOfHistory_country}.csv') # change name of csv file

## PRIO-GRID-month (*pgm*) data

### UCDP GED/UCDP Candidate

In [105]:
qs_ged_pgm = (Queryset("ALM_pgm_ged_data_no_transforms", "priogrid_month")
         
# identifiers
         
.with_column(Column('country_name', from_table = 'country', from_column = 'name'))
.with_column(Column('year', from_table = 'month', from_column = 'year_id'))
.with_column(Column('month', from_table = 'month', from_column = 'month'))      
         
# non-logged target variable GED fatalities
         
.with_column(Column("ged_best_sb", from_table = "ged2_pgm", from_column = "ged_sb_best_sum_nokgi"))                   
.with_column(Column("ged_best_os", from_table = "ged2_pgm", from_column = "ged_os_best_sum_nokgi"))   
.with_column(Column("ged_best_ns", from_table = "ged2_pgm", from_column = "ged_ns_best_sum_nokgi"))
             )
      
qs_ged_pgm = qs_ged_pgm.publish().fetch()

print(f"A dataset with {len(qs_ged_pgm.columns)} columns, with "
      f"data between t {min(qs_ged_pgm.index.get_level_values(0))} "
      f"and {max(qs_ged_pgm.index.get_level_values(0))}. "
      f"({len(np.unique(qs_ged_pgm.index.get_level_values(1)))} units)"
     )

 .    A dataset with 6 columns, with data between t 1 and 852. (13110 units)


In [106]:
# Subset for given month range

ged_pgm_step1 = qs_ged_pgm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory}')

In [107]:
# Subset also for a given country

ged_pgm_step2 = ged_pgm_step1[ged_pgm_step1['country_name'] == 'Kenya']
ged_pgm_step2

Unnamed: 0_level_0,Unnamed: 1_level_0,country_name,year,month,ged_best_sb,ged_best_os,ged_best_ns
month_id,priogrid_gid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
121,123558,Kenya,1990,1,0.0,0.0,0.0
121,123559,Kenya,1990,1,0.0,0.0,0.0
121,123560,Kenya,1990,1,0.0,0.0,0.0
121,124277,Kenya,1990,1,0.0,0.0,0.0
121,124278,Kenya,1990,1,0.0,0.0,0.0
...,...,...,...,...,...,...,...
512,135790,Kenya,2022,8,0.0,0.0,0.0
512,135791,Kenya,2022,8,0.0,0.0,0.0
512,135792,Kenya,2022,8,0.0,0.0,0.0
512,135793,Kenya,2022,8,0.0,0.0,0.0


#### Download as .csv

In [108]:
ged_pgm_step1.to_csv(desktop+f'/UCDP_pgm_{StartOfHistory}-{EndOfHistory}.csv')
# ged_pgm_step2.to_csv(desktop+f'/UCDP_pgm_{StartOfHistory}-{EndOfHistory}_country.csv')

### ACLED

In [109]:
qs_acled_pgm = (Queryset("ALM_pgm_acled_data_no_transforms", "priogrid_month")
         
# identifiers
         
.with_column(Column('country_name', from_table = 'country', from_column = 'name'))
.with_column(Column('year', from_table = 'month', from_column = 'year_id'))
.with_column(Column('month', from_table = 'month', from_column = 'month'))      
         
# non-logged target variable ACLED fatalities
       
.with_column(Column("acled_sb_fat", from_table = "acled2_cm", from_column = "acled_sb_fat")) 
.with_column(Column("acled_os_fat", from_table = "acled2_cm", from_column = "acled_os_fat")) 
.with_column(Column("acled_ns_fat", from_table = "acled2_cm", from_column = "acled_ns_fat"))
         )
      
qs_acled_pgm = qs_acled_pgm.publish().fetch()

print(f"A dataset with {len(qs_acled_pgm.columns)} columns, with "
      f"data between t {min(qs_acled_pgm.index.get_level_values(0))} "
      f"and {max(qs_acled_pgm.index.get_level_values(0))}. "
      f"({len(np.unique(qs_acled_pgm.index.get_level_values(1)))} units)"
     )

 .    A dataset with 6 columns, with data between t 1 and 852. (13110 units)


In [110]:
# Subset for given month range

acled_pgm_step1 = qs_acled_pgm.query(f'month_id >= {StartOfHistory} and month_id <= {EndOfHistory}')

In [111]:
# Subset also for a given country

acled_pgm_step2 = acled_pgm_step1[acled_pgm_step1['country_name'] == 'Kenya']
acled_pgm_step2

Unnamed: 0_level_0,Unnamed: 1_level_0,country_name,year,month,acled_sb_fat,acled_os_fat,acled_ns_fat
month_id,priogrid_gid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
121,123558,Kenya,1990,1,,,
121,123559,Kenya,1990,1,,,
121,123560,Kenya,1990,1,,,
121,124277,Kenya,1990,1,,,
121,124278,Kenya,1990,1,,,
...,...,...,...,...,...,...,...
512,135790,Kenya,2022,8,0.0,7.0,9.0
512,135791,Kenya,2022,8,0.0,7.0,9.0
512,135792,Kenya,2022,8,0.0,7.0,9.0
512,135793,Kenya,2022,8,0.0,7.0,9.0


#### Download as .csv

In [112]:
acled_pgm_step1.to_csv(desktop+f'/ACLED_pgm_{StartOfHistory}-{EndOfHistory}.csv')
# acled_pgm_step2.to_csv(desktop+f'/ACLED_pgm_{StartOfHistory}-{EndOfHistory}_country.csv')


In [113]:
print("All done")

All done
