In [1]:
import requests
import io
import json
import re
import numpy as np
import pandas as pd

In [2]:
# Starting Oct 2020, ACLED requires you to use your individual email/key to access the data and API.
# To store your key/email and import them into this code, first:
#  - create an ipynb file called:
#
#               acled_email_key.ipynb
#
#    and inside it:
#
#  - define this dict variable as:
#               acled_api_email_key = { 'key':'[your key here]',
#                                       'email':'[your email here]'}
#
# DON'T COMMIT THAT FILE TO GITHUB
#
# Then import it like this (you may need to "pip install import-ipynb" first):
#
import import_ipynb
from acled_email_key import acled_api_email_key



importing Jupyter notebook from acled_email_key.ipynb


In [3]:
# Base URL of the ACLED "read" endpoint; download_acled_data passes it to ping_acled for every page request.
acled_api_url_base = 'https://api.acleddata.com/acled/read' 

In [4]:

# load the ACLED ISO country code tables
def get_acled_country_code(country_name_str,
                           codes_file='ACLED-Country-Coverage-and-ISO-Codes_3.2020.xlsx'):
    '''
    Look up the ACLED ISO country code number for a country name.

    Reads the country/ISO-code spreadsheet provided by ACLED and returns the
    value of its 'ISO Code' column for the row whose 'Country' value equals
    country_name_str (exact match).

    ACLED now allows you to pass the country name as a query parameter, but
    it's buggy: e.g. passing "Niger" returned both "Niger" and "Nigeria" at
    the time of this coding, so the numeric code is used instead.

    Parameters
    ----------
    country_name_str : str
        Country name exactly as it appears in the spreadsheet's 'Country' column.
    codes_file : str, optional
        Path of the Excel file to read. Defaults to the March 2020 ACLED
        coverage file expected in the working directory.

    Returns
    -------
    The 'ISO Code' cell for that country, as read by pandas.

    Raises
    ------
    KeyError
        If country_name_str is not present in the 'Country' column.
    '''
    # set_index returns a new frame, so no in-place mutation is needed
    acled_country_codes = pd.read_excel(codes_file).set_index('Country')

    if country_name_str not in acled_country_codes.index:
        # Same exception type as a bare .loc miss, but with an actionable message
        raise KeyError(
            "{!r} is not listed in the 'Country' column of {!r}".format(
                country_name_str, codes_file))

    # single .loc[row, column] lookup instead of chained indexing
    return acled_country_codes.loc[country_name_str, 'ISO Code']
    

In [5]:
def get_acled_download_parameters(country_name_str, event_start_date_str):
    '''
    Build the query-parameter dict for an ACLED download.

    Pass it a country name and an event start date as strings; the resulting
    query selects events after that start date (event_date_where is '>').

    Parameters
    ----------
    country_name_str : str
        Country name, resolved to a code via get_acled_country_code.
    event_start_date_str : str
        Start date sent as the 'event_date' parameter.

    Returns
    -------
    dict
        A new dict holding the entries of acled_api_email_key plus 'iso',
        'event_date', 'event_date_where' and 'page' (starting at 1).
    '''
    # Copy the credentials instead of aliasing them: updating the shared
    # module-level dict would leak query fields into it and make every call
    # return (and overwrite) the very same object.
    ploads = dict(acled_api_email_key)
    ploads.update({ 'iso': get_acled_country_code(country_name_str),
                    'event_date': event_start_date_str,
                    'event_date_where': '>',
                    'page': 1 # starts at page 1
                    })
    return ploads

In [6]:
# a function that updates our ACLED GET calls to help us sequence it with page numbers
def ping_acled(api_url, api_parameters, page_num=1, timeout=60):
    '''
    Send one GET request to the API for a given page and return the parsed JSON.

    Parameters
    ----------
    api_url : str
        Base url of the api.
    api_parameters : dict
        Query parameters. Its 'page' entry is set (or overwritten) in place
        with page_num before the request is sent.
    page_num : int, optional
        Page number to request; defaults to 1.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook forever. Defaults to 60.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    '''

    api_parameters.update({'page': page_num}) # set the page number in the parameters

    r = requests.get(api_url, params=api_parameters, timeout=timeout)

    if r.status_code == 200:
        print('success')
    elif r.status_code == 404:
        print('not found')

    # Fail here with the HTTP status rather than later with an unrelated
    # JSON-decoding or missing-key error on an error page.
    r.raise_for_status()

    return r.json()

In [7]:
def download_acled_data(acled_download_parameters, page_size=500):
    """
    Download every page of results for a query and return them as one list.

    Pass it a dict of parameters created with get_acled_download_parameters.
    Pages are requested one after another, starting at page 1, through
    ping_acled; paging stops at the first page whose reported 'count' is
    below page_size.

    Parameters
    ----------
    acled_download_parameters : dict
        Query parameters. The dict is copied, so the caller's object is not
        modified while paging.
    page_size : int, optional
        Number of rows that marks a page as "full" (default 500). Any page
        reporting at least this many rows triggers a request for the next page.

    Returns
    -------
    list
        The concatenated contents of each page's 'data' entry, in page order.

    Raises
    ------
    ValueError
        If page_size is smaller than 1 (the loop could never terminate).
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    # Work on a copy so the caller's dict is not left holding a stale 'page'.
    params = dict(acled_download_parameters)

    results = [] # rows collected from all pages
    count = 0  # running total of the per-page counts
    page = 1 # per the documentation each page gives more results, starting at page 1
    num_results = page_size # pretend the "previous" page was full so the loop starts

    # ">=" rather than "==": if the server returns more rows per page than
    # expected, an equality test would stop after page 1 and silently
    # truncate the download.
    while num_results >= page_size:
        print ("downloading acled page ", page) # just to see our progress

        params['page'] = page # set the page number
        data = ping_acled(acled_api_url_base, params, page)

        results.extend(data['data']) # flatten as we go instead of in a second pass
        num_results = data['count'] # update our condition
        count += num_results
        page += 1
        print ("number of results ", count) # track our progress

    return results

In [8]:
def recast_acled_numerical_columns( acled_df ):
    """
    Return a copy of an ACLED dataframe with its numeric/date columns recast.

    Pass it a pandas dataframe created from an ACLED download. The id, code,
    precision, interaction, fatalities and timestamp columns are cast to int,
    latitude/longitude to float, and event_date to datetime64[ns]; all other
    columns are left as they are. The input frame is not modified.

    Parameters
    ----------
    acled_df : pandas.DataFrame
        Must contain every column named in the mapping below.

    Returns
    -------
    pandas.DataFrame
        New frame with the converted dtypes.

    Raises
    ------
    KeyError
        If one of the listed columns is missing (raised by DataFrame.astype).
    ValueError
        If a value cannot be converted to the requested type.
    """

    # convert numerical data types to increase calculations speed
    acled_df = acled_df.astype({
        "data_id": int,
        "iso": int,
        "event_id_no_cnty": int,
        # an explicit unit is required: pandas >= 2.0 rejects the unit-less
        # np.datetime64 with a TypeError
        "event_date": "datetime64[ns]",
        "year": int,
        "time_precision": int,
        "inter1": int,
        "inter2": int,
        "interaction": int,
        "latitude": float,
        "longitude": float,
        "geo_precision": int,
        "fatalities": int,
        "timestamp": int })

    return acled_df