## Sandbox for thesis code

In [21]:
# set path
import importlib
import sys, os
sys.path.append(os.path.abspath(".."))

import requests
import pandas as pd
import pickle as pkl

from sklearn.ensemble import RandomForestRegressor

Step 1: Import sample dataset from VIEWS to use for testing

In [2]:
# Base URL of the VIEWS API.
# NOTE(review): retrieve_data_from_api does not read this variable; it builds
# its request URL from its own string literal.
api_url = "https://api.viewsforecasting.org"
# Run identifier and level of analysis ("cm" or "pgm") passed to retrieve_data_from_api
run = "predictors_fatalities002_0000_00"
loa = "cm"

Define a function that retrieves paginated data from the VIEWS API:

In [3]:
def retrieve_data_from_api(run: str,
                           loa: str,
                           verbose=False,
                           date_start=None,
                           date_end=None,
                           timeout=60,
                           ) -> pd.DataFrame:
    """
    Retrieve all pages of a VIEWS API run and return them as one DataFrame.

    The first page is requested from
    ``https://api.viewsforecasting.org/{run}/{loa}``; further pages are
    fetched by following the ``next_page`` URL of each response until it is
    empty or absent. The ``data`` records of every page are concatenated in
    the order they are received.

    Args:
        run (str): run identifier (e.g. fatalities001_2022_06_t01)
        loa (str): level of analysis; cm or pgm
        verbose (bool): whether to print progress messages
        date_start (str): start date for filtering data (YYYY-MM-DD), default is None
        date_end (str): end date for filtering data (YYYY-MM-DD), default is None
        timeout (float): seconds to wait for each HTTP request before giving up,
            default is 60

    Note:
        The date filter is only added to the request when BOTH ``date_start``
        and ``date_end`` are given; if only one is supplied it is ignored.

    Returns:
        pd.DataFrame: DataFrame containing the api data

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """

    # set up api url (date filter only when both bounds are provided)
    if date_start and date_end:
        api_url = f'https://api.viewsforecasting.org/{run}/{loa}?date_start={date_start}&date_end={date_end}'
    else:
        api_url = f'https://api.viewsforecasting.org/{run}/{loa}'

    def _fetch_page(url):
        """Request one page, fail loudly on an HTTP error, and return the decoded JSON."""
        response = requests.get(url, timeout=timeout)
        # check response status: without this an error page would only surface
        # later as a confusing JSON-decoding error or KeyError
        response.raise_for_status()
        return response.json()

    # first page
    page_data = _fetch_page(api_url)

    master_list = []
    master_list += page_data['data']

    # loop through pages; stop on an empty, null or missing next_page link
    i = 1
    while page_data.get('next_page'):

        # if verbose, print progress (page_count includes the first page fetched above)
        if verbose:
            print(f"Retrieving page {i}/{page_data['page_count']-1} at {loa} level...         ", end='\r', flush=True)

        page_data = _fetch_page(page_data['next_page'])

        master_list += page_data['data']
        i += 1

    # convert to dataframe
    forecasts = pd.DataFrame(master_list)

    return forecasts

Retrieve the data for the full available date range (no date filter).

In [4]:
# No date_start/date_end are passed, so the request carries no date filter;
# verbose=True prints per-page progress while the pages are fetched.
views_data = retrieve_data_from_api(run, loa, verbose=True)

Retrieving page 81/81 at cm level...         

Save the retrieved data as a pickle file.

In [11]:
# Persist the retrieved data so it can be reloaded without querying the API again.
raw_data_path = "../data/raw/views_data.pkl"
# Create the target directory first: on a fresh checkout it may not exist,
# in which case open() would raise FileNotFoundError.
os.makedirs(os.path.dirname(raw_data_path), exist_ok=True)
with open(raw_data_path, "wb") as f:
    pkl.dump(views_data, f)

## Test with basic random forest algorithm

In [13]:
# List the available columns before choosing the features and the target
views_data.columns

Index(['country_id', 'month_id', 'name', 'gwcode', 'isoab', 'year', 'month',
       'ucdp_ged_sb_best_sum', 'ucdp_ged_os_best_sum', 'vdem_v2x_libdem',
       'wb_wdi_se_prm_nenr', 'vdem_v2x_civlib', 'wb_wdi_sp_pop_totl',
       'vdem_v2x_gender', 'vdem_v2xcl_acjst', 'ucdp_ged_ns_best_sum',
       'wb_wdi_ny_gdp_pcap_kd', 'vdem_v2x_rule', 'vdem_v2xeg_eqdr',
       'wb_wdi_sp_dyn_imrt_in', 'wb_wdi_sp_dyn_le00_in'],
      dtype='str')

In [15]:
# Columns used as model inputs, in the order they are handed to the model
feature_list = [
    'vdem_v2x_libdem',
    'wb_wdi_se_prm_nenr',
    'vdem_v2x_civlib',
    'wb_wdi_sp_pop_totl',
    'vdem_v2x_gender',
    'vdem_v2xcl_acjst',
    'wb_wdi_ny_gdp_pcap_kd',
    'vdem_v2x_rule',
    'vdem_v2xeg_eqdr',
    'wb_wdi_sp_dyn_imrt_in',
    'wb_wdi_sp_dyn_le00_in',
]

# Name of the column used as the prediction target
target = "ucdp_ged_sb_best_sum"

Fit a basic random forest regression model to the data to test that it works.

## Test with Class structure

In [27]:
# Reload src.dynamic so edits made to the module after its first import are
# picked up without restarting the kernel, then re-import DynamicModel so the
# name is bound to the class from the reloaded module.
from src import dynamic
importlib.reload(dynamic)
from src.dynamic import DynamicModel

In [28]:
# NOTE(review): the trailing positional arguments 3 and (400, 500) are not
# explained in this notebook; their meaning is defined by DynamicModel in
# src/dynamic.py — TODO name them or pass them by keyword.
model = DynamicModel(views_data, feature_list, target, 3, (400, 500))

In [29]:
# Fit the model; what this does is defined by DynamicModel.fit in src/dynamic.py (not shown here)
model.fit()