## Sandbox for thesis code

In [4]:
# set path
import importlib
import sys, os
sys.path.append(os.path.abspath(".."))

import requests
import pandas as pd
import pickle as pkl

from sklearn.ensemble import RandomForestRegressor

Step 1: Import sample dataset from VIEWS to use for testing

In [2]:
api_url = "https://api.viewsforecasting.org"
run = "predictors_fatalities002_0000_00"
loa = "cm"

Make function to scrape data from API:

In [3]:
def retrieve_data_from_api(run: str, 
                           loa: str, 
                           verbose=False,
                           date_start=None,
                           date_end=None
                           ) -> pd.DataFrame:
    """ 
    Retrieve data from the API and return as a pandas DataFrame.

    Args:
        run (str): run identifier (e.g. fatalities001_2022_06_t01)
        loa (str): level of analysis; cm or pgm
        verbose (bool): whether to print progress messages
        start_date (str): start date for filtering data (YYYY-MM-DD), default is None
        end_date (str): end date for filtering data (YYYY-MM-DD), default is None

    Returns:
        pd.DataFrame: DataFrame containing the api data
    """

    # set up api url
    if date_start and date_end:
        api_url = f'https://api.viewsforecasting.org/{run}/{loa}?date_start={date_start}&date_end={date_end}'
    else:
        api_url = f'https://api.viewsforecasting.org/{run}/{loa}'

    # get response
    response = requests.get(api_url)

    # check response status
    page_data=response.json()

    master_list=[]
    master_list+=page_data['data']

    # loop through pages
    i = 1
    while page_data['next_page'] != '':

        # if verbose, print progress
        if verbose:
            print(f"Retrieving page {i}/{page_data['page_count']-1} at {loa} level...         ", end='\r', flush=True)

        r=requests.get(page_data['next_page'])
        page_data=r.json()

        master_list+=page_data['data']
        i += 1

    # convert to dataframe
    forecasts=pd.DataFrame(master_list)

    return forecasts

And retrieve data for the full range.

In [4]:
views_data = retrieve_data_from_api(run, loa, verbose=True)

Retrieving page 81/81 at cm level...         

And save as pickle file

In [11]:
with open("../data/raw/views_data.pkl", "wb") as f:
    pkl.dump(views_data, f)

## Test with basic random forest algorithm

In [5]:
with open("../data/raw/views_data.pkl", "rb") as f:
    views_data = pkl.load(f)

EOFError: Ran out of input

In [13]:
views_data.columns

Index(['country_id', 'month_id', 'name', 'gwcode', 'isoab', 'year', 'month',
       'ucdp_ged_sb_best_sum', 'ucdp_ged_os_best_sum', 'vdem_v2x_libdem',
       'wb_wdi_se_prm_nenr', 'vdem_v2x_civlib', 'wb_wdi_sp_pop_totl',
       'vdem_v2x_gender', 'vdem_v2xcl_acjst', 'ucdp_ged_ns_best_sum',
       'wb_wdi_ny_gdp_pcap_kd', 'vdem_v2x_rule', 'vdem_v2xeg_eqdr',
       'wb_wdi_sp_dyn_imrt_in', 'wb_wdi_sp_dyn_le00_in'],
      dtype='str')

In [15]:
# make list of features
feature_list = [
    'vdem_v2x_libdem', 'wb_wdi_se_prm_nenr', 'vdem_v2x_civlib', 'wb_wdi_sp_pop_totl',
    'vdem_v2x_gender', 'vdem_v2xcl_acjst', 'wb_wdi_ny_gdp_pcap_kd', 'vdem_v2x_rule', 
    'vdem_v2xeg_eqdr','wb_wdi_sp_dyn_imrt_in', 'wb_wdi_sp_dyn_le00_in'
] 
target = "ucdp_ged_sb_best_sum"

Fit a basic random forest regression model to the data to test that it works.

## Test with Class structure

In [11]:
from src import dynamic
importlib.reload(dynamic)
from src.dynamic import DynamicModel, DynamicModelManager

In [7]:
model = DynamicModel(3, (400, 500))

In [None]:
model.fit(views_data, feature_list, target, )

In [12]:
full_model = DynamicModelManager(
    steps = [1,3,6,12],
    data = pd.DataFrame(),
    features = ["test"],
    target = "ged_sb",
    train_window_size=120,
    test_window_size=100,
    slide_window_size = 12,
    full_split = (0, 550)
)

In [28]:
full_model.fit()

Finished fitting all models


In [30]:
for model in full_model.models:
    print(full_model.models[model].train_split)

(12, 132)
(24, 144)
(36, 156)
(48, 168)
(60, 180)
(72, 192)
(84, 204)
(96, 216)
(108, 228)
(120, 240)
(132, 252)
(144, 264)
(156, 276)
(168, 288)
(180, 300)
(192, 312)
(204, 324)
(216, 336)
(228, 348)
(240, 360)
(252, 372)
(264, 384)
(276, 396)
(288, 408)
(300, 420)
(312, 432)
(324, 444)
(336, 456)
(348, 468)
(360, 480)
(372, 492)
(384, 504)
(396, 516)
(408, 528)
(420, 540)
(12, 132)
(24, 144)
(36, 156)
(48, 168)
(60, 180)
(72, 192)
(84, 204)
(96, 216)
(108, 228)
(120, 240)
(132, 252)
(144, 264)
(156, 276)
(168, 288)
(180, 300)
(192, 312)
(204, 324)
(216, 336)
(228, 348)
(240, 360)
(252, 372)
(264, 384)
(276, 396)
(288, 408)
(300, 420)
(312, 432)
(324, 444)
(336, 456)
(348, 468)
(360, 480)
(372, 492)
(384, 504)
(396, 516)
(408, 528)
(420, 540)
(12, 132)
(24, 144)
(36, 156)
(48, 168)
(60, 180)
(72, 192)
(84, 204)
(96, 216)
(108, 228)
(120, 240)
(132, 252)
(144, 264)
(156, 276)
(168, 288)
(180, 300)
(192, 312)
(204, 324)
(216, 336)
(228, 348)
(240, 360)
(252, 372)
(264, 384)
(276, 396)
(