## Sandbox for thesis code

In [19]:
# set path
import importlib
import sys, os
sys.path.append(os.path.abspath(".."))

import requests
import pandas as pd
import pickle as pkl

from src.auxilaries import retrieve_data_from_api

from sklearn.ensemble import RandomForestRegressor

Step 1: Import a sample dataset from VIEWS to use for testing

In [None]:
# Base URL of the VIEWS forecasting API.
# NOTE(review): api_url is not passed to retrieve_data_from_api in the cells below — confirm it is still needed.
api_url = "https://api.viewsforecasting.org"
# Level of analysis handed to the API helper (presumably "cm" = country-month — TODO confirm).
loa = "cm"

# Name of the VIEWS run/dataset to download.
fat_002_run = "predictors_fatalities002_2025_12"

And retrieve data for the full range.

In [None]:
# Download the dataset named by fat_002_run at the chosen level of analysis.
# The recorded run retrieved 81 pages; presumably this requires network access — TODO confirm.
fat_002_data = retrieve_data_from_api(fat_002_run, loa, verbose=True)

Retrieving page 81/81 at cm level...         

And save it as a pickle file.

In [None]:
# Persist the downloaded data so later cells can reload it from disk.
# Create the target directory first: open(..., "wb") raises FileNotFoundError
# when ../data/raw does not exist (e.g. on a fresh checkout).
os.makedirs("../data/raw", exist_ok=True)
with open("../data/raw/fat_002_data.pkl", "wb") as f:
    pkl.dump(fat_002_data, f)

## Test with basic random forest algorithm

In [35]:
# Reload the dataset written by the save cell above.
# Unpickling executes arbitrary code, so only load files this notebook produced itself.
views_data = pd.read_pickle("../data/raw/fat_002_data.pkl")

In [31]:
# List the columns available in the loaded dataset.
views_data.columns

Index(['country_id', 'month_id', 'name', 'gwcode', 'isoab', 'year', 'month',
       'wb_wdi_sp_dyn_le00_in', 'wb_wdi_sp_dyn_imrt_in', 'wb_wdi_sp_pop_totl',
       'wb_wdi_se_prm_nenr', 'ucdp_ged_sb_best_sum', 'ucdp_ged_ns_best_sum',
       'vdem_v2x_rule', 'wb_wdi_ny_gdp_pcap_kd', 'ucdp_ged_os_best_sum',
       'vdem_v2x_libdem', 'vdem_v2x_gender', 'vdem_v2x_civlib',
       'vdem_v2xcl_acjst', 'vdem_v2xeg_eqdr'],
      dtype='str')

In [191]:
# Column the models are trained to predict; it is also listed among the predictors below.
target = "ucdp_ged_sb_best_sum"

# Predictor columns, one per line, in the order they are handed to the model manager.
# NOTE(review): 'wb_wdi_se_prm_nenr' exists in views_data but is not listed here —
# confirm the omission is intentional.
feature_list = [
    "wb_wdi_sp_dyn_le00_in",
    "wb_wdi_sp_dyn_imrt_in",
    "wb_wdi_sp_pop_totl",
    "ucdp_ged_sb_best_sum",
    "ucdp_ged_ns_best_sum",
    "vdem_v2x_rule",
    "wb_wdi_ny_gdp_pcap_kd",
    "ucdp_ged_os_best_sum",
    "vdem_v2x_libdem",
    "vdem_v2x_gender",
    "vdem_v2x_civlib",
    "vdem_v2xcl_acjst",
    "vdem_v2xeg_eqdr",
]

## Test with Class structure

In [231]:
# Import the project modules and reload them so that edits to src.dynamic and
# src.auxilaries are picked up without restarting the kernel.
from src import dynamic
from src import auxilaries
importlib.reload(dynamic)
importlib.reload(auxilaries)
# These from-imports must come after the reloads so the names bind to the
# objects defined by the reloaded modules rather than the stale ones.
from src.auxilaries import date_to_month_id, month_id_to_ym
from src.dynamic import DynamicModel, DynamicModelManager

In [232]:
# Build the model manager on the loaded data for a single forecast step.
# NOTE(review): the window sizes and full_split are presumably expressed in
# month_id units — TODO confirm against DynamicModelManager.
full_model = DynamicModelManager(
    steps=[1],
    data=views_data,
    features=feature_list,
    target=target,
    train_window_size=120,
    test_window_size=100,
    slide_window_size=12,
    full_split=(300, 528),
)

In [233]:
# Fit every model held by the manager (the recorded run fitted 9 models in about 29 seconds).
full_model.fit()

Fitting models: 100%|██████████| 9/9 [00:28<00:00,  3.22s/it]

Finished fitting all models in 28.95 seconds





In [234]:
# Generate predictions on views_data.
# NOTE(review): the window sizes here (12 / 5) differ from those given when the
# manager was constructed (100 / 12) — confirm that the override is intended.
full_model.predict(
    data=views_data,
    test_window_size=12,
    slide_window_size=5,
)

Predicting: 100%|██████████| 158/158 [00:01<00:00, 138.75it/s]


In [235]:
# Show the prediction table stored under key 1
# (presumably the forecast step from steps=[1] — TODO confirm).
full_model.predictions[1].predictions

Unnamed: 0,target_month_id,country_id,prediction
57126,428,1,0.0
57127,428,2,0.0
57128,428,3,0.0
57129,428,4,0.0
57132,428,7,0.0
...,...,...,...
59603,440,237,6.0
59604,440,242,0.0
59605,440,243,0.0
59606,440,244,0.0


In [236]:
# Preview the input frame.
# NOTE(review): the output now includes 'target_month_id' and 'target', which are
# absent from the earlier column listing — views_data appears to have been modified
# in place after loading (presumably by DynamicModelManager; confirm). If so,
# re-running earlier cells may not reproduce the same state.
views_data.head()

Unnamed: 0,country_id,month_id,name,gwcode,isoab,year,month,wb_wdi_sp_dyn_le00_in,wb_wdi_sp_dyn_imrt_in,wb_wdi_sp_pop_totl,...,vdem_v2x_rule,wb_wdi_ny_gdp_pcap_kd,ucdp_ged_os_best_sum,vdem_v2x_libdem,vdem_v2x_gender,vdem_v2x_civlib,vdem_v2xcl_acjst,vdem_v2xeg_eqdr,target_month_id,target
0,1,121,Guyana,110,GUY,1990,1,62.254,52.6,749894.0,...,0.498,2377.218,0.0,0.219,0.796,0.741,0.789,0.586,122,0.0
1,2,121,Suriname,115,SUR,1990,1,62.088,37.4,412493.0,...,0.751,6474.9557,0.0,0.559,0.694,0.858,0.751,0.64,122,0.0
2,3,121,Trinidad and Tobago,52,TTO,1990,1,67.591,29.6,1252208.0,...,0.86,6058.2152,0.0,0.607,0.81,0.885,0.9,0.816,122,0.0
3,4,121,Venezuela,101,VEN,1990,1,70.964,24.3,19827010.0,...,0.54,3762.2751,12.0,0.595,0.806,0.871,0.634,0.649,122,0.0
4,5,121,Samoa,990,WSM,1990,1,67.808,22.8,169042.0,...,,2534.122,0.0,,,,,,122,0.0
