In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Add the python path to the folder containing some useful custom packages.
import sys
sys.path.insert(0, "../../packages/")
from TsIP.TsIP import TsIP
from tools import find_multiple_sets
from LagsCreator.LagsCreator import LagsCreator

## Dataset

In [3]:
COUNTRY = "Yemen"

In [None]:
PATH_TO_DATA_FOLDER = "../../Dataset time-series/data/" + COUNTRY + "/"

In [4]:
# Load the dataset of the training sets.
train = pd.read_csv(PATH_TO_DATA_FOLDER + "train_smooth.csv", header = [0, 1], index_col = 0)
train.index.name = "Datetime"
train.index = pd.to_datetime(train.index)
freq = "D"
train.index.freq = freq

In [5]:
# Load the dataset of the test sets.
test = pd.read_csv(PATH_TO_DATA_FOLDER + "test_target.csv", header = [0, 1], index_col = 0)
test.index.name = "Datetime"
test.index = pd.to_datetime(test.index)
freq = "D"
test.index.freq = freq

In [6]:
# Load the dataset of the whole time-series of the fcs indicator.
target = pd.read_csv(PATH_TO_DATA_FOLDER + "all_target.csv", header = [0, 1], index_col = 0)
target.index.name = "Datetime"
target.index = pd.to_datetime(target.index)
freq = "D"
target.index.freq = freq

In [7]:
TEST_SIZE = 30
FREQ = train.index.freq

In [8]:
TRAIN = train.copy()

In [9]:
PROVINCES = TRAIN.columns.get_level_values(0).unique()
PROVINCES

Index(['Abyan', 'Aden', 'Al Bayda', 'Al Dhale'e', 'Al Hudaydah', 'Al Jawf',
       'Al Maharah', 'Al Mahwit', 'Amanat Al Asimah', 'Amran', 'Dhamar',
       'Hajjah', 'Ibb', 'Lahj', 'Marib', 'Raymah', 'Sa'ada', 'Sana'a',
       'Shabwah', 'Taizz'],
      dtype='object', name='AdminStrata')

In [10]:
PREDICTORS = TRAIN.columns.get_level_values(1).unique()
PREDICTORS

Index(['1 Month Anomaly (%) Rainfall', '3 Months Anomaly (%) Rainfall',
       'Cereals and tubers', 'Exchange rate (USD/LCU)', 'FCS', 'Fatality',
       'Lat', 'Lon', 'NDVI Anomaly', 'Population', 'Rainfall (mm)', 'Ramadan',
       'rCSI'],
      dtype='object', name='Indicator')

In [11]:
# Get the training and test sets.
TRAIN_NORMALIZED_SETS = find_multiple_sets(train)
TEST_TARGET_SETS = find_multiple_sets(test)

## Training & Validation

In [12]:
lags_dict = dict()
# Define lags for each indicator.
lags_dict["1 Month Anomaly (%) Rainfall"] = 2
lags_dict["3 Months Anomaly (%) Rainfall"] = 1
lags_dict["Cereals and tubers"] = 2
lags_dict["Exchange rate (USD/LCU)"] = 5
lags_dict["FCS"] = 2
lags_dict["Fatality"] = 2
lags_dict["NDVI Anomaly"] = 2
lags_dict["Rainfall (mm)"] = 2
lags_dict["rCSI"] = 2
lags_dict["Lat"] = 0
lags_dict["Lon"] = 0
lags_dict["Population"] = 0
lags_dict["Ramadan"] = 1

In [13]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [14]:
val_losses_h = dict()

In [16]:
for h in range(TEST_SIZE):
    print("Training and validation for prediction horizon: %d" % (h+1))
    X_train_list, y_train_list, X_val_list, y_val_list = list(), list(), list(), list()
    for train_normalized in TRAIN_NORMALIZED_SETS:
        # Create training and validation samples.  
        for PROVINCE in PROVINCES:
            creator = LagsCreator(train_normalized[[PROVINCE]], lags_dictionary = lags_dict, target = "FCS")
            X_train, y_train, X_val, y_val, _ = creator.to_supervised(n_out = TEST_SIZE, single_step = True, h = h+1, return_dataframe = True,
                                                                      feature_time = True, validation = True, return_single_level = True, 
                                                                      dtype = np.float32)
            X_train_list.append(X_train)
            y_train_list.append(y_train)
            X_val_list.append(X_val)
            y_val_list.append(y_val)  

    X_train = pd.concat(X_train_list).reset_index(drop = True)
    y_train = pd.concat(y_train_list).reset_index(drop = True)
    
    # Train the model.
    model = xgb.XGBRegressor(n_estimators = 100, objective = "reg:squarederror")   
    model.fit(X_train, y_train)  
    
    X_val = pd.concat(X_val_list).reset_index(drop = True)
    y_val = pd.concat(y_val_list).reset_index(drop = True)
    
    # Validation.
    y_hats = model.predict(X_val)
    # Compute validation error.
    val_loss = mean_squared_error(y_val.values.flatten(), y_hats)
    val_losses_h[h+1] = val_loss

Training and validation for prediction horizon: 1
Training and validation for prediction horizon: 2
Training and validation for prediction horizon: 3
Training and validation for prediction horizon: 4
Training and validation for prediction horizon: 5
Training and validation for prediction horizon: 6
Training and validation for prediction horizon: 7
Training and validation for prediction horizon: 8
Training and validation for prediction horizon: 9
Training and validation for prediction horizon: 10
Training and validation for prediction horizon: 11
Training and validation for prediction horizon: 12
Training and validation for prediction horizon: 13
Training and validation for prediction horizon: 14
Training and validation for prediction horizon: 15
Training and validation for prediction horizon: 16
Training and validation for prediction horizon: 17


KeyboardInterrupt: 

In [21]:
val_losses_h

{1: 0.28121138,
 2: 0.9905112,
 3: 2.0189388,
 4: 3.3603206,
 5: 4.7964625,
 6: 6.3696,
 7: 8.07987,
 8: 9.562505,
 9: 11.854316,
 10: 13.578206,
 11: 15.36817,
 12: 17.191833,
 13: 18.321814,
 14: 22.209806,
 15: 24.07338,
 16: 27.094795,
 17: 31.21045,
 18: 32.763477,
 19: 33.91556,
 20: 38.3616,
 21: 40.759476,
 22: 41.180992,
 23: 46.210106,
 24: 47.536396,
 25: 50.85919,
 26: 50.87438,
 27: 51.137665,
 28: 52.892708,
 29: 49.80466,
 30: 49.44041}

In [15]:
lags_dict

{'1 Month Anomaly (%) Rainfall': 2,
 '3 Months Anomaly (%) Rainfall': 1,
 'Cereals and tubers': 2,
 'Exchange rate (USD/LCU)': 5,
 'FCS': 2,
 'Fatality': 2,
 'NDVI Anomaly': 2,
 'Rainfall (mm)': 2,
 'rCSI': 2,
 'Lat': 0,
 'Lon': 0,
 'Population': 0,
 'Ramadan': 1}

In [49]:
creator = LagsCreator(TRAIN_NORMALIZED_SETS[0][["Abyan"]], lags_dictionary = lags_dict, target = "FCS")
X_train, y_train, X_val, y_val, _ = creator.to_supervised(n_out = TEST_SIZE, single_step = True, h = 30, return_dataframe = False,
                                                          feature_time = False, validation = True, return_single_level = True, 
                                                          dtype = np.float32)

In [50]:
train, val, test = creator.visualization()

In [54]:
df_styled = train[0]
html = df_styled.render()

In [60]:
html

'<style  type="text/css" >\n    #T_1a3c6954_c6c5_11ea_b0ab_8ca982f47233row0_col0 {\n            background-color:  RGBA(0,131,255,0.44);\n            background-color:  RGBA(0,131,255,0.44);\n        }    #T_1a3c6954_c6c5_11ea_b0ab_8ca982f47233row0_col1 {\n            background-color:  RGBA(0,131,255,0.44);\n            background-color:  RGBA(0,131,255,0.44);\n        }    #T_1a3c6954_c6c5_11ea_b0ab_8ca982f47233row0_col2 {\n            background-color:  RGBA(0,131,255,0.44);\n            background-color:  RGBA(0,131,255,0.44);\n        }    #T_1a3c6954_c6c5_11ea_b0ab_8ca982f47233row0_col3 {\n            background-color:  RGB(0,131,255);\n            background-color:  RGB(0,131,255);\n        }    #T_1a3c6954_c6c5_11ea_b0ab_8ca982f47233row0_col4 {\n            background-color:  RGBA(0,131,255,0.44);\n            background-color:  RGBA(0,131,255,0.44);\n        }    #T_1a3c6954_c6c5_11ea_b0ab_8ca982f47233row0_col5 {\n            background-color:  RGBA(0,131,255,0.44);\n         

In [59]:
import imgkit
from xvfbwrapper import Xvfb
vdisplay = Xvfb()
vdisplay.start()
imgkit.from_string(html, 'styled_df.png')

ModuleNotFoundError: No module named 'fcntl'

In [61]:
import pdfkit
pdfkit.from_string('my_testpdf ABC','testpdf.pdf')

OSError: No wkhtmltopdf executable found: "b''"
If this file exists please check that this process can read it. Otherwise please install wkhtmltopdf - https://github.com/JazzCore/python-pdfkit/wiki/Installing-wkhtmltopdf

In [47]:
val[0]

Indicator,1 Month Anomaly (%) Rainfall,3 Months Anomaly (%) Rainfall,Cereals and tubers,Exchange rate (USD/LCU),FCS,Fatality,Lat,Lon,NDVI Anomaly,Population,Rainfall (mm),Ramadan,rCSI
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2018-08-22 00:00:00,272.9966,329.8125,0.3282,0.3676,31.7605,11.0799,13.7049,46.1581,3051.979,615154.0,13.0043,0.0,38.6733
2018-08-23 00:00:00,272.5439,327.7998,0.3308,0.3752,32.9014,8.6143,13.7049,46.1581,3048.9094,615154.0,13.0461,0.0,36.0221
2018-08-24 00:00:00,271.8172,325.2867,0.3321,0.378,33.2565,8.6533,13.7049,46.1581,3045.8612,615154.0,13.1684,0.0,35.7513
2018-08-25 00:00:00,270.967,322.6224,0.3328,0.3784,33.3615,9.9082,13.7049,46.1581,3042.7768,615154.0,13.3472,0.0,36.8927
2018-08-26 00:00:00,270.0971,319.9946,0.3333,0.378,33.5423,11.5225,13.7049,46.1581,3039.6259,615154.0,13.5515,0.0,38.7311
2018-08-27 00:00:00,269.2744,317.4781,0.334,0.3784,33.968,12.9736,13.7049,46.1581,3036.3988,615154.0,13.7482,0.0,40.7602
2018-08-28 00:00:00,268.5378,315.0746,0.3352,0.3805,34.6959,13.9893,13.7049,46.1581,3033.1005,615154.0,13.9068,0.0,42.645
2018-08-29 00:00:00,267.9056,312.7451,0.337,0.3851,35.7084,14.476,13.7049,46.1581,3029.7456,615154.0,14.0017,0.0,44.1865
2018-08-30 00:00:00,267.3817,310.4345,0.3395,0.3925,36.943,14.4608,13.7049,46.1581,3026.3543,615154.0,14.015,0.0,45.2916
2018-08-31 00:00:00,266.9605,308.0896,0.3428,0.4028,38.3154,14.0432,13.7049,46.1581,3022.9489,615154.0,13.9365,0.0,45.9459


In [33]:
test[0]

Indicator,1 Month Anomaly (%) Rainfall,3 Months Anomaly (%) Rainfall,Cereals and tubers,Exchange rate (USD/LCU),FCS,Fatality,Lat,Lon,NDVI Anomaly,Population,Rainfall (mm),Ramadan,rCSI
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2018-08-22 00:00:00,272.9966,329.8125,0.3282,0.3676,31.7605,11.0799,13.7049,46.1581,3051.979,615154.0,13.0043,0.0,38.6733
2018-08-23 00:00:00,272.5439,327.7998,0.3308,0.3752,32.9014,8.6143,13.7049,46.1581,3048.9094,615154.0,13.0461,0.0,36.0221
2018-08-24 00:00:00,271.8172,325.2867,0.3321,0.378,33.2565,8.6533,13.7049,46.1581,3045.8612,615154.0,13.1684,0.0,35.7513
2018-08-25 00:00:00,270.967,322.6224,0.3328,0.3784,33.3615,9.9082,13.7049,46.1581,3042.7768,615154.0,13.3472,0.0,36.8927
2018-08-26 00:00:00,270.0971,319.9946,0.3333,0.378,33.5423,11.5225,13.7049,46.1581,3039.6259,615154.0,13.5515,0.0,38.7311
2018-08-27 00:00:00,269.2744,317.4781,0.334,0.3784,33.968,12.9736,13.7049,46.1581,3036.3988,615154.0,13.7482,0.0,40.7602
2018-08-28 00:00:00,268.5378,315.0746,0.3352,0.3805,34.6959,13.9893,13.7049,46.1581,3033.1005,615154.0,13.9068,0.0,42.645
2018-08-29 00:00:00,267.9056,312.7451,0.337,0.3851,35.7084,14.476,13.7049,46.1581,3029.7456,615154.0,14.0017,0.0,44.1865
2018-08-30 00:00:00,267.3817,310.4345,0.3395,0.3925,36.943,14.4608,13.7049,46.1581,3026.3543,615154.0,14.015,0.0,45.2916
2018-08-31 00:00:00,266.9605,308.0896,0.3428,0.4028,38.3154,14.0432,13.7049,46.1581,3022.9489,615154.0,13.9365,0.0,45.9459
