In [24]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from ste import STE
import matplotlib.pyplot as plt

In [25]:
# Start from matplotlib's stock style, then apply the font sizes shared by all figures.
plt.style.use("default")
plt.rcParams.update({
    "axes.labelsize": 12,   # x and y axis labels
    "axes.titlesize": 15,   # axes title
    "xtick.labelsize": 12,  # x tick labels
    "ytick.labelsize": 12,  # y tick labels
    "legend.fontsize": 12,  # legend entries
})

In [26]:
# Add the python path to the folder containing some custom packages.
import sys
sys.path.insert(0, "../packages/")
from TsIP.TsIP import TsIP

# Time-series dataset

In [27]:
# Country to analyse; the name is used to build the path of the input CSV file.
COUNTRY = "Yemen"

In [28]:
# Indicators kept for the analysis; every other indicator in the file is discarded.
INDICATORS_TO_CONSIDER = ["FCG", "rCSI"]
# Sampling frequency of the time-series (daily).
freq = "D"

# Read the time-series: two header rows (AdminStrata, Indicator) and the dates in the first column.
df = pd.read_csv(f"../Dataset time-series/output_data/{COUNTRY}/{COUNTRY}-day.csv", header = [0, 1], index_col = 0)

# Keep only the columns whose second header level is one of the selected indicators.
keep_columns = df.columns.get_level_values(1).isin(INDICATORS_TO_CONSIDER)
df = df.loc[:, keep_columns]

# Turn the index into a named DatetimeIndex with an explicit daily frequency.
df.index = pd.to_datetime(df.index).rename("Datetime")
df.index.freq = freq
df.head()

AdminStrata,Abyan,Abyan,Aden,Aden,Al Bayda,Al Bayda,Al Dhale'e,Al Dhale'e,Al Hudaydah,Al Hudaydah,...,Raymah,Raymah,Sa'ada,Sa'ada,Sana'a,Sana'a,Shabwah,Shabwah,Taizz,Taizz
Indicator,FCG,rCSI,FCG,rCSI,FCG,rCSI,FCG,rCSI,FCG,rCSI,...,FCG,rCSI,FCG,rCSI,FCG,rCSI,FCG,rCSI,FCG,rCSI
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-08-22,31.8098,37.4219,19.4126,35.6284,49.3186,39.1018,38.4156,43.9762,17.9022,52.0974,...,51.3547,56.3011,29.2503,46.4355,30.9019,52.6679,33.0488,34.3022,39.0431,50.919
2018-08-23,32.3747,37.3462,21.1497,30.4678,51.0345,38.4446,37.6435,44.5241,18.2878,53.3952,...,52.4478,57.3241,29.9303,45.2619,31.7387,50.2146,31.0345,36.7425,39.3717,50.4159
2018-08-24,33.7721,37.9994,25.5069,23.6549,49.2245,37.49,39.629,47.8733,19.1723,53.5702,...,51.7865,57.4176,31.3953,44.5205,32.759,50.0732,25.936,35.8577,36.6621,49.3913
2018-08-25,34.5337,36.0682,24.3389,21.326,48.0764,35.3081,40.4953,47.4804,21.6879,53.126,...,51.1138,55.3242,30.4553,46.3459,33.139,48.7917,28.1353,34.6584,37.2052,50.3024
2018-08-26,32.3279,38.2185,24.2947,22.3105,48.9112,38.2346,40.9919,48.67,21.3714,52.1866,...,48.9169,57.6665,25.8282,43.2946,33.0359,48.8396,28.348,35.9131,37.0257,50.293


In [29]:
# Number of rows (daily observations) in the time-series.
df.shape[0]

863

In [30]:
# Distinct provinces (first column level), in order of appearance.
PROVINCES = df.columns.unique(level = "AdminStrata")
PROVINCES

Index(['Abyan', 'Aden', 'Al Bayda', 'Al Dhale'e', 'Al Hudaydah', 'Al Jawf',
       'Al Maharah', 'Al Mahwit', 'Amanat Al Asimah', 'Amran', 'Dhamar',
       'Hajjah', 'Ibb', 'Lahj', 'Marib', 'Raymah', 'Sa'ada', 'Sana'a',
       'Shabwah', 'Taizz'],
      dtype='object', name='AdminStrata')

In [31]:
# Distinct indicators (second column level), in order of appearance.
INDICATORS = df.columns.unique(level = "Indicator")
INDICATORS

Index(['FCG', 'rCSI'], dtype='object', name='Indicator')

In [32]:
#TsIP(df).interactive_plot_df(title = "Time-series", matplotlib = False, style = "lines", comparison = False, normalization = True)

## Compute the STE

In [42]:
# Seed NumPy's global random generator.
# NOTE(review): presumably for reproducibility of the STE computation; confirm that it uses randomness at all.
np.random.seed(123)

# Largest lag evaluated: compute_STE calls STE.calc_ste with h = 1, ..., max_deltas.
max_deltas = 15

def compute_STE(serie, target = "FCG"):
    """Compute the STE between one indicator series and a target indicator of the same province.

    Parameters
    ----------
    serie : pandas object
        One group of `df.groupby(axis = 1, level = ["AdminStrata", "Indicator"])`;
        its `name` must be the (AdminStrata, Indicator) tuple of the group.
    target : str, default "FCG"
        Indicator of the same AdminStrata, read from the module-level `df`, that is
        passed to `STE.calc_ste` as the second series.

    Returns
    -------
    pd.DataFrame
        A single column "original_STE" indexed by "Delta" = 1, ..., max_deltas, one row
        per value of `h` passed to `STE.calc_ste`.

    Notes
    -----
    Reads the module-level names `df` and `max_deltas`.
    NOTE(review): the output saved in this notebook shows the same STE value for every
    Delta; confirm that `h` really is the lag argument of `STE.calc_ste`.
    """
    # Only the province is needed; the indicator of `serie` itself is not used.
    adminstrata, _ = serie.name
    x = serie.values.ravel().copy()
    # Single tuple lookup on the column MultiIndex instead of chained indexing.
    y = df[(adminstrata, target)].values.ravel().copy()

    # One STE value per lag h = 1, ..., max_deltas.
    ste_values = [STE.calc_ste(x, y, m = 3, h = lag) for lag in range(1, max_deltas + 1)]

    delta_index = pd.RangeIndex(1, max_deltas + 1, name = "Delta")
    return pd.DataFrame({"original_STE": ste_values}, index = delta_index)

# Register `progress_apply` on pandas objects so the computation shows a progress bar.
tqdm.pandas()

# Run compute_STE on every (AdminStrata, Indicator) column, then discard the FCG columns
# (FCG is the target there, so those columns pair FCG with itself).
# NOTE(review): `groupby(axis = 1)` is deprecated in recent pandas releases (2.1+).
df_STE_deltas = (
    df
    .groupby(axis = 1, level = ["AdminStrata", "Indicator"])
    .progress_apply(compute_STE)
    .drop("FCG", axis = 1, level = "Indicator")
)
df_STE_deltas.head()

HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))




AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,Dhamar,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI,rCSI
Unnamed: 0_level_2,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE
Delta,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3
1,0.061994,0.060386,0.059543,0.056094,0.04518,0.048783,0.043259,0.062392,0.052395,0.070908,0.06198,0.043759,0.058232,0.05458,0.068737,0.06264,0.057759,0.041948,0.047088,0.058958
2,0.061994,0.060386,0.059543,0.056094,0.04518,0.048783,0.043259,0.062392,0.052395,0.070908,0.06198,0.043759,0.058232,0.05458,0.068737,0.06264,0.057759,0.041948,0.047088,0.058958
3,0.061994,0.060386,0.059543,0.056094,0.04518,0.048783,0.043259,0.062392,0.052395,0.070908,0.06198,0.043759,0.058232,0.05458,0.068737,0.06264,0.057759,0.041948,0.047088,0.058958
4,0.061994,0.060386,0.059543,0.056094,0.04518,0.048783,0.043259,0.062392,0.052395,0.070908,0.06198,0.043759,0.058232,0.05458,0.068737,0.06264,0.057759,0.041948,0.047088,0.058958
5,0.061994,0.060386,0.059543,0.056094,0.04518,0.048783,0.043259,0.062392,0.052395,0.070908,0.06198,0.043759,0.058232,0.05458,0.068737,0.06264,0.057759,0.041948,0.047088,0.058958


In [43]:
def compute_importance(x):
    """Return the largest "original_STE" value found in a per-series STE table.

    `x` is a frame whose third column level contains "original_STE"; the row holding
    the maximum is located first, and the value stored in that row is returned.
    """
    ste = x.xs("original_STE", axis = 1, level = 2)
    # Index label (Delta) at which the maximum is reached, one entry per column.
    peak_deltas = ste.idxmax()
    peak_rows = ste.loc[peak_deltas]
    return peak_rows.values.ravel()[0]
    
# Maximum STE over all deltas for every (AdminStrata, Indicator) column,
# reshaped into a table with provinces as rows and indicators as columns.
T_fcs = (
    df_STE_deltas
    .groupby(axis = 1, level = ["AdminStrata", "Indicator"], group_keys = False)
    .apply(compute_importance)
    .unstack("Indicator")
)
T_fcs

Indicator,rCSI
AdminStrata,Unnamed: 1_level_1
Abyan,0.061994
Aden,0.060386
Al Bayda,0.059543
Al Dhale'e,0.056094
Al Hudaydah,0.04518
Al Jawf,0.048783
Al Maharah,0.043259
Al Mahwit,0.062392
Amanat Al Asimah,0.052395
Amran,0.070908


In [44]:
# Seed NumPy's global random generator.
# NOTE(review): presumably for reproducibility of the STE computation; confirm that it uses randomness at all.
np.random.seed(123)

# Largest lag evaluated: compute_STE calls STE.calc_ste with h = 1, ..., max_deltas.
max_deltas = 15

def compute_STE(serie, target = "rCSI"):
    """Compute the STE between one indicator series and a target indicator of the same province.

    Parameters
    ----------
    serie : pandas object
        One group of `df.groupby(axis = 1, level = ["AdminStrata", "Indicator"])`;
        its `name` must be the (AdminStrata, Indicator) tuple of the group.
    target : str, default "rCSI"
        Indicator of the same AdminStrata, read from the module-level `df`, that is
        passed to `STE.calc_ste` as the second series.

    Returns
    -------
    pd.DataFrame
        A single column "original_STE" indexed by "Delta" = 1, ..., max_deltas, one row
        per value of `h` passed to `STE.calc_ste`.

    Notes
    -----
    Reads the module-level names `df` and `max_deltas`.
    NOTE(review): the output saved in this notebook shows the same STE value for every
    Delta; confirm that `h` really is the lag argument of `STE.calc_ste`.
    """
    # Only the province is needed; the indicator of `serie` itself is not used.
    adminstrata, _ = serie.name
    x = serie.values.ravel().copy()
    # Single tuple lookup on the column MultiIndex instead of chained indexing.
    y = df[(adminstrata, target)].values.ravel().copy()

    # One STE value per lag h = 1, ..., max_deltas.
    ste_values = [STE.calc_ste(x, y, m = 3, h = lag) for lag in range(1, max_deltas + 1)]

    delta_index = pd.RangeIndex(1, max_deltas + 1, name = "Delta")
    return pd.DataFrame({"original_STE": ste_values}, index = delta_index)

# Register `progress_apply` on pandas objects so the computation shows a progress bar.
tqdm.pandas()

# Run compute_STE on every (AdminStrata, Indicator) column, then discard the rCSI columns
# (rCSI is the target there, so those columns pair rCSI with itself).
# NOTE(review): `groupby(axis = 1)` is deprecated in recent pandas releases (2.1+).
df_STE_deltas = (
    df
    .groupby(axis = 1, level = ["AdminStrata", "Indicator"])
    .progress_apply(compute_STE)
    .drop("rCSI", axis = 1, level = "Indicator")
)
df_STE_deltas.head()

HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))




AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,Dhamar,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG,FCG
Unnamed: 0_level_2,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE,original_STE
Delta,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3
1,0.041375,0.053202,0.049301,0.044071,0.060542,0.062665,0.069357,0.06257,0.060714,0.055247,0.06107,0.055524,0.046205,0.047611,0.039901,0.063287,0.039071,0.052892,0.053312,0.055199
2,0.041375,0.053202,0.049301,0.044071,0.060542,0.062665,0.069357,0.06257,0.060714,0.055247,0.06107,0.055524,0.046205,0.047611,0.039901,0.063287,0.039071,0.052892,0.053312,0.055199
3,0.041375,0.053202,0.049301,0.044071,0.060542,0.062665,0.069357,0.06257,0.060714,0.055247,0.06107,0.055524,0.046205,0.047611,0.039901,0.063287,0.039071,0.052892,0.053312,0.055199
4,0.041375,0.053202,0.049301,0.044071,0.060542,0.062665,0.069357,0.06257,0.060714,0.055247,0.06107,0.055524,0.046205,0.047611,0.039901,0.063287,0.039071,0.052892,0.053312,0.055199
5,0.041375,0.053202,0.049301,0.044071,0.060542,0.062665,0.069357,0.06257,0.060714,0.055247,0.06107,0.055524,0.046205,0.047611,0.039901,0.063287,0.039071,0.052892,0.053312,0.055199


In [45]:
# Maximum STE over all deltas for every (AdminStrata, Indicator) column,
# reshaped into a table with provinces as rows and indicators as columns.
T_rcsi = (
    df_STE_deltas
    .groupby(axis = 1, level = ["AdminStrata", "Indicator"], group_keys = False)
    .apply(compute_importance)
    .unstack("Indicator")
)

In [46]:
# Display the per-province maximum STE obtained with rCSI as the target (a single "FCG" column).
T_rcsi

Indicator,FCG
AdminStrata,Unnamed: 1_level_1
Abyan,0.041375
Aden,0.053202
Al Bayda,0.049301
Al Dhale'e,0.044071
Al Hudaydah,0.060542
Al Jawf,0.062665
Al Maharah,0.069357
Al Mahwit,0.06257
Amanat Al Asimah,0.060714
Amran,0.055247


In [47]:
# Element-wise difference between the two importance tables, one row per province.
# The subtraction is positional, so both tables must list the provinces in the same order.
np.subtract(T_rcsi.values, T_fcs.values)

array([[-0.02061957],
       [-0.0071838 ],
       [-0.01024151],
       [-0.01202316],
       [ 0.01536272],
       [ 0.01388248],
       [ 0.0260975 ],
       [ 0.00017744],
       [ 0.00831909],
       [-0.01566072],
       [-0.00090995],
       [ 0.01176479],
       [-0.01202698],
       [-0.00696826],
       [-0.02883548],
       [ 0.00064696],
       [-0.0186884 ],
       [ 0.01094342],
       [ 0.00622407],
       [-0.00375971]])