In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
# Keep rendered DataFrames compact: show at most 4 rows per frame.
pd.set_option("display.max_rows", 4)

In [3]:
# Add the python path to the folder containing some useful custom packages.
import sys
sys.path.insert(0, "../packages/")
from TsIP.TsIP import TsIP

# Nigeria

This notebook builds the dataset of multivariate time-series for Nigeria.

In [4]:
# Folder where the assembled Nigeria dataset is written by the final save cell.
PATH_TO_SAVE = "./output_data/Nigeria/"

## Data sources

For more details on each data source, see the *Data Sources* folder.

In [5]:
# Root folder of the per-indicator input time-series; every read_csv path below is built from it.
PATH_TO_DATA_FOLDER = "../Data Sources/"

    - Food Consumption Score:

In [6]:
# Read the Food Consumption Score (FCS) time-series released by WFP.
# The two header rows become the column MultiIndex; the first CSV column is the index.
fcs = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Food Consumption Score (FCS)/output_timeseries/Nigeria/wfp_fcs-interpolate.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
fcs.index = pd.to_datetime(fcs.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
fcs.index.freq = freq

In [7]:
# Preview the FCS frame (display.max_rows = 4, so only the first and last rows are rendered).
fcs

AdminStrata,Adamawa,Borno,Yobe
Indicator,FCS,FCS,FCS
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2019-01-18,23.586938,37.455808,30.722900
2019-01-19,22.755425,36.451190,30.273423
...,...,...,...
2020-08-31,54.063342,60.400693,41.045098
2020-09-01,53.804956,60.722706,40.501112


    - Reduced Coping Strategy Index:

In [8]:
# Read the reduced Coping Strategy Index (rCSI) time-series released by WFP.
# The two header rows become the column MultiIndex; the first CSV column is the index.
rcsi = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Reduced Coping Strategy Index (rCSI)/output_timeseries/Nigeria/wfp_rcsi-interpolate.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
rcsi.index = pd.to_datetime(rcsi.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
rcsi.index.freq = freq

In [9]:
# Preview the rCSI frame (display.max_rows = 4, so only the first and last rows are rendered).
rcsi

AdminStrata,Adamawa,Borno,Yobe
Indicator,rCSI,rCSI,rCSI
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2019-01-18,43.008741,35.259528,45.458462
2019-01-19,43.619385,34.719999,45.499663
...,...,...,...
2020-08-31,6.001410,12.643851,13.244230
2020-09-01,6.046490,11.318429,12.705388


In [10]:
# First-level column labels of the FCS frame (the administrative regions with FCS data).
# The frames loaded below are restricted to these regions via `frame[adminstratas]`.
adminstratas = fcs.columns.get_level_values(0)

    - Rainfall:

In [11]:
# Read the rainfall indicators released by WFP.
# The two header rows become the column MultiIndex; the first CSV column is the index.
rainfall = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Rainfall & Vegetation/output_timeseries/Nigeria/wfp_rainfall-dekad.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
rainfall.index = pd.to_datetime(rainfall.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
rainfall.index.freq = freq
# Keep only the regions that also appear in the FCS data.
rainfall = rainfall[adminstratas]

In [12]:
# Preview the rainfall frame (display.max_rows = 4, so only the first and last rows are rendered).
rainfall

AdminStrata,Adamawa,Adamawa,Adamawa,Borno,Borno,Borno,Yobe,Yobe,Yobe
Indicator,1 Month Anomaly (%) Rainfall,3 Months Anomaly (%) Rainfall,Rainfall (mm),1 Month Anomaly (%) Rainfall,3 Months Anomaly (%) Rainfall,Rainfall (mm),1 Month Anomaly (%) Rainfall,3 Months Anomaly (%) Rainfall,Rainfall (mm)
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
2018-01-10,100.000000,36.007619,0.000,100.000000,70.595816,0.00,100.000000,86.517022,0.000
2018-01-11,,,,,,,,,
...,...,...,...,...,...,...,...,...,...
2020-08-30,,,,,,,,,
2020-08-31,66.152901,89.272429,74.566,109.266447,109.116041,91.38,121.790069,124.634196,81.125


    - NDVI:

In [13]:
# Read the vegetation (NDVI) indicators released by WFP.
# The two header rows become the column MultiIndex; the first CSV column is the index.
NDVI = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Rainfall & Vegetation/output_timeseries/Nigeria/wfp_NDVI-dekad.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
NDVI.index = pd.to_datetime(NDVI.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
NDVI.index.freq = freq
# Keep only the regions that also appear in the FCS data.
NDVI = NDVI[adminstratas]

In [14]:
# Preview the NDVI frame (display.max_rows = 4, so only the first and last rows are rendered).
NDVI

AdminStrata,Adamawa,Adamawa,Borno,Borno,Yobe,Yobe
Indicator,NDVI,NDVI Anomaly,NDVI,NDVI Anomaly,NDVI,NDVI Anomaly
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2018-01-10,0.306793,95.776235,0.246087,99.000805,0.215315,95.845607
2018-01-11,,,,,,
...,...,...,...,...,...,...
2020-08-30,,,,,,
2020-08-31,0.634967,94.035526,0.674152,112.066308,0.498843,95.973841


    - Conflict:

In [15]:
# Read the conflict indicator (data released by ACLED).
# The two header rows become the column MultiIndex; the first CSV column is the index.
conflict = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Conflicts/output_timeseries/Nigeria/wfp_conflicts_sum_event_SvyWindow.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
conflict.index = pd.to_datetime(conflict.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
conflict.index.freq = freq
# Keep only the regions that also appear in the FCS data.
conflict = conflict[adminstratas]

In [16]:
# Preview the conflict frame (display.max_rows = 4, so only the first and last rows are rendered).
conflict

AdminStrata,Adamawa,Borno,Yobe
Indicator,Fatality,Fatality,Fatality
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2018-01-31,65.0,298.0,0.0
2018-02-01,72.0,455.0,0.0
...,...,...,...
2020-09-04,0.0,300.0,9.0
2020-09-05,0.0,290.0,9.0


    - Market price:

In [17]:
# Read the market price indicators released by WFP.
# The two header rows become the column MultiIndex; the first CSV column is the index.
price = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Market Prices/output_timeseries/Nigeria/wfp_relevant_market_price-interpolate-common-food_group-month.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
price.index = pd.to_datetime(price.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
price.index.freq = freq
# Keep only the regions that also appear in the FCS data.
price = price[adminstratas]

In [18]:
# Preview the market price frame (display.max_rows = 4, so only the first and last rows are rendered).
price

AdminStrata,Adamawa,Adamawa,Borno,Borno,Yobe,Yobe
Food Group,Cereals and tubers,Fuel,Cereals and tubers,Fuel,Cereals and tubers,Fuel
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2018-01-31,,,0.482900,0.821832,0.539280,0.983226
2018-02-01,,,,,,
...,...,...,...,...,...,...
2020-08-30,,,,,,
2020-08-31,0.790123,0.616324,0.444897,0.634757,0.390409,0.697316


    - Population:

In [19]:
# Read the population figures released by WFP.
# The two header rows become the column MultiIndex; the first CSV column is the index.
population = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Population/output_timeseries/Nigeria/wfp_population.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
population.index = pd.to_datetime(population.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
population.index.freq = freq
# Keep only the regions that also appear in the FCS data.
population = population[adminstratas]

In [20]:
# Preview the population frame (display.max_rows = 4, so only the first and last rows are rendered).
population

AdminStrata,Adamawa,Borno,Yobe
Indicator,Population,Population,Population
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2018-01-01,4946724,5884116,4340967
2018-01-02,4946724,5884116,4340967
...,...,...,...
2020-12-31,4946724,5884116,4340967
2021-01-01,4946724,5884116,4340967


    - Coordinates:

In [21]:
# Read the coordinates (Lat/Lon) of each administrative region.
# The two header rows become the column MultiIndex; the first CSV column is the index.
coordinates = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Coordinates/output_timeseries/Nigeria/wfp_coordinates.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
coordinates.index = pd.to_datetime(coordinates.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
coordinates.index.freq = freq
# Keep only the regions that also appear in the FCS data.
coordinates = coordinates[adminstratas]

In [22]:
# Preview the coordinates frame (display.max_rows = 4, so only the first and last rows are rendered).
coordinates

AdminStrata,Adamawa,Adamawa,Borno,Borno,Yobe,Yobe
Indicator,Lat,Lon,Lat,Lon,Lat,Lon
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2018-01-01,9.323227,12.400241,11.88898,13.153347,12.29868,11.437066
2018-01-02,9.323227,12.400241,11.88898,13.153347,12.29868,11.437066
...,...,...,...,...,...,...
2020-12-31,9.323227,12.400241,11.88898,13.153347,12.29868,11.437066
2021-01-01,9.323227,12.400241,11.88898,13.153347,12.29868,11.437066


    - Ramadan:

In [23]:
# Read the Ramadan indicator time-series.
# The two header rows become the column MultiIndex; the first CSV column is the index.
ramadan = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Ramadan/output_timeseries/Nigeria/wfp_ramadan_SvyWindow.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
ramadan.index = pd.to_datetime(ramadan.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
ramadan.index.freq = freq
# Keep only the regions that also appear in the FCS data.
ramadan = ramadan[adminstratas]

In [24]:
# Preview the Ramadan frame (display.max_rows = 4, so only the first and last rows are rendered).
ramadan

AdminStrata,Adamawa,Borno,Yobe
Indicator,Ramadan,Ramadan,Ramadan
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2018-01-31,0.0,0.0,0.0
2018-02-01,0.0,0.0,0.0
...,...,...,...
2020-12-31,0.0,0.0,0.0
2021-01-01,0.0,0.0,0.0


    - Adminstrata Codes:

In [25]:
# Read the numeric code assigned to each administrative region.
# The two header rows become the column MultiIndex; the first CSV column is the index.
# Unlike the other inputs, this frame is not restricted to `adminstratas` here.
admin_code = pd.read_csv(
    PATH_TO_DATA_FOLDER + "Adminstrata Code/output_timeseries/Nigeria/wfp_adminstrata_code.csv",
    header = [0, 1],
    index_col = 0,
)
# Parse the index as dates and label it "Datetime".
admin_code.index = pd.to_datetime(admin_code.index).rename("Datetime")
# Declare a daily frequency; pandas raises if the dates do not conform to it.
freq = "D"
admin_code.index.freq = freq

In [26]:
# Preview the region-code frame (display.max_rows = 4, so only the first and last rows are rendered).
admin_code

AdminStrata,Abia,Adamawa,Akwa Ibom,Anambra,Bauchi,Bayelsa,Benue,Borno,Cross River,Delta,...,Ogun,Ondo,Osun,Oyo,Plateau,Rivers,Sokoto,Taraba,Yobe,Zamfara
Indicator,Code,Code,Code,Code,Code,Code,Code,Code,Code,Code,...,Code,Code,Code,Code,Code,Code,Code,Code,Code,Code
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-01-01,13,14,15,16,17,18,19,20,21,22,...,40,41,42,43,44,45,46,47,48,49
2018-01-02,13,14,15,16,17,18,19,20,21,22,...,40,41,42,43,44,45,46,47,48,49
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-31,13,14,15,16,17,18,19,20,21,22,...,40,41,42,43,44,45,46,47,48,49
2021-01-01,13,14,15,16,17,18,19,20,21,22,...,40,41,42,43,44,45,46,47,48,49


In [27]:
# Create the full dataset: place all indicator frames side by side, aligned on
# the datetime index (pd.concat uses an outer join by default, so dates missing
# from a frame become NaN), then order the columns by region (level 0) and
# indicator (level 1).
# The previous `levels = 0` argument was removed: `levels` is only used together
# with `keys`, and 0 is not a valid value for it, so it had no effect.
# `admin_code` still contains every region at this point.
df = pd.concat(
    [fcs, rcsi, rainfall, conflict, NDVI, price, population, coordinates, ramadan, admin_code],
    axis = 1,
).sort_index(axis = 1, level = [0, 1])

In [28]:
# Keep only the three states present in the FCS/rCSI data. This drops the columns of
# the other states, which enter the concatenation through the unfiltered `admin_code` frame.
df = df[["Adamawa", "Borno", "Yobe"]]

In [29]:
# Distinct labels of the first column level, i.e. the regions kept in the dataset.
PROVINCES = df.columns.unique(level = 0)
PROVINCES

Index(['Adamawa', 'Borno', 'Yobe'], dtype='object', name='AdminStrata')

In [30]:
# Distinct labels of the second column level, i.e. the indicators available in the dataset.
PREDICTORS = df.columns.unique(level = 1)
PREDICTORS

Index(['1 Month Anomaly (%) Rainfall', '3 Months Anomaly (%) Rainfall',
       'Cereals and tubers', 'Code', 'FCS', 'Fatality', 'Fuel', 'Lat', 'Lon',
       'NDVI', 'NDVI Anomaly', 'Population', 'Rainfall (mm)', 'Ramadan',
       'rCSI'],
      dtype='object', name='Indicator')

In [31]:
# List the indicators available for each region: region name on one line,
# its column index on the next.
for PROVINCE in PROVINCES:
    print(PROVINCE, df[PROVINCE].columns, sep = "\n")

Adamawa
Index(['1 Month Anomaly (%) Rainfall', '3 Months Anomaly (%) Rainfall',
       'Cereals and tubers', 'Code', 'FCS', 'Fatality', 'Fuel', 'Lat', 'Lon',
       'NDVI', 'NDVI Anomaly', 'Population', 'Rainfall (mm)', 'Ramadan',
       'rCSI'],
      dtype='object', name='Indicator')
Borno
Index(['1 Month Anomaly (%) Rainfall', '3 Months Anomaly (%) Rainfall',
       'Cereals and tubers', 'Code', 'FCS', 'Fatality', 'Fuel', 'Lat', 'Lon',
       'NDVI', 'NDVI Anomaly', 'Population', 'Rainfall (mm)', 'Ramadan',
       'rCSI'],
      dtype='object', name='Indicator')
Yobe
Index(['1 Month Anomaly (%) Rainfall', '3 Months Anomaly (%) Rainfall',
       'Cereals and tubers', 'Code', 'FCS', 'Fatality', 'Fuel', 'Lat', 'Lon',
       'NDVI', 'NDVI Anomaly', 'Population', 'Rainfall (mm)', 'Ramadan',
       'rCSI'],
      dtype='object', name='Indicator')


In [32]:
# Explore the assembled dataset with the project's TsIP interactive plotting helper.
TsIP(df).interactive_plot_df(
    title = "Time-series",
    matplotlib = False,
    style = "mix",
    comparison = False,
    first_last_valid_index_group = True,
)

interactive(children=(ToggleButtons(description='AdminStrata', options=('Adamawa', 'Borno', 'Yobe'), value='Ad…

In [33]:
# Save the dataset.
# Create the output folder first: to_csv does not create missing directories, so
# the export would otherwise fail when the folder does not exist yet.
os.makedirs(PATH_TO_SAVE, exist_ok = True)
# index_label = False: no header field is written for the index name.
df.to_csv(PATH_TO_SAVE + "Nigeria.csv", index_label = False)