In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Start from matplotlib's built-in "default" style sheet.
plt.style.use("default")

In [2]:
# Keep printed frames short: show at most 4 rows before pandas truncates the output.
pd.set_option("display.max_rows", 4)

In [3]:
# Put the folder containing the custom packages at the front of the module search path,
# so that it is searched first when importing.
import sys
sys.path.insert(0, "../packages/")
# Must stay below the sys.path change above (presumably TsIP lives in "../packages/" - confirm).
from TsIP.TsIP import TsIP

In [4]:
# Figure defaults: start from the stock matplotlib style, then apply one font size (15) to all text elements.
plt.style.use("default")
for rc_group, rc_option in (
    ("axes", "labelsize"),   # x and y axis labels
    ("axes", "titlesize"),   # axes title
    ("xtick", "labelsize"),  # x tick labels
    ("ytick", "labelsize"),  # y tick labels
    ("legend", "fontsize"),  # legend entries
):
    plt.rc(rc_group, **{rc_option: 15})

# Burkina Faso

This notebook builds the dataset of multivariate time-series.

In [5]:
# Country processed by this notebook; the input file paths and the output file name are built from it.
COUNTRY = "Burkina Faso"
# Folder where the merged dataset is written (derived from COUNTRY so the two cannot drift apart).
PATH_TO_SAVE = f"./output_data/{COUNTRY}/"
# Root folder of the input data sources.
PATH_TO_DATA_FOLDER = "../Data Sources/"

## Data sources

For more details on each data source, see the *Data Sources* folder.

In [6]:
# Frequency assigned to the datetime index of every indicator.
freq = "D"

def load_indicator(source_folder, file_name, freq = "D"):
    """Load one indicator time-series file of the current country.

    The file is read from
    PATH_TO_DATA_FOLDER + "<source_folder>/output_timeseries/<COUNTRY>/<file_name>"
    using the first two rows as column header and the first column as index.
    The index is named "Datetime", converted to datetimes and given the frequency `freq`.

    Parameters
    ----------
    source_folder : str
        Name of the data source sub-folder, e.g. "3. Market Prices".
    file_name : str
        Name of the csv file inside the country folder.
    freq : str, default "D"
        Frequency set on the datetime index of the result.

    Returns
    -------
    pd.DataFrame
        The loaded time-series.
    """
    data = pd.read_csv(PATH_TO_DATA_FOLDER + f"{source_folder}/output_timeseries/{COUNTRY}/{file_name}", header = [0, 1], index_col = 0)
    data.index.name = "Datetime"
    data.index = pd.to_datetime(data.index)
    data.index.freq = freq
    return data

# Load the data of the fcs indicator released by wfp.
fcs = load_indicator("1. Food Consumption Score (FCS)", "wfp_fcs-interpolate.csv", freq)

# Load the data of the rcsi indicator released by wfp.
rcsi = load_indicator("2. Reduced Coping Strategy Index (rCSI)", "wfp_rcsi-interpolate.csv", freq)

# Load the data of market prices indicator released by wfp.
price = load_indicator("3. Market Prices", "wfp_market_price-interpolate.csv", freq)

# Load the data of rainfall indicator released by wfp.
rainfall = load_indicator("4. Rainfall & Vegetation", "wfp_rainfall.csv", freq)

# Load the data of vegetation indicator released by wfp.
NDVI = load_indicator("4. Rainfall & Vegetation", "wfp_NDVI.csv", freq)

# Load the data of conflicts indicator released by ACLED.
conflict = load_indicator("5. Conflicts", "wfp_conflicts-SvyWindow.csv", freq)

# Load the data of administrative codes.
admin_code = load_indicator("6. Adminstrata Code", "wfp_adminstrata_code.csv", freq)

# Load the data of Ramadan.
ramadan = load_indicator("7. Ramadan", "wfp_ramadan_SvyWindow.csv", freq)

# Load the data of coordinates.
coordinates = load_indicator("8. Geospatial Data", "wfp_geospatial.csv", freq)

# Load the data of population released by wfp.
population = load_indicator("9. Population", "wfp_population.csv", freq)

In [7]:
# Create the full dataset: put all the indicators side by side as columns,
# then sort the columns by both header levels.
# `levels` is not passed to pd.concat: it only applies together with `keys`, which is not used here.
df = pd.concat([fcs, rcsi, rainfall, conflict, NDVI, admin_code, population, ramadan, price, coordinates], axis = 1).sort_index(axis = 1, level = [0, 1])

In [8]:
# Distinct labels found on the first (outer) column level.
outer_level_labels = df.columns.get_level_values(0)
PROVINCES = outer_level_labels.unique()
PROVINCES

Index(['Boucle-Du-Mouhoun', 'Cascades', 'Centre', 'Centre-Est', 'Centre-Nord',
       'Centre-Ouest', 'Centre-Sud', 'Est', 'Hauts-Bassins', 'Nord',
       'Plateau-Central', 'Sahel', 'Sud-Ouest'],
      dtype='object', name='AdminStrata')

In [9]:
# Distinct labels found on the second (inner) column level.
inner_level_labels = df.columns.get_level_values(1)
PREDICTORS = inner_level_labels.unique()
PREDICTORS

Index(['1 Month Anomaly Rainfalls (%)', '3 Months Anomaly Rainfalls (%)',
       'Area', 'Code', 'FCG', 'Fatalities', 'Lat', 'Lon', 'NDVI',
       'NDVI Anomaly', 'Population', 'Price cereals and tubers',
       'Rainfalls (mm)', 'Ramadan', 'Waterways', 'rCSI'],
      dtype='object', name='Indicator')

In [10]:
# For each entry of PROVINCES, print its name followed by the columns available for it.
for PROVINCE in PROVINCES:
    print(PROVINCE, df[PROVINCE].columns, sep = "\n")

Boucle-Du-Mouhoun
Index(['1 Month Anomaly Rainfalls (%)', '3 Months Anomaly Rainfalls (%)',
       'Area', 'Code', 'FCG', 'Fatalities', 'Lat', 'Lon', 'NDVI',
       'NDVI Anomaly', 'Population', 'Price cereals and tubers',
       'Rainfalls (mm)', 'Ramadan', 'Waterways', 'rCSI'],
      dtype='object', name='Indicator')
Cascades
Index(['1 Month Anomaly Rainfalls (%)', '3 Months Anomaly Rainfalls (%)',
       'Area', 'Code', 'FCG', 'Fatalities', 'Lat', 'Lon', 'NDVI',
       'NDVI Anomaly', 'Population', 'Price cereals and tubers',
       'Rainfalls (mm)', 'Ramadan', 'Waterways', 'rCSI'],
      dtype='object', name='Indicator')
Centre
Index(['1 Month Anomaly Rainfalls (%)', '3 Months Anomaly Rainfalls (%)',
       'Area', 'Code', 'FCG', 'Fatalities', 'Lat', 'Lon', 'NDVI',
       'NDVI Anomaly', 'Population', 'Price cereals and tubers',
       'Rainfalls (mm)', 'Ramadan', 'Waterways', 'rCSI'],
      dtype='object', name='Indicator')
Centre-Est
Index(['1 Month Anomaly Rainfalls (%)', '3 Mon

In [11]:
# Interactive plot of the time-series stored in the dataset.
TsIP(df).interactive_plot_df(
    title = "Time-series",
    matplotlib = False,
    style = "mix",
    comparison = False,
    first_last_valid_index_group = True,
)

interactive(children=(ToggleButtons(description='AdminStrata', options=('Boucle-Du-Mouhoun', 'Cascades', 'Cent…

In [12]:
# Save the dataset. The output folder is created first so that the cell also works
# when the folder does not exist yet (to_csv does not create missing directories).
os.makedirs(PATH_TO_SAVE, exist_ok = True)
df.to_csv(PATH_TO_SAVE + f"{COUNTRY}.csv", index_label = False)