# Rainfall time-series dataset released by WFP 

The rainfall data are provided by the WFP: https://dataviz.vam.wfp.org/seasonal_explorer/rainfall_vegetation/visualizations#

In [1]:
import glob
import ntpath
import os

import pandas as pd

from plotly_dataframe import plot, plot_hist

In [2]:
# Root output directory for the results of this analysis.
# `save` writes one CSV per country below this directory.
path_to_save_data = "./time-series/"

# Monthly dekad level - YEM

In [3]:
# Read the data released by wfp regarding the rainfall in the adminstratas of the selected countries.
# The folders are walked as <path>/<country>/<adminstrata>/*.csv (presumably one CSV per year).
# NOTE(review): COUNTRY is not used by the loop below; every folder found under `path` is loaded.
COUNTRY = "Yemen"

path = "./wfp_data/Rainfall/"
# Sorted so that the loading order (and the row order of the concatenated frame) does not depend on the platform.
all_folders = sorted(glob.glob(path + "*"))

dfs = []

for folder in all_folders:
    country = ntpath.basename(folder).split(".")[0]
    # List the adminstrata folders from the folder actually found on disk, not from a path
    # rebuilt out of the (possibly truncated) country name.
    all_subfolders = sorted(glob.glob(folder + "/*"))
    for subfolder in all_subfolders:
        adminstrata = ntpath.basename(subfolder).split(".")[0]
        # All the files of the years.
        all_files = sorted(glob.glob(subfolder + "/*.csv"))
        for filename in all_files:
            df = pd.read_csv(filename)
            # Tag every row with its origin, which is only encoded in the folder names.
            df["AdminStrata"] = adminstrata
            df["Country"] = country
            dfs.append(df)

In [4]:
print("The data released by wfp:")
# Stack the per-file frames into a single long table with a fresh 0..n-1 index.
df = pd.concat(dfs, ignore_index = True)
df.head()

The data released by wfp:


Unnamed: 0,Year,Month,Dekad,Rainfall (mm),Average (mm),AdminStrata,Country
0,2014,1,1,0.891,0.419,Abyan,Yemen
1,2014,1,2,0.505,0.118,Abyan,Yemen
2,2014,1,3,0.887,0.477,Abyan,Yemen
3,2014,2,1,1.22,1.011,Abyan,Yemen
4,2014,2,2,1.898,1.168,Abyan,Yemen


### Brief items description

- *Year*: reference year of the data collection.
- *Month*: reference month of the data collection.
- *Dekad*: reference month dekad of the data collection: these correspond to the calendar dates 1-10, 11-20, and 21-end of each month.
- *Rainfall (mm)*: rainfall amount in the reference period.
- *Average (mm)*: long-term rainfall average for the same period, computed over the years 1994 to 2013.
- *AdminStrata*: the adminstrata information of the reference country.

In [5]:
# Some dates appear in more than one source file: collapse the identical rows into one.
df = df.drop_duplicates()

In [6]:
# Convert the dekad to proper format.
# These correspond to the calendar dates 1-10, 11-20, and 21-end of each month.
def dekad_to_day(row):
    """Return the calendar day that closes the dekad described by `row`.

    Dekads 1 and 2 end on day 10 and day 20; dekad 3 ends on the last day
    of the month given by the row's "Year" and "Month".

    Parameters
    ----------
    row : pandas.Series
        Must provide the entries "Year", "Month" and "Dekad".

    Returns
    -------
    int
        10, 20 or the number of days in the month.

    Raises
    ------
    ValueError
        If "Dekad" is not 1, 2 or 3 (previously `None` was returned silently).
    """
    dekad = row["Dekad"]
    if dekad == 1:
        return 10
    if dekad == 2:
        return 20
    if dekad == 3:
        # int() keeps this working when pandas upcast the row to float (2014.0, 1.0).
        first_of_month = pd.to_datetime("{:d}-{:02d}-01".format(int(row["Year"]), int(row["Month"])))
        return first_of_month.days_in_month
    raise ValueError("Unexpected dekad {!r}: expected 1, 2 or 3.".format(dekad))
    
# Replace (Year, Month, Dekad) by a single timestamp placed on the day that closes the dekad.
df["Day"] = df.apply(dekad_to_day, axis = 1)
df.drop(columns = "Dekad", inplace = True)
data = pd.to_datetime(df[["Year", "Month", "Day"]])
df.insert(1, "Datetime", data)
df.drop(["Year", "Month", "Day"], axis = 1, inplace = True)
df.sort_values("Datetime", ascending = True, inplace = True)
# Expand every (Country, AdminStrata) series to a daily index; days without a measurement become NaN.
# Only the numeric columns are averaged: recent pandas versions raise a TypeError when the mean is
# requested on the string columns (Country, AdminStrata) that are still part of each group.
df = df.groupby(["Country", "AdminStrata"]).apply(lambda group: group.set_index("Datetime").select_dtypes(include = "number").resample("D").mean()).reset_index()
df.reset_index(drop = True, inplace = True)
df.head()

Unnamed: 0,Country,AdminStrata,Datetime,Rainfall (mm),Average (mm)
0,Yemen,Abyan,2014-01-10,0.891,0.419
1,Yemen,Abyan,2014-01-11,,
2,Yemen,Abyan,2014-01-12,,
3,Yemen,Abyan,2014-01-13,,
4,Yemen,Abyan,2014-01-14,,


In [7]:
# Create a dataframe with multi index column in order to have a summary dataframe of the time-series.
df.drop(labels = ["Average (mm)"], axis = 1, inplace = True)
df = df.set_index(["Datetime", "Country", "AdminStrata"]).unstack(["Country", "AdminStrata"])
df.columns = df.columns.droplevel(0)
df.columns = pd.MultiIndex.from_tuples(list(map(lambda x: tuple([x[0], x[1], "Rainfall (mm)"]), df.columns)))
df.columns.rename("Country", level = 0, inplace = True)
df.columns.rename("AdminStrata", level = 1, inplace = True)
df.columns.rename("Indicator", level = 2, inplace = True)
freq = "D"
df.index.freq = freq
df.head()

Country,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen
AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),...,Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm)
Datetime,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2014-01-10,0.891,0.083,0.98,0.38,0.667,1.488,4.292,0.692,0.0,0.805,...,3.267,0.968,0.631,0.681,1.757,0.0,0.794,0.57,1.684,0.771
2014-01-11,,,,,,,,,,,...,,,,,,,,,,
2014-01-12,,,,,,,,,,,...,,,,,,,,,,
2014-01-13,,,,,,,,,,,...,,,,,,,,,,
2014-01-14,,,,,,,,,,,...,,,,,,,,,,


In [8]:
# Look only at the dates that close a dekad (day 10, day 20, last day of the month)
# to find out whether any dekadal value is missing and would need interpolation.
days = df.index.day
on_dekad_end = (days == 10) | (days == 20) | df.index.is_month_end
df = df.loc[on_dekad_end]
# Trim the frame to the span between the first and the last row holding a measured rainfall.
first_idx = df.first_valid_index()
last_idx = df.last_valid_index()
df = df.loc[first_idx:last_idx]
# Count the missing values per column on the dekad dates.
print("Check if the dataframe contains NaN values:")
df.isna().sum()

Check if the dataframe contains NaN values:


Country  AdminStrata       Indicator    
Yemen    Abyan             Rainfall (mm)    0
         Aden              Rainfall (mm)    0
         Al Bayda          Rainfall (mm)    0
         Al Dhale'e        Rainfall (mm)    0
         Al Hudaydah       Rainfall (mm)    0
         Al Jawf           Rainfall (mm)    0
         Al Maharah        Rainfall (mm)    0
         Al Mahwit         Rainfall (mm)    0
         Amanat Al Asimah  Rainfall (mm)    0
         Amran             Rainfall (mm)    0
         Dhamar            Rainfall (mm)    0
         Hadramaut         Rainfall (mm)    0
         Hajjah            Rainfall (mm)    0
         Ibb               Rainfall (mm)    0
         Lahj              Rainfall (mm)    0
         Marib             Rainfall (mm)    0
         Raymah            Rainfall (mm)    0
         Sa'ada            Rainfall (mm)    0
         Sana'a            Rainfall (mm)    0
         Shabwah           Rainfall (mm)    0
         Taizz             Rainfall (mm

In [9]:
# Put the dekadal rows back on a continuous daily index (as before); the days in between hold NaN.
daily_bins = df.resample("D")
df = daily_bins.mean()
df.head()

Country,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen
AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),...,Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm)
Datetime,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2014-01-10,0.891,0.083,0.98,0.38,0.667,1.488,4.292,0.692,0.0,0.805,...,3.267,0.968,0.631,0.681,1.757,0.0,0.794,0.57,1.684,0.771
2014-01-11,,,,,,,,,,,...,,,,,,,,,,
2014-01-12,,,,,,,,,,,...,,,,,,,,,,
2014-01-13,,,,,,,,,,,...,,,,,,,,,,
2014-01-14,,,,,,,,,,,...,,,,,,,,,,


## Reproduce the wfp interface 

In [10]:
# Interactive histogram of the rainfall series, drawn by the `plot_hist` helper of the local
# `plotly_dataframe` module; the rendered widget offers dropdowns to choose what is shown.
plot_hist(df, title = "Rainfall (mm)", yaxis = "Rainfall (mm)")

HBox(children=(Dropdown(description='Country:', options=('', 'Yemen'), value=''), Dropdown(description='Admins…

Output()

## Resampling monthly

In [11]:
# Monthly rainfall: add up the dekadal values that fall inside each calendar month.
monthly_bins = df.resample("M")
df_month = monthly_bins.sum()
df_month.head()

Country,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen
AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),...,Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm)
Datetime,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2014-01-31,2.283,0.125,2.522,1.286,2.334,4.501,12.832,2.192,0.142,2.497,...,9.577,2.975,1.469,1.902,5.342,0.905,2.625,2.117,4.719,2.491
2014-02-28,4.16,3.0,3.912,3.153,6.631,3.911,4.268,5.308,1.501,2.896,...,4.014,4.543,3.737,4.793,4.058,4.555,4.116,3.005,3.442,5.306
2014-03-31,5.519,1.792,4.635,3.496,4.064,15.054,5.364,5.59,3.143,4.598,...,10.761,7.39,4.106,4.095,4.291,3.794,13.599,2.017,7.854,6.059
2014-04-30,4.186,0.667,8.144,11.628,9.927,5.798,4.544,23.667,10.357,16.015,...,5.402,18.025,22.475,5.574,6.297,24.191,13.139,14.375,5.12,9.652
2014-05-31,6.031,1.25,6.023,17.343,12.492,2.37,2.714,30.743,18.286,23.247,...,1.669,17.336,37.525,12.43,2.207,39.556,17.854,16.244,1.77,26.785


In [12]:
# Interactive line-and-marker plot of the monthly totals, drawn by the `plot` helper of the local
# `plotly_dataframe` module.
plot(df_month, title = "Rainfall", yaxis = "Rainfall (mm)", style = "lines+markers")

interactive(children=(ToggleButtons(description='Country', options=('Yemen',), value='Yemen'), RadioButtons(de…

In [13]:
# Now save the time-series of each country, trimmed to its own first and last valid index.
def save(group, name):
    """Write the time-series of one country to "<path_to_save_data><country>/<name>.csv".

    Parameters
    ----------
    group : pandas.DataFrame
        Column slice holding a single country. The country is read from `group.name`
        and used to select the matching top level of the columns.
    name : str
        File name, without the ".csv" extension.
    """
    country = group.name
    group = group[country]
    # Adjust time-series group: drop the leading/trailing rows that hold no value at all.
    first_idx = group.first_valid_index()
    last_idx = group.last_valid_index()
    group = group.loc[first_idx:last_idx]
    # Create the per-country folder when it is missing; to_csv does not create directories.
    output_dir = path_to_save_data + country
    os.makedirs(output_dir, exist_ok = True)
    # Save.
    group.to_csv(output_dir + "/" + name + ".csv", index_label = False)

In [14]:
# Write one CSV per country (level 0 of the columns) through `save`.
# NOTE(review): the frame written here is `df` (dekadal values on a daily index), not `df_month`,
# although the file is named "wfp_rainfall-monthly" — confirm which frame is intended.
_ = df.groupby(level = 0, axis = 1).apply(lambda x: save(x, name = "wfp_rainfall-monthly"))

In [15]:
# Daily frame: every day of a dekad receives the value stamped on the day that closes it
# (10th, 20th or month end). The empty days come before that value, hence a backward fill.
df_fit = df.bfill()
freq = "D"
df_fit.index.freq = freq
# Furthermore, since a dekad value is the total rainfall over its interval, divide it by the number of days of that dekad.
def dekad_day_norm(row):
    """Convert the dekad total carried by a daily row into a per-day amount.

    Parameters
    ----------
    row : pandas.Series
        One daily row; `row.name` must be the timestamp of that day.

    Returns
    -------
    pandas.Series
        `row` divided by the length of the dekad the day belongs to: 10 days for
        days 1-10 and 11-20, `days_in_month - 20` days (8 to 11) for days 21-end.
    """
    day = row.name.day
    if day <= 10:
        return row / 10
    if day <= 20:
        return row / 10
    # The third dekad runs from day 21 to the end of the month, so its length varies.
    return row / (row.name.days_in_month - 20)

# Apply the per-day normalisation row by row (axis = 1): each row is passed to `dekad_day_norm`.
df_fit = df_fit.apply(dekad_day_norm, axis = 1)
df_fit.head()

Country,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen
AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),...,Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm),Rainfall (mm)
Datetime,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2014-01-10,0.0891,0.0083,0.098,0.038,0.0667,0.1488,0.4292,0.0692,0.0,0.0805,...,0.3267,0.0968,0.0631,0.0681,0.1757,0.0,0.0794,0.057,0.1684,0.0771
2014-01-11,0.0505,0.0,0.0542,0.0015,0.0717,0.1427,0.4454,0.0641,0.0071,0.0741,...,0.3102,0.0968,0.0296,0.0381,0.1784,0.0,0.0836,0.0744,0.1368,0.0637
2014-01-12,0.0505,0.0,0.0542,0.0015,0.0717,0.1427,0.4454,0.0641,0.0071,0.0741,...,0.3102,0.0968,0.0296,0.0381,0.1784,0.0,0.0836,0.0744,0.1368,0.0637
2014-01-13,0.0505,0.0,0.0542,0.0015,0.0717,0.1427,0.4454,0.0641,0.0071,0.0741,...,0.3102,0.0968,0.0296,0.0381,0.1784,0.0,0.0836,0.0744,0.1368,0.0637
2014-01-14,0.0505,0.0,0.0542,0.0015,0.0717,0.1427,0.4454,0.0641,0.0071,0.0741,...,0.3102,0.0968,0.0296,0.0381,0.1784,0.0,0.0836,0.0744,0.1368,0.0637


In [16]:
# Interactive plot of the daily series, drawn by the `plot` helper of the local `plotly_dataframe` module.
plot(df_fit, title = "Rainfall", yaxis = "Rainfall (mm)")

interactive(children=(ToggleButtons(description='Country', options=('Yemen',), value='Yemen'), RadioButtons(de…

In [17]:
# Write the daily frame, one CSV per country (level 0 of the columns), through `save`.
_ = df_fit.groupby(level = 0, axis = 1).apply(lambda x: save(x, name = "wfp_rainfall-daily"))