# Normalized Difference Vegetation Index (NDVI) time-series dataset released by WFP 

The NDVI data are provided by the WFP: https://dataviz.vam.wfp.org/seasonal_explorer/rainfall_vegetation/visualizations#

In [1]:
import plotly.graph_objects as go
from ipywidgets import interact, widgets, fixed
from plotly_dataframe import plot 
import pandas as pd
import ntpath
import glob

In [2]:
# Load every NDVI csv file released by wfp for the adminstratas of Yemen.
path = "./wfp_data/Yemen/NDVI/"
all_folders = glob.glob(path + "*")

dfs = []

for folder_path in all_folders:
    # The entry name (text before the first dot) is used as the adminstrata label.
    adminstrata = ntpath.basename(folder_path).split(".")[0]
    # Collect the csv files stored inside the adminstrata folder.
    csv_paths = glob.glob(path + adminstrata + "/*.csv")
    # Tag each loaded table with the adminstrata it belongs to.
    dfs.extend(pd.read_csv(csv_path).assign(AdminStrata = adminstrata) for csv_path in csv_paths)

In [3]:
# Stack the per-file tables row-wise into a single dataframe with a fresh integer index.
print("The data released by wfp:")
df = pd.concat(dfs, ignore_index = True)
df.head()

The data released by wfp:


Unnamed: 0,Year,Month,Dekad,NDVI,Average,AdminStrata
0,2014,1,1,0.105812,0.112938,Abyan
1,2015,1,1,0.110225,0.112938,Abyan
2,2015,1,1,0.113498,0.112938,Abyan
3,2016,1,1,0.104313,0.112938,Abyan
4,2016,1,2,0.114902,0.111869,Abyan


In [4]:
# Remove fully identical rows (some dates appear more than once because the data are loaded from different files).
df = df.drop_duplicates()

In [5]:
# Convert the dekad of each row into a day of the month.
# Dekads 1, 2 and 3 correspond to the calendar dates 1-10, 11-20 and 21-end of each month.
def dekad_to_day(row):
    """Return the last calendar day covered by the dekad stored in `row`.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Must provide "Dekad"; "Year" and "Month" are read only for dekad 3.

    Returns
    -------
    int
        10 for dekad 1, 20 for dekad 2, and the number of days in the month
        for dekad 3.

    Raises
    ------
    ValueError
        If the dekad is not 1, 2 or 3 (instead of silently returning None).
    """
    dekad = row["Dekad"]
    if dekad == 1:
        return 10
    if dekad == 2:
        return 20
    if dekad == 3:
        # Only the third dekad depends on the month length. The int() casts keep
        # this working when the columns were loaded as floats (e.g. 2014.0).
        first_of_month = pd.Timestamp(year = int(row["Year"]), month = int(row["Month"]), day = 1)
        return first_of_month.days_in_month
    raise ValueError("Unexpected dekad value {!r}: expected 1, 2 or 3.".format(dekad))
    
# Build the observation date of each row from Year / Month / last day of the dekad.
df["Day"] = df.apply(dekad_to_day, axis = 1)
df["Datetime"] = pd.to_datetime(df[["Year", "Month", "Day"]])
df = df.drop(columns = ["Year", "Month", "Dekad", "Day"]).sort_values("Datetime", ascending = True)
# Resample every adminstrata to a daily grid; days without a measurement become NaN and
# rows sharing the same date are averaged.
# Only the date and the numeric columns are handed to the resampler: averaging the text
# column "AdminStrata" raises a TypeError on pandas >= 2.0.
value_columns = ["NDVI", "Average"]
df = (
    df.groupby("AdminStrata")[["Datetime"] + value_columns]
    .apply(lambda group: group.set_index("Datetime").resample("D").mean())
    .reset_index()
)
df.head()

Unnamed: 0,AdminStrata,Datetime,NDVI,Average
0,Abyan,2014-01-10,0.105812,0.112938
1,Abyan,2014-01-11,,
2,Abyan,2014-01-12,,
3,Abyan,2014-01-13,,
4,Abyan,2014-01-14,,


In [6]:
# Reshape to a wide time-series table: one row per date, one (AdminStrata, Indicator) column per adminstrata.
ndvi_long = df.drop(columns = ["Average"])
df = ndvi_long.set_index(["Datetime", "AdminStrata"])["NDVI"].unstack("AdminStrata")
# Add the indicator name as a second column level and name both levels.
df.columns = pd.MultiIndex.from_product([df.columns, ["NDVI"]], names = ["AdminStrata", "Indicator"])
# Declare the index as daily.
freq = "D"
df.index.freq = freq
df.head()

AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,...,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-10,0.105812,0.074338,0.123559,0.156994,0.150623,0.101391,0.131938,0.213939,0.143621,0.147269,...,0.131578,0.160001,0.222735,0.110664,0.095933,0.235468,0.134444,0.138935,0.109319,0.166291
2014-01-11,,,,,,,,,,,...,,,,,,,,,,
2014-01-12,,,,,,,,,,,...,,,,,,,,,,
2014-01-13,,,,,,,,,,,...,,,,,,,,,,
2014-01-14,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# Keep only the dekadal dates (day 10, day 20 and month end) to check whether NaN values need to be interpolated.
is_dekad_end = df.index.day.isin([10, 20]) | df.index.is_month_end
df = df.loc[is_dekad_end]
# Keep the dataframe from the first to the last valid index based on the measured NDVI.
first_idx = df.first_valid_index()
last_idx = df.last_valid_index()
df = df.loc[first_idx:last_idx]
# Count the NaN values of each column.
print("Check if the dataframe contains NaN values:")
df.isna().sum()

Check if the dataframe contains NaN values:


AdminStrata       Indicator
Abyan             NDVI         2
Aden              NDVI         2
Al Bayda          NDVI         2
Al Dhale'e        NDVI         2
Al Hudaydah       NDVI         2
Al Jawf           NDVI         2
Al Maharah        NDVI         2
Al Mahwit         NDVI         2
Amanat Al Asimah  NDVI         2
Amran             NDVI         2
Dhamar            NDVI         2
Hadramaut         NDVI         2
Hajjah            NDVI         2
Ibb               NDVI         2
Lahj              NDVI         2
Marib             NDVI         2
Raymah            NDVI         2
Sa'ada            NDVI         2
Sana'a            NDVI         2
Shabwah           NDVI         2
Taizz             NDVI         2
dtype: int64

In [8]:
# Fill the NaN values of the dekadal series column by column.
# The "linear" method ignores the index and treats consecutive rows as equally spaced.
df = df.interpolate(method = "linear", axis = 0)

In [9]:
# Expand the dekadal series to a daily index; days without a dekadal value become NaN.
daily_bins = df.resample("D")
df = daily_bins.mean()
df.head()

AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,...,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-10,0.105812,0.074338,0.123559,0.156994,0.150623,0.101391,0.131938,0.213939,0.143621,0.147269,...,0.131578,0.160001,0.222735,0.110664,0.095933,0.235468,0.134444,0.138935,0.109319,0.166291
2014-01-11,,,,,,,,,,,...,,,,,,,,,,
2014-01-12,,,,,,,,,,,...,,,,,,,,,,
2014-01-13,,,,,,,,,,,...,,,,,,,,,,
2014-01-14,,,,,,,,,,,...,,,,,,,,,,


In [10]:
# Group the time monthly taking the SUM of the values found in each month.
# NOTE(review): the previous comment said "mean" but the code sums; confirm which
# aggregation is intended before changing it, since the exported csv depends on it.
monthly_bins = df.resample("M")
df_month = monthly_bins.sum()
df_month.head()

AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,...,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-31,0.333705,0.229404,0.390407,0.495926,0.46601,0.304352,0.393896,0.671522,0.436707,0.440294,...,0.393984,0.499076,0.697879,0.353771,0.294888,0.724484,0.403971,0.438653,0.332303,0.520421
2014-02-28,0.339062,0.233417,0.386221,0.471887,0.448684,0.306079,0.391659,0.625152,0.432379,0.428034,...,0.393851,0.482368,0.636999,0.355123,0.297046,0.658554,0.399725,0.428336,0.33101,0.486374
2014-03-31,0.337761,0.230179,0.374598,0.43892,0.422136,0.308194,0.388371,0.58509,0.427621,0.418578,...,0.3951,0.460734,0.567754,0.329718,0.296132,0.59697,0.397641,0.409384,0.328811,0.431032
2014-04-30,0.33683,0.226842,0.406861,0.459193,0.403031,0.30871,0.38506,0.583328,0.434357,0.414582,...,0.397602,0.430746,0.621352,0.321972,0.300744,0.705527,0.383995,0.417518,0.333153,0.431336
2014-05-31,0.334494,0.221929,0.438178,0.50539,0.399964,0.309033,0.380642,0.717445,0.450714,0.447708,...,0.396407,0.44947,0.792269,0.325949,0.304151,0.922432,0.4013,0.461988,0.334897,0.499731


In [11]:
# Interactive plot of the monthly NDVI time-series.
plot(
    df_month,
    title = "NDVI",
    yaxis = "NDVI",
    style = "lines+markers",
)

interactive(children=(RadioButtons(description='Select', options=('Time-series', 'Missing values'), value='Tim…

In [12]:
# Save the monthly NDVI time-series; index_label = False writes no header for the index column.
monthly_csv_path = "./time-series/wfp_NDVI_month_Yemen.csv"
df_month.to_csv(monthly_csv_path, index_label = False)

In [13]:
# Build the daily time-series by filling the days between measurements with a
# second-order polynomial interpolation.
daily_bins = df.resample("D")
df_fit = daily_bins.interpolate(method = "polynomial", order = 2)
# Declare the index of the fitted dataframe as daily.
freq = "D"
df_fit.index.freq = freq
df_fit.head()

AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,...,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-10,0.105812,0.074338,0.123559,0.156994,0.150623,0.101391,0.131938,0.213939,0.143621,0.147269,...,0.131578,0.160001,0.222735,0.110664,0.095933,0.235468,0.134444,0.138935,0.109319,0.166291
2014-01-11,0.107218,0.074755,0.125704,0.159991,0.152445,0.101382,0.131831,0.218671,0.144279,0.147739,...,0.13153,0.162639,0.227264,0.112445,0.096524,0.239684,0.13475,0.14134,0.109767,0.169331
2014-01-12,0.108507,0.075146,0.127649,0.162697,0.154086,0.101374,0.131731,0.222902,0.144874,0.148138,...,0.131486,0.165008,0.23132,0.114084,0.097068,0.243415,0.135018,0.14352,0.110174,0.17206
2014-01-13,0.109681,0.075509,0.129394,0.165114,0.155546,0.101369,0.131636,0.226631,0.145407,0.148467,...,0.131444,0.167107,0.234902,0.115583,0.097564,0.246661,0.135248,0.145476,0.110541,0.174477
2014-01-14,0.110738,0.075844,0.130939,0.167241,0.156825,0.101365,0.131547,0.229857,0.145878,0.148727,...,0.131405,0.168938,0.238011,0.11694,0.098013,0.249421,0.135439,0.147207,0.110867,0.176584


In [14]:
# Interactive plot of the daily interpolated NDVI time-series.
# The title and axis label previously read "Rainfall" / "Rainfall (mm)", although
# df_fit holds NDVI values; they now match the monthly NDVI plot.
plot(df_fit, title = "NDVI", yaxis = "NDVI")

interactive(children=(RadioButtons(description='Select', options=('Time-series', 'Missing values'), value='Tim…

In [15]:
# Save the daily NDVI time-series; index_label = False writes no header for the index column.
daily_csv_path = "./time-series/wfp_NDVI_daily_Yemen.csv"
df_fit.to_csv(daily_csv_path, index_label = False)