# Normalized Difference Vegetation Index (NDVI) time-series dataset released by WFP

The NDVI data are provided by WFP: https://dataviz.vam.wfp.org/seasonal_explorer/rainfall_vegetation/visualizations#

In [1]:
from IPython.display import HTML

# Inject a small script into the notebook page: it hides the code cells
# (`div.input`) as soon as the page is ready and provides a link that
# toggles their visibility.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

In [1]:
from plotly_dataframe import plot
import pandas as pd
import ntpath
import glob

In [2]:
# Define the path where the results arising from this analysis will be saved.
# The time-series of each country are written to a sub-folder of this
# directory named after the country.
path_to_save_data = "./time-series/"

# Month dekad granularity data - YEM, SYR, BFA

The NDVI data of three countries have been downloaded: *Yemen* (YEM), *Syria* (SYR) and *Burkina Faso* (BFA). These countries have been chosen according to the data of the FCS & rCSI indicators. The Nigeria data have not been downloaded because the NDVI data do not have the same administrative division as the FCS & rCSI indicators.

The downloaded files are those obtained by selecting "all" on the site.

In [3]:
# Read the data released by WFP regarding the NDVI in the adminstrata of the selected countries.
# Layout walked below: <path>/<country>/<adminstrata>/*.csv
path = "./wfp_data/NDVI/"
# Sort every listing: glob returns the entries in an arbitrary,
# filesystem-dependent order, which would make the row order of the
# concatenated table change from one machine to another.
all_folders = sorted(glob.glob(path + "*"))

dfs = []

for folder in all_folders:
    country = ntpath.basename(folder).split(".")[0]
    # List the content of the folder actually found on disk instead of
    # rebuilding its path from the parsed country name: the two differ as soon
    # as the folder name contains a dot.
    all_subfolders = sorted(glob.glob(folder + "/*"))
    for subfolder in all_subfolders:
        adminstrata = ntpath.basename(subfolder).split(".")[0]
        # All the files of the years.
        all_files = sorted(glob.glob(subfolder + "/*.csv"))
        for filename in all_files:
            df = pd.read_csv(filename)
            # Tag every row with its origin, which is only encoded in the folder names.
            df["AdminStrata"] = adminstrata
            df["Country"] = country
            dfs.append(df)

In [4]:
# Stack the tables read from the single files into one long table with a
# fresh 0..n-1 index, then show its first rows.
print("The data released by wfp:")
df = pd.concat(dfs, ignore_index = True)
df.head()

The data released by wfp:


Unnamed: 0,Year,Month,Dekad,NDVI,Average,AdminStrata,Country
0,2014,1,1,0.105812,0.112938,Abyan,Yemen
1,2015,1,1,0.110225,0.112938,Abyan,Yemen
2,2015,1,1,0.113498,0.112938,Abyan,Yemen
3,2016,1,1,0.104313,0.112938,Abyan,Yemen
4,2016,1,2,0.114902,0.111869,Abyan,Yemen


### Brief items description

- *Year*: reference year of the data collection.
- *Month*: reference month of the data collection.
- *Dekad*: reference month dekad of the data collection: these correspond to the calendar dates 1-10, 11-20, and 21-end of each month.
- *NDVI*: NDVI index in the reference period.
- *Average*: long-term average of the NDVI index for the same period, computed over the years from 2002 to 2013.
- *AdminStrata*: the adminstrata information of the reference country.
- *Country*: the country information.

In [5]:
# Remove the duplicated rows: some dates appear more than once because the
# same information is loaded from different files.
df = df.drop_duplicates()

In [6]:
# Convert the dekad to proper pandas datetime format.
# These correspond to the calendar dates 1-10, 11-20, and 21-end of each month.
def dekad_to_day(row):
    """Return the day of the month that closes the dekad of `row`.

    Parameters
    ----------
    row : mapping-like (e.g. a dataframe row)
        Must provide the "Year", "Month" and "Dekad" entries.

    Returns
    -------
    int
        10 for the first dekad, 20 for the second one and the number of days
        of the month for the third one.

    Raises
    ------
    ValueError
        If the dekad is not 1, 2 or 3.
    """
    # Cast to int: the values may arrive as floats (e.g. 2016.0), which must
    # not end up in the date being built.
    dekad = int(row["Dekad"])
    if dekad == 1:
        return 10
    if dekad == 2:
        return 20
    if dekad == 3:
        # Only the third dekad depends on the month length.
        first_day = pd.Timestamp(year = int(row["Year"]), month = int(row["Month"]), day = 1)
        return first_day.days_in_month
    # Fail loudly instead of silently returning None, which would only surface
    # later as an obscure error while assembling the dates.
    raise ValueError("Dekad must be 1, 2 or 3, got %r" % (row["Dekad"],))
    
# Replace the Year/Month/Dekad columns with a single "Datetime" column holding
# the last day of each dekad.
df["Day"] = df.apply(dekad_to_day, axis = 1)
df.drop(columns = "Dekad", inplace = True)
data = pd.to_datetime(df[["Year", "Month", "Day"]])
df.insert(1, "Datetime", data)
df.drop(["Year", "Month", "Day"], axis = 1, inplace = True)
df.sort_values("Datetime", ascending = True, inplace = True)
# Resample the series of each (country, adminstrata) pair to daily frequency:
# the days without a measure become NaN rows and the rows sharing the same date
# are averaged. Only the numeric columns are passed to `mean`: the string
# columns (Country, AdminStrata) make it fail on recent pandas versions and are
# restored from the group keys by `reset_index` anyway (which also leaves a
# fresh 0..n-1 index, so no further index reset is needed).
df = df.groupby(["Country", "AdminStrata"]).apply(lambda group: group.set_index("Datetime")[["NDVI", "Average"]].resample("D").mean()).reset_index()
df.head()

Unnamed: 0,Country,AdminStrata,Datetime,NDVI,Average
0,Yemen,Abyan,2014-01-10,0.105812,0.112938
1,Yemen,Abyan,2014-01-11,,
2,Yemen,Abyan,2014-01-12,,
3,Yemen,Abyan,2014-01-13,,
4,Yemen,Abyan,2014-01-14,,


In [7]:
# Reshape the long table into a summary dataframe of the time-series: one row
# per date and one column per (Country, AdminStrata, Indicator) triple.
df = df.drop(columns = ["Average"])
df = df.set_index(["Datetime", "Country", "AdminStrata"]).unstack(["Country", "AdminStrata"])
# After the unstack the top column level only holds the name of the value
# column: discard it and append an explicit "NDVI" indicator level instead.
df.columns = pd.MultiIndex.from_tuples(
    [(country, adminstrata, "NDVI") for _, country, adminstrata in df.columns],
    names = ["Country", "AdminStrata", "Indicator"],
)
freq = "D"
df.index.freq = freq

In [8]:
# Keep only the rows whose date falls inside the temporal range (bounds included).
start_date, end_date = "2014-01-10", "2020-03-10"
in_range = (df.index >= start_date) & (df.index <= end_date)
df = df.loc[in_range]
df.head()

Country,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen
AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,...,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI
Datetime,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2014-01-10,0.105812,0.074338,0.123559,0.156994,0.150623,0.101391,0.131938,0.213939,0.143621,0.147269,...,0.131578,0.160001,0.222735,0.110664,0.095933,0.235468,0.134444,0.138935,0.109319,0.166291
2014-01-11,,,,,,,,,,,...,,,,,,,,,,
2014-01-12,,,,,,,,,,,...,,,,,,,,,,
2014-01-13,,,,,,,,,,,...,,,,,,,,,,
2014-01-14,,,,,,,,,,,...,,,,,,,,,,


In [9]:
# Check whether the NaN values have to be interpolated at dekad level: keep
# only the reference days of the dekads (the 10th, the 20th and the last day of
# each month).
is_dekad_day = df.index.day.isin([10, 20]) | df.index.is_month_end
df = df.loc[is_dekad_day]
# Trim the dataframe to the span between the first and the last row holding a
# measured NDVI.
first_idx = df.first_valid_index()
last_idx = df.last_valid_index()
df = df.loc[first_idx:last_idx]
# Count the NaN values left on the dekad days, column by column.
print("Check if the dataframe contains NaN values:")
df.isnull().sum()

Check if the dataframe contains NaN values:


Country  AdminStrata       Indicator
Yemen    Abyan             NDVI         2
         Aden              NDVI         2
         Al Bayda          NDVI         2
         Al Dhale'e        NDVI         2
         Al Hudaydah       NDVI         2
         Al Jawf           NDVI         2
         Al Maharah        NDVI         2
         Al Mahwit         NDVI         2
         Amanat Al Asimah  NDVI         2
         Amran             NDVI         2
         Dhamar            NDVI         2
         Hadramaut         NDVI         2
         Hajjah            NDVI         2
         Ibb               NDVI         2
         Lahj              NDVI         2
         Marib             NDVI         2
         Raymah            NDVI         2
         Sa'ada            NDVI         2
         Sana'a            NDVI         2
         Shabwah           NDVI         2
         Taizz             NDVI         2
dtype: int64

In [10]:
# Linear interpolation of the NaN values left on the dekad days; at most 4
# consecutive NaN values are filled.
df = df.interpolate(method = "linear", limit = 4)

In [11]:
# Recheck the NaN values: total count over all the columns.
df.isnull().sum().sum()

0

In [12]:
# Resample data to daily level (as before): the days between two dekads are
# inserted again as NaN rows.
df = df.resample("D").mean()
df.head()

Country,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen
AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,...,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI
Datetime,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2014-01-10,0.105812,0.074338,0.123559,0.156994,0.150623,0.101391,0.131938,0.213939,0.143621,0.147269,...,0.131578,0.160001,0.222735,0.110664,0.095933,0.235468,0.134444,0.138935,0.109319,0.166291
2014-01-11,,,,,,,,,,,...,,,,,,,,,,
2014-01-12,,,,,,,,,,,...,,,,,,,,,,
2014-01-13,,,,,,,,,,,...,,,,,,,,,,
2014-01-14,,,,,,,,,,,...,,,,,,,,,,


## Resampling month granularity

Create a time-series for the NDVI data. The first time-series is created with a monthly granularity, taking the mean of the NDVI values in the corresponding month.

In [13]:
# Group the time-series by month taking the mean of the values (NaN days are
# ignored by the mean); each row is labelled with the last day of its month.
df_month = df.resample("M").mean()
df_month.head()

Country,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen
AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,...,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI
Datetime,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2014-01-31,0.111235,0.076468,0.130136,0.165309,0.155337,0.101451,0.131299,0.223841,0.145569,0.146765,...,0.131328,0.166359,0.232626,0.117924,0.098296,0.241495,0.134657,0.146218,0.110768,0.173474
2014-02-28,0.113021,0.077806,0.12874,0.157296,0.149561,0.102026,0.130553,0.208384,0.144126,0.142678,...,0.131284,0.160789,0.212333,0.118374,0.099015,0.219518,0.133242,0.142779,0.110337,0.162125
2014-03-31,0.112587,0.076726,0.124866,0.146307,0.140712,0.102731,0.129457,0.19503,0.14254,0.139526,...,0.1317,0.153578,0.189251,0.109906,0.098711,0.19899,0.132547,0.136461,0.109604,0.143677
2014-04-30,0.112277,0.075614,0.13562,0.153064,0.134344,0.102903,0.128353,0.194443,0.144786,0.138194,...,0.132534,0.143582,0.207117,0.107324,0.100248,0.235176,0.127998,0.139173,0.111051,0.143779
2014-05-31,0.111498,0.073976,0.146059,0.168463,0.133321,0.103011,0.126881,0.239148,0.150238,0.149236,...,0.132136,0.149823,0.26409,0.10865,0.101384,0.307477,0.133767,0.153996,0.111632,0.166577


In [14]:
# Plot the monthly NDVI time-series (`plot` is imported from `plotly_dataframe`).
plot(df_month, title = "NDVI index", yaxis = "NDVI", style = "lines+markers")

interactive(children=(ToggleButtons(description='Country', options=('Yemen',), value='Yemen'), RadioButtons(de…

In [15]:
# Now save the time-series of each country keeping as indices its own first and last valid index.
def save(group, name):
    """Write the time-series of one country to `<path_to_save_data>/<country>/<name>.csv`.

    Parameters
    ----------
    group : pandas.DataFrame
        Columns of a single country, with the country as top column level.
        The country is read from `group.name`.
    name : str
        Name of the output file, without extension.
    """
    import os

    country = group.name
    group = group[country]
    # Adjust time-series group: drop the leading and trailing rows without any value.
    first_idx = group.first_valid_index()
    last_idx = group.last_valid_index()
    group = group.loc[first_idx:last_idx]
    # Create the country folder when missing: `to_csv` does not create
    # directories and would fail on the first run.
    folder = os.path.join(path_to_save_data, country)
    os.makedirs(folder, exist_ok = True)
    # Save.
    group.to_csv(os.path.join(folder, name + ".csv"), index_label = False)

In [16]:
# Save the dataframe: one file per country (top level of the columns).
# An explicit loop is used instead of `groupby(level = 0, axis = 1).apply(...)`
# because grouping along the columns is deprecated in recent pandas versions.
for country in df_month.columns.get_level_values(0).unique():
    country_df = df_month.loc[:, df_month.columns.get_level_values(0) == country]
    # `save` reads the country from the `name` attribute of the group.
    country_df.name = country
    save(country_df, name = "wfp_NDVI-monthly")

## Day granularity

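The dataframe with a daily time step is obtained by filling the NaN values (information is available only for the dekads of each month) with the value of the next available dekad (backward fill), i.e. every day takes the value of the dekad it belongs to.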

In [17]:
# Build the dataframe with a daily time step: every NaN day takes the value of
# the next available dekad (backward fill), i.e. the dekad the day belongs to,
# since each dekad is dated on its last day.
# `bfill()` is the direct form of `fillna(method = "bfill")`, which is
# deprecated in recent pandas versions.
df_fit = df.bfill()
freq = "D"
df_fit.index.freq = freq
df_fit.head()

Country,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen,Yemen
AdminStrata,Abyan,Aden,Al Bayda,Al Dhale'e,Al Hudaydah,Al Jawf,Al Maharah,Al Mahwit,Amanat Al Asimah,Amran,...,Hadramaut,Hajjah,Ibb,Lahj,Marib,Raymah,Sa'ada,Sana'a,Shabwah,Taizz
Indicator,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,...,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI,NDVI
Datetime,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2014-01-10,0.105812,0.074338,0.123559,0.156994,0.150623,0.101391,0.131938,0.213939,0.143621,0.147269,...,0.131578,0.160001,0.222735,0.110664,0.095933,0.235468,0.134444,0.138935,0.109319,0.166291
2014-01-11,0.114648,0.077283,0.136013,0.17392,0.160705,0.101385,0.131132,0.238673,0.147407,0.148808,...,0.131236,0.174276,0.24672,0.122122,0.099703,0.2558,0.135785,0.152872,0.11197,0.18269
2014-01-12,0.114648,0.077283,0.136013,0.17392,0.160705,0.101385,0.131132,0.238673,0.147407,0.148808,...,0.131236,0.174276,0.24672,0.122122,0.099703,0.2558,0.135785,0.152872,0.11197,0.18269
2014-01-13,0.114648,0.077283,0.136013,0.17392,0.160705,0.101385,0.131132,0.238673,0.147407,0.148808,...,0.131236,0.174276,0.24672,0.122122,0.099703,0.2558,0.135785,0.152872,0.11197,0.18269
2014-01-14,0.114648,0.077283,0.136013,0.17392,0.160705,0.101385,0.131132,0.238673,0.147407,0.148808,...,0.131236,0.174276,0.24672,0.122122,0.099703,0.2558,0.135785,0.152872,0.11197,0.18269


In [18]:
# Plot the daily NDVI time-series (`plot` is imported from `plotly_dataframe`).
plot(df_fit, title = "NDVI index", yaxis = "NDVI")

interactive(children=(ToggleButtons(description='Country', options=('Yemen',), value='Yemen'), RadioButtons(de…

In [19]:
# Save the dataframe: one file per country (top level of the columns).
# An explicit loop is used instead of `groupby(level = 0, axis = 1).apply(...)`
# because grouping along the columns is deprecated in recent pandas versions.
for country in df_fit.columns.get_level_values(0).unique():
    country_df = df_fit.loc[:, df_fit.columns.get_level_values(0) == country]
    # `save` reads the country from the `name` attribute of the group.
    country_df.name = country
    save(country_df, name = "wfp_NDVI-daily")