# Chapter 8 - Example: Land Data
### Changes in vegetation index through the years for a given area

In this chapter we don't use data from the cloud; instead, we show how to obtain time series data from the internet and analyze it. In the future (hopefully soon), when this data is available in the cloud in a similar format, accessing it from the cloud will be similar to what we did in chapter 6.

This script reads NDVI (vegetation index) files from a `thredds` server, compiles the data for the selected region and time, and then analyzes the change in vegetation index through time.

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import xarray as xr
import os
import re
from datetime import date
import urllib as ur
import requests
import matplotlib.pyplot as plt 
import hvplot.pandas
import hvplot.xarray
xr.set_options(display_style="html")  # use the HTML representation when displaying xarray objects

# Silence warnings so that the cell outputs stay readable
import warnings
warnings.simplefilter('ignore') # ignore all warnings issued from here on

In [None]:
# Analysis parameters
# Bounding box of the region of interest
lat1 = 16  # lower latitude limit
lat2 = 18  # upper latitude limit
lon1 = -98  # lower longitude limit
lon2 = -96  # upper longitude limit
# Calendar day to sample: the script fetches this same day from every available year
mon = 7  # month
dy = 31  # day of the month

***
## Obtaining the data

In [None]:
# Build the list of catalog entries that hold the chosen day, one request per year.
ayrs = list(range(1989, date.today().year))  # data are available since 1989 (hard coded); range() stops before the current year
iurl = 'https://www.ncei.noaa.gov/thredds/fileServer/'  # base link used later to download the files
flist = []  # catalog entries found, in year order
tdt = []  # time stamps, filled in when the files are read

for i in ayrs:
    # File names embed the date as _YYYYMMDD_. The tag only depends on the year,
    # so build it once here instead of once per HTML fragment.
    dtnm = f'_{i}{mon:02d}{dy:02d}_'
    r = requests.get(f'https://www.ncei.noaa.gov/thredds/catalog/cdr/ndvi/files/{i}/catalog.html')
    if not r.ok:
        # Keep going with the other years, but say why this one is missing.
        print(f'skipping {i}: catalog request returned HTTP {r.status_code}')
        continue
    # The catalog comes back as HTML; splitting on the single quote isolates the link targets.
    for j in r.text.split("'"):
        # Keep only link entries ("catalog.html?...") that carry the wanted date tag.
        if dtnm in j and 'catalog.html?' in j:
            flist.append(j)

In [None]:
# Download each file, cut out the region of interest and stack the results in nvi_dt.
import urllib.request  # 'import urllib' alone does not guarantee that the request submodule is loaded

tdt = []  # start from an empty list so re-running this cell does not duplicate time stamps
nvi_dt = None  # allocated once the shape of the first regional subset is known
for iy, i in enumerate(flist):
    iurl2 = iurl + i.split('=')[1]  # the text after the first '=' of the catalog entry is appended to the server link
    print(iy, i)  # print the entry, just for information
    # urllib (not requests) saves the binary file to disk under a fixed temporary name
    ur.request.urlretrieve(iurl2, 'tmp.nc')
    try:
        # Read everything that is needed while the file is still open: xarray loads
        # data lazily, so the values must be pulled into memory before closing.
        with xr.open_dataset('tmp.nc') as nvi:
            tdt.append(nvi.time.data[0])  # date of this file
            tmp1 = nvi.NDVI[0, :, :].sel(
                latitude=slice(lat2, lat1),  # slice runs from lat2 to lat1, as in the original selection
                longitude=slice(lon1, lon2),
            ).load()
    finally:
        os.remove('tmp.nc')  # always delete the file so the name can be reused for the next download
    if nvi_dt is None:
        nvi_dt = np.full((len(flist),) + tmp1.shape, np.nan)
    nvi_dt[iy, :, :] = tmp1.data  # store the regional subset for this year

if nvi_dt is None:
    raise RuntimeError('no NDVI files were found for the requested day, nothing was downloaded')

In [None]:
# Wrap the stacked numpy array in a labelled xarray DataArray so it can be analyzed
# by coordinate. Latitude/longitude values and attributes are copied from the last subset read.
ndvi = xr.DataArray(
    data=nvi_dt,
    dims=['time', 'latitude', 'longitude'],
    coords={
        'time': tdt,
        'latitude': tmp1.latitude.values,
        'longitude': tmp1.longitude.values,
    },
    attrs=tmp1.attrs,
)
ndvi  # display the result

***
## Now, let's do some analysis

First we average all years to get a climatological field of the vegetation index.

In [None]:
# Average over the time dimension (the first axis of ndvi) to get the mean field
mean_ndvi = ndvi.mean(dim='time')
# Yellow-to-green color map, which suits a vegetation index
mean_ndvi.plot(cmap='YlGn')
plt.show()

Now we calculate the spatial anomaly for a given year by taking the difference.

In [None]:
syr = 2018 # year to compare against the mean field
# Build the time label for the chosen day of that year (YYYY-MM-DD), using syr
# rather than a hard-coded year so that changing syr actually changes the plot.
(ndvi.sel(time=str(syr)+'-'+str(mon).zfill(2)+'-'+str(dy).zfill(2))-mean_ndvi).plot()
plt.show()

## For a time series analysis, we want to average over latitude and longitude.

But let's add a condition. We select only locations where the vegetation index is equal to or larger than 0.3 - this means we are looking at the change in vegetation for locations that at the beginning have at least 30% vegetation coverage.

In [None]:

# Regional mean NDVI per time step, restricted to grid points that had NDVI >= 0.3
# in the first time step.
# The mask is boolean. Multiplying by the masked NDVI field itself would scale every
# year by the first year's values instead of just selecting grid points.
# The scalar 'time' coordinate is dropped so the mask carries only latitude/longitude.
mask0 = (ndvi[0, :, :] >= 0.3).drop_vars('time')
veg_area = int(mask0.sum())  # number of grid points kept by the mask
# where() keeps values inside the mask and sets the rest to NaN; mean() skips NaN.
veg_mean = ndvi.where(mask0).mean(dim=['latitude', 'longitude'])
# Take the years from the data itself so the bars always match the time steps that
# were actually downloaded (a year without a file has no entry in ndvi).
yrs = ndvi.time.dt.year.values  # assumes the time coordinate is datetime-like

plt.bar(yrs, (veg_mean - veg_mean.mean()).values)  # departure from the multi-year mean
plt.title('Vegetation Index Change')
plt.ylabel('NDVI')
plt.grid(True, alpha=0.3)
plt.show()

***
# Another simple example for when there is a single NetCDF file for all data. 
### In this case, Surface Temperature from the NASA GISTEMP dataset. Note that this file is compressed.

In [None]:
import gzip  # standard library; only this cell needs it

# NOTE: this rebinds iurl, which the NDVI download cell also uses for its server link.
iurl = 'https://data.giss.nasa.gov/pub/gistemp/gistemp250_GHCNv4.nc.gz' # the location of the compressed file
r = requests.get(iurl) # using requests again
r.raise_for_status() # stop here on a failed download instead of writing an error page to disk
# Decompress in memory and write the NetCDF file under a single, consistent name.
# This avoids depending on an external gunzip program.
with open('gistemp.nc', 'wb') as f:
    f.write(gzip.decompress(r.content))

# Normal xarray analysis now
tas = xr.open_dataset('gistemp.nc')
# Plot the surface temperature anomalies at one location for the selected period
tas.tempanomaly.sel(lat=17,lon=-97,time=slice('1980-01-01','2021-08-15')).plot()
plt.grid()
tas