## Extraction of Data from NASADEM Dataset (GEE) ##

This notebook follows the same extraction process that is used for all the other datasets.

In [2]:
# Import Google Earth Engine API and Initialize it. 
import time

import ee
import pandas as pd

# Authenticate this session with Earth Engine, then initialize the client
# against the Google Cloud project named below.
GEE_PROJECT = "ey-data-and-ai-challenge"

ee.Authenticate()
ee.Initialize(project=GEE_PROJECT)

In [3]:
# Load the water-quality training samples and remove the three columns listed
# below. Latitude, Longitude and Sample Date are kept; only the coordinates
# are used for the NASADEM elevation lookup further down.
# The row index is copied into an explicit `id` column so the values returned
# by Earth Engine can be joined back onto these rows.

DROPPED_COLUMNS = ['Total Alkalinity', 'Electrical Conductance', 'Dissolved Reactive Phosphorus']

wq_df = (
    pd.read_csv('../data/water_quality_training_dataset.csv')
    .drop(columns=DROPPED_COLUMNS)
    .assign(id=lambda frame: frame.index)
)
wq_df.head()

Unnamed: 0,Latitude,Longitude,Sample Date,id
0,-28.760833,17.730278,02-01-2011,0
1,-26.861111,28.884722,03-01-2011,1
2,-26.45,28.085833,03-01-2011,2
3,-27.671111,27.236944,03-01-2011,3
4,-27.356667,27.286389,03-01-2011,4


In [4]:
# Convert coordinates to ee.Features

# Pull the needed columns out as plain Python lists so each value handed to
# Earth Engine is a native float/int.
longitudes = wq_df['Longitude'].tolist()
latitudes = wq_df['Latitude'].tolist()
sample_ids = wq_df['id'].tolist()

# One feature per sample: a point (longitude first, then latitude) widened by
# a 100 m buffer in case of inexact coordinates, carrying the sample `id` as
# its only property.
features = [
    ee.Feature(ee.Geometry.Point([lon, lat]).buffer(100), {'id': sample_id})
    for lon, lat, sample_id in zip(longitudes, latitudes, sample_ids)
]

In [9]:
# Read NASADEM dataset as Earth Engine image

nasadem_asset = ee.Image("NASA/NASADEM_HGT/001")
nasa_img = nasadem_asset.select('elevation')  # keep only the band we need

In [10]:
# Wrap the Python list of per-sample features in a single FeatureCollection,
# the form in which it is later passed to reduceRegions().
fc = ee.FeatureCollection(features) # convert features into feature collection

In [11]:
# Run geospatial data query using reduceRegions()

# Mean reducer whose single output is named 'elevation', so the resulting
# property on every feature carries that name.
reducer = ee.Reducer.mean()
reducer = reducer.setOutputs(['elevation'])

# Reduce the elevation image over every buffered sample geometry at a 30 m
# scale.
nasa_collection = nasa_img.reduceRegions(
    collection=fc,
    reducer=reducer,
    scale=30,
)

In [12]:
# Process data and export to Google Drive

task = ee.batch.Export.table.toDrive(
    collection=nasa_collection,
    description="nasadem_csv_export",
    fileNamePrefix="nasadem_features_training",
    fileFormat='CSV'
)
task.start()

# The export runs asynchronously on Earth Engine's servers and start() returns
# immediately. Wait here until the task leaves its active states so that a
# failed or cancelled export is reported instead of going unnoticed.
POLL_INTERVAL_S = 30
while task.active():
    time.sleep(POLL_INTERVAL_S)

export_status = task.status()
if export_status.get('state') != ee.batch.Task.State.COMPLETED:
    raise RuntimeError(
        f"Earth Engine export ended in state {export_status.get('state')}: "
        f"{export_status.get('error_message', 'no error message reported')}"
    )

# NOTE: the CSV is written to Google Drive, not to the local ../data folder.
# It has to be downloaded and placed at
# ../data/nasadem_features_training.csv before the next cell can read it.

In [13]:
# Load the exported table, drop the `.geo` and `system:index` columns that are
# not needed, and attach the original sample columns by joining on `id`
# (a left join, so every exported row is kept). The helper `id` column is
# removed once the join is done.
nasa_df = (
    pd.read_csv("../data/nasadem_features_training.csv")
    .drop(columns=[".geo", "system:index"])
    .merge(wq_df, on='id', how='left')
    .drop(columns=['id'])
)
nasa_df

Unnamed: 0,elevation,Latitude,Longitude,Sample Date
0,167.155040,-28.760833,17.730278,02-01-2011
1,1521.251493,-26.861111,28.884722,03-01-2011
2,1471.379902,-26.450000,28.085833,03-01-2011
3,1342.659998,-27.671111,27.236944,03-01-2011
4,1355.983661,-27.356667,27.286389,03-01-2011
...,...,...,...,...
9314,957.777185,-27.527500,30.858056,23-12-2015
9315,1521.251493,-26.861111,28.884722,23-12-2015
9316,1281.373723,-26.984722,26.632278,23-12-2015
9317,1249.195105,-27.935000,26.126667,23-12-2015


In [16]:
# Persist the merged elevation table.
# NOTE(review): this is the same path the earlier cell reads the raw Earth
# Engine export from. After this write the file no longer contains the
# `.geo`, `system:index` or `id` columns, so re-running that cell will fail
# until the raw export is restored. Consider a separate output file name.
# NOTE(review): the DataFrame index is written too (no index=False), so the
# saved file gains an unnamed leading column.
output_path = "../data/nasadem_features_training.csv"
nasa_df.to_csv(output_path)