<h1>Questions for DS4G-EIE AMA</h1>
First, some helper code.

In [None]:
import numpy as np # linear algebra
import folium

# Connect to Earth Engine
import ee
from kaggle_secrets import UserSecretsClient
from google.oauth2.credentials import Credentials

# Trigger the authentication flow.
#ee.Authenticate()

# Retrieve your refresh token.
#!cat ~/.config/earthengine/credentials

# Build OAuth2 credentials from a refresh token stored as a Kaggle user secret
# and use them to initialise the Earth Engine client.
user_secret = "AJR_EIE_test" # Your user secret, defined in the add-on menu of the notebook editor
refresh_token = UserSecretsClient().get_secret(user_secret)
# The first positional argument is left as None; presumably it is the access
# token, to be obtained through the refresh token -- confirm against the
# google-auth documentation. All other OAuth settings come from ee.oauth.
credentials = Credentials(
        None,
        refresh_token=refresh_token,
        token_uri=ee.oauth.TOKEN_URI,
        client_id=ee.oauth.CLIENT_ID,
        client_secret=ee.oauth.CLIENT_SECRET,
        scopes=ee.oauth.SCOPES)

# Initialize GEE with the credentials built above
ee.Initialize(credentials=credentials)

def add_ee_layer(self, ee_image_object, vis_params, name):
    """Add an Earth Engine image to a folium map as a toggleable tile overlay.

    The function is written to be attached to ``folium.Map`` as a method,
    which is why its first parameter is ``self``.

    Args:
        self: The folium map (or other folium container) the layer is added to.
        ee_image_object: Object accepted by ``ee.Image(...)``.
        vis_params: Visualisation parameters passed to ``getMapId``.
        name: Layer name passed to the tile layer.
    """
    map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)
    folium.raster_layers.TileLayer(
        tiles=map_id_dict['tile_fetcher'].url_format,
        # The copyright sign is written as an escape: the literal used to be
        # stored mis-encoded and rendered as "Â©" in the map attribution.
        attr="Map Data \u00a9 Google Earth Engine",
        name=name,
        overlay=True,
        control=True,
    ).add_to(self)
    
# modified from https://www.kaggle.com/paultimothymooney/how-to-get-started-with-the-earth-engine-data
def plot_ee_data_on_map(add_ee_layer, begin_date, end_date,
                        min_value, max_value, opacity=1.0, lat=18.233, long=-66.279,
                        zoom_country=True, res=0.01,
                        dataset="COPERNICUS/S5P/OFFL/L3_NO2",
                        column='tropospheric_NO2_column_number_density',
                        product="NO2"):
    """Display the mean of an Earth Engine image collection on a folium map.

    The collection is restricted to one band and a date range, averaged, and
    drawn clipped to a rectangle centred on (lat, long).

    Args:
        add_ee_layer: Function installed as ``folium.Map.add_ee_layer`` and
            used to draw the image; called as
            ``Map.add_ee_layer(image, vis_params, name)``.
        begin_date: Start of the period, passed to ``filterDate``.
        end_date: End of the period, passed to ``filterDate``.
        min_value: Lower bound of the colour scale ('min' in vis params).
        max_value: Upper bound of the colour scale ('max' in vis params).
        opacity: Opacity of the overlay.
        lat: Latitude of the map centre (the default is used together with
            ``zoom_country=True`` to show Puerto Rico).
        long: Longitude of the map centre.
        zoom_country: If True, use zoom level 8 and a rectangle of
            +/-0.33 degrees in latitude and +/-1.06 degrees in longitude;
            otherwise use zoom level 13 and a square of side ``res``.
        res: Side of the square, in the same units as ``lat``/``long``,
            used when ``zoom_country`` is False.
        dataset: Earth Engine image collection id.
        column: Band selected from the collection.
        product: Name given to the map layer.

    Returns:
        The averaged image, not clipped to the rectangle.
    """
    if zoom_country:  # zoom at the country level
        zoom_start = 8
        half_lat, half_long = 0.33, 1.06
    else:  # zoom at a power plant level
        zoom_start = 13
        half_lat = half_long = res / 2

    # Earth Engine takes (x, y) order, i.e. longitude first ...
    rectangle = ee.Geometry.Rectangle(
        [long - half_long, lat - half_lat, long + half_long, lat + half_lat])

    # ... while folium takes (y, x) order, i.e. latitude first.
    Map = folium.Map(location=[lat, long], zoom_start=zoom_start)
    # Installed on the class (not the instance) so it is bound as a method;
    # this reassigns the class attribute on every call.
    folium.Map.add_ee_layer = add_ee_layer

    sat_image = (ee.ImageCollection(dataset)
                 .select(column)
                 .filterDate(begin_date, end_date)
                 .mean())

    vis_params = {
        'min': min_value,
        'max': max_value,
        'opacity': opacity,
        'palette': ['green', 'blue', 'yellow', 'red'],
    }

    Map.add_ee_layer(sat_image.clip(rectangle), vis_params, product)
    Map.add_child(folium.LayerControl())
    display(Map)
    return sat_image

<h2>1.- Resolution</h2>
<h3>1.1 EE Resolution</h3>
The NO2 product has a resolution of 0.01 arc degrees (elsewhere it is said to be 7 x 7 km, or 5 x 5 km for images after October 2019, which is not our case). Let's see what a 0.01 arc degree "pixel" looks like:

In [None]:
# Draw a single "pixel" centred on the San Juan Power Plant
res = 0.01  # pixel resolution in arc degrees
lat, long = 18.427, -66.1045  # San Juan

# Period to average over and limits of the colour scale
begin_date, end_date = '2019-05-03', '2019-05-04'
min_value, max_value = 0.00001, 0.000025

sat_image = plot_ee_data_on_map(
    add_ee_layer,
    begin_date,
    end_date,
    min_value,
    max_value,
    lat=lat,
    long=long,
    res=res,
    zoom_country=False,
)

Let's check its size with haversine:

In [None]:
# distance in km (default) between points given as (lat, long) in degrees
from haversine import haversine

# Corners of the pixel: half a resolution step on each side of the centre
res = 0.01
lat1, lat2 = lat - res/2, lat + res/2
long1, long2 = long - res/2, long + res/2

# (y, x) geo style: latitude first
p1, p2, p3, p4 = [(y, x) for y in (lat1, lat2) for x in (long1, long2)]

# p1 -> p2 keeps the latitude fixed; p1 -> p3 keeps the longitude fixed
horizontal_km = haversine(p1, p2)
print("Horizontal pixel side", horizontal_km, "km")
vertical_km = haversine(p1, p3)
print("Vertical pixel side", vertical_km, "km")

<h3>*Question 1.1: Shouldn't the Horizontal and Vertical pixel sides be larger, like 7 x 7 km?*</h3>
<h3>1.2 TIFF image resolution</h3>
(from https://www.kaggle.com/paultimothymooney/explore-image-metadata-s5p-gfs-gldas)

In [None]:
import rasterio as rio
import os

# One of the Sentinel-5P NO2 GeoTIFF files shipped with the competition data
s5p_file = '/kaggle/input/ds4g-environmental-insights-explorer/eie_data/s5p_no2/s5p_no2_20190501T161114_20190507T174400.tif'


def preview_meta_data(file_name):
    """Print the main metadata of a raster file opened with rasterio.

    Prints the file name, bounding box, resolution, default tags, the tags of
    the IMAGE_STRUCTURE namespace and the number of channels.
    """
    with rio.open(file_name) as raster:
        print('Metadata for: ', file_name)
        print('Bounding Box:', raster.bounds)
        print('Resolution:', raster.res)
        print('Tags:', raster.tags())
        print('More Tags:', raster.tags(ns='IMAGE_STRUCTURE'))
        print('Number of Channels =', raster.count, '\n')


preview_meta_data(s5p_file)

<h3>*Question 1.2: The metadata of the S5P TIFF image says its resolution is 0.004 x 0.004,
while in EE it was 0.01. Why do they differ?*</h3>
<h2>2.- From EE to numpy</h2>
<h3>Now let's check the shape of the pixel array (should be 1x1)</h3>
(This is a workaround, I hope there are better ways)

Rather than taking the mean, as before, I first want to know how many images there are in the given time period, i.e. how many have contributed to the mean.

In [None]:
# The pixel as an Earth Engine rectangle, corners given longitude first
rectangle = ee.Geometry.Rectangle([long1, lat1, long2, lat2])

# All NO2 images of the period, without averaging them
collection = ee.ImageCollection('COPERNICUS/S5P/OFFL/L3_NO2').filterDate(
    begin_date, end_date)

# size() is evaluated on the server; getInfo() brings the number to the client
count = collection.size()
print('Count: ', str(count.getInfo()) + '\n')

It looks like there are 14 images in a period of just one day. Let's get the first image.

In [None]:
# Sample the pixel rectangle on the first image of the collection
image = collection.first()
band_arrs = image.sampleRectangle(rectangle)

# Pick the NO2 band and transfer its values to the client as a numpy array
band_arr = band_arrs.get('tropospheric_NO2_column_number_density')
np_arr = np.array(band_arr.getInfo())

<h3>Big crash!</h3>
I took a look at the metadata in the EE code editor but didn't see anything wrong in the 14 Images (features) in ImageCollection.

Then I developed a workaround to check which images were REAL images: looping through the orbits until I find an image which does not throw an exception when converting to numpy.

In [None]:
# Scan every orbit number in the collection and keep the pixel arrays of the
# images that can actually be transferred to the client.
orbitStats = collection.aggregate_stats("ORBIT")
# Transfer the statistics once: each getInfo() call fetches from the server.
orbit_values = orbitStats.getInfo()['values']
minOrbit = orbit_values['min']
maxOrbit = orbit_values['max']
arrayList = []
for orbit in range(minOrbit, maxOrbit+1):
    filtered = collection.filterMetadata('ORBIT', 'equals', orbit)
    image = filtered.first()
    try:
        date = image.date()
        # only arrives here in case of no error: incomplete image throws exception
        # NOTE(review): errors presumably surface only when getInfo() is
        # called below -- confirm which statement raises for incomplete images.
        band_arrs = image.sampleRectangle(rectangle)
        # Get individual band arrays.
        band_arr = band_arrs.get('tropospheric_NO2_column_number_density')

        # Transfer the arrays from server to client and cast as np array.
        try:
            np_arr = np.array(band_arr.getInfo())
            # only arrives here in case of no error
            arrayList.append(np_arr)
            print("Catch!", date.format().getInfo())
        except ee.EEException:
            print("Bad luck", date.format().getInfo())
    except ee.EEException:
        # incomplete image
        print("Very bad luck, incomplete image", orbit)

print("Fin")

<h3>*Question 2.1: Is there another way to catch the non-empty images in an ImageCollection than this orbit workaround?*</h3>
Among the 14 "images", just one turned out to have data. Let's check that its size is 1x1.

In [None]:
# Show the shape and the values of the first array collected by the orbit loop.
# arrayList[0] raises IndexError if that loop did not append any array.
print(arrayList[0].shape, arrayList[0])

<h3>*Question 2.2: Why does the array extracted from the pixel Image have a (2, 2) shape rather than (1, 1), if the pixel is supposed to have the minimum resolution (0.01 arc degrees)?*</h3>