In [1]:
# Import statements

from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt
from datetime import date
from env_vars import sentinel_username,sentinel_password
import glob
import pandas as pd
import subprocess

In [2]:
# Copying functions from the "Download TCI Files with Google API" notebook

def get_api():
    """Returns a SentinelAPI object linked to the imported username and password"""
    
    return SentinelAPI(sentinel_username, sentinel_password, "https://scihub.copernicus.eu/apihub/")


def get_products_df(api, footprint, date_start, date_end,
                 area='IsWithin',
                 platform='Sentinel-2', cloudcover=(1,5)):
    """Queries for products from the Sentinel API."""
    
    products = api.query(footprint,
                         date=(date_start, date_end),
                         area_relation=area,
                         platformname=platform,
                         cloudcoverpercentage=cloudcover)
    
    return api.to_dataframe(products)

In [3]:
api = get_api()

In [18]:
# Polygon containing the entire congo basin

footprint = geojson_to_wkt(read_geojson('./data/Geometry/congo_basin_boundary/Congo_Basin_Boundary_no_islands_v4.geojson'))

In [5]:
# Since I'm pulling so many products, I'll do it by week to (hopefully!) lessen timeouts

dates = [
    (date(2020, 9, 1), date(2020, 9, 7),
     date(2020, 9, 8), date(2020, 9, 14),
     date(2020, 9, 15), date(2020, 9, 21),
     date(2020, 9, 22), date(2020, 9, 30))
]

In [8]:
df = pd.DataFrame()

for date_range in dates:
    start = date_range[0]
    end = date_range[1]
    print(f'Getting data from {start} to {end}')
    api = get_api()
    df2 = get_products_df(api, footprint, start, end)
    df = pd.concat([df, df2])
    # Do both "IsWithin" and "Intersects"
    df3 = get_products_df(api, footprint, start, end, area='Intersects')
    df = pd.concat([df, df3])
    
print('Finished')

Getting data from 2020-09-01 to 2020-09-07
Finished


In [9]:
print(df.shape)

df.head()

(58, 33)


Unnamed: 0,title,link,link_alternative,link_icon,summary,datatakesensingstart,beginposition,endposition,ingestiondate,orbitnumber,...,instrumentshortname,size,s2datatakeid,producttype,platformidentifier,orbitdirection,platformserialidentifier,processinglevel,identifier,uuid
b3216196-7720-48c5-b003-556e591ec574,S2B_MSIL1C_20200906T084559_N0209_R107_T34NEF_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2020-09-06T08:45:59.024Z, Instrument: MS...",2020-09-06 08:45:59.024,2020-09-06 08:45:59.024,2020-09-06 08:45:59.024,2020-09-07 03:56:12.706,18294,...,MSI,276.41 MB,GS2B_20200906T084559_018294_N02.09,S2MSI1C,2017-013A,DESCENDING,Sentinel-2B,Level-1C,S2B_MSIL1C_20200906T084559_N0209_R107_T34NEF_2...,b3216196-7720-48c5-b003-556e591ec574
6e3af652-f139-4d62-87d4-5b06b83332e6,S2B_MSIL1C_20200906T084559_N0209_R107_T34MED_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2020-09-06T08:45:59.024Z, Instrument: MS...",2020-09-06 08:45:59.024,2020-09-06 08:45:59.024,2020-09-06 08:45:59.024,2020-09-07 03:52:12.045,18294,...,MSI,29.80 MB,GS2B_20200906T084559_018294_N02.09,S2MSI1C,2017-013A,DESCENDING,Sentinel-2B,Level-1C,S2B_MSIL1C_20200906T084559_N0209_R107_T34MED_2...,6e3af652-f139-4d62-87d4-5b06b83332e6
b6ca0581-a492-4e8e-87c2-c89d7befed8e,S2B_MSIL1C_20200906T084559_N0209_R107_T33MZT_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2020-09-06T08:45:59.024Z, Instrument: MS...",2020-09-06 08:45:59.024,2020-09-06 08:45:59.024,2020-09-06 08:45:59.024,2020-09-07 03:40:32.266,18294,...,MSI,459.46 MB,GS2B_20200906T084559_018294_N02.09,S2MSI1C,2017-013A,DESCENDING,Sentinel-2B,Level-1C,S2B_MSIL1C_20200906T084559_N0209_R107_T33MZT_2...,b6ca0581-a492-4e8e-87c2-c89d7befed8e
249f89de-9d18-4952-b7d7-14d7fd75c0c2,S2B_MSIL1C_20200903T083609_N0209_R064_T34MEU_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2020-09-03T08:36:09.024Z, Instrument: MS...",2020-09-03 08:36:09.024,2020-09-03 08:36:09.024,2020-09-03 08:36:09.024,2020-09-05 23:38:31.890,18251,...,MSI,731.91 MB,GS2B_20200903T083609_018251_N02.09,S2MSI1C,2017-013A,DESCENDING,Sentinel-2B,Level-1C,S2B_MSIL1C_20200903T083609_N0209_R064_T34MEU_2...,249f89de-9d18-4952-b7d7-14d7fd75c0c2
896cb38b-dc22-4269-9fa8-7c44398b084c,S2B_MSIL1C_20200903T083609_N0209_R064_T34MDV_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2020-09-03T08:36:09.024Z, Instrument: MS...",2020-09-03 08:36:09.024,2020-09-03 08:36:09.024,2020-09-03 08:36:09.024,2020-09-05 23:30:46.185,18251,...,MSI,711.87 MB,GS2B_20200903T083609_018251_N02.09,S2MSI1C,2017-013A,DESCENDING,Sentinel-2B,Level-1C,S2B_MSIL1C_20200903T083609_N0209_R064_T34MDV_2...,896cb38b-dc22-4269-9fa8-7c44398b084c


In [4]:
# Rewrite this function so that it pulls products by week

def get_products_df_for_year(api, footprint, year, cloudcover, df_list, date_range_list):
    # It's basically impossible to run this without encountering a timeout at some point.
    # So I include "df_list" and "date_range_list" arguments. Because lists are passed by reference,
    # these list objects retain their information even if you re-run the function.
    """Obtains a products_df for an entire year.
    Operates week by week, refreshing the API each time, to avoid timeouts."""
    
    # Tracks how many days are in each month.
    months_dict = {
        1: 31,
        2: 28,
        3: 31,
        4: 30,
        5: 31,
        6: 30,
        7: 31,
        8: 31,
        9: 30,
        10: 31,
        11: 30,
        12: 31
    }
    
    month_start = 1
    month_end = 13
    if year < 2015 or year > 2020:
        # These years have no products
        return None
    elif year == 2015:
        # 2015 only has products starting in July
        month_start = 7
    elif year == 2016:
        # 2016 was a leap year
        months_dict[2] = 29
    elif year == 2020:
        # 2020 is a leap year.
        months_dict[2] = 29
        month_end = 8

    date_ranges = []
    for month in range(month_start, month_end):
        date_ranges.append((date(year, month, 1), date(year, month, 7)))
        date_ranges.append((date(year, month, 8), date(year, month, 14)))
        date_ranges.append((date(year, month, 15), date(year, month, 21)))
        date_ranges.append((date(year, month, 22), date(year, month, months_dict[month])))
        
    if df_list:
        # If df_list is not empty, start with the last dataframe in it.
        products_df = df_list[-1]
    else:
        # Otherwise, start with an empty dataframe.
        products_df = pd.DataFrame()

    for date_range in date_ranges:
        # Only pull products from date ranges **not in** the date_range_list
        if date_range not in date_range_list:
            date_start = date_range[0]
            date_end = date_range[1]
            print(f'Getting products from {date_start} to {date_end}')
            products_df_2 = get_products_df(api, footprint, date_start, date_end, cloudcover=cloudcover)
            products_df = pd.concat([products_df, products_df_2])
            products_df_3 = get_products_df(api, footprint, date_start, date_end, area='Intersects', cloudcover=cloudcover)
            products_df = pd.concat([products_df, products_df_2])
            print('Products so far:', len(products_df))
            df_list.append(products_df)
            date_range_list.append(date_range)
            # As a result of appending these, df_list will keep track of each product_df,
            # while date_range_list will keep track of all the date ranges we did queries for.
        
    return products_df

In [29]:
df_list = []
date_range_list = []

# Rerun the function as many times as it takes until we get the whole year.
df = get_products_df_for_year(api, footprint, 2019, (0, 10), df_list, date_range_list)

Getting products from 2019-01-01 to 2019-01-07


Querying products: 100%|██████████████████████████████████████████████████████| 112/112 [00:05<00:00, 19.96 products/s]


Products so far: 126
Getting products from 2019-01-08 to 2019-01-14


Querying products: 100%|██████████████████████████████████████████████████████| 150/150 [00:06<00:00, 22.43 products/s]


Products so far: 302
Getting products from 2019-01-15 to 2019-01-21


Querying products: 100%|██████████████████████████████████████████████████████| 141/141 [00:10<00:00, 13.91 products/s]


Products so far: 476
Getting products from 2019-01-22 to 2019-01-31


Querying products: 100%|██████████████████████████████████████████████████████| 130/130 [00:06<00:00, 18.67 products/s]


Products so far: 622
Getting products from 2019-02-01 to 2019-02-07


Querying products: 100%|██████████████████████████████████████████████████████| 101/101 [00:04<00:00, 21.56 products/s]


Products so far: 724
Getting products from 2019-02-08 to 2019-02-14


Querying products: 100%|██████████████████████████████████████████████████████| 131/131 [00:04<00:00, 29.70 products/s]


Products so far: 914
Getting products from 2019-02-15 to 2019-02-21
Products so far: 1056
Getting products from 2019-02-22 to 2019-02-28
Products so far: 1162
Getting products from 2019-03-01 to 2019-03-07


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [30]:
# Confirm that the list arguments worked as intended

date_range_list

[(datetime.date(2019, 1, 1), datetime.date(2019, 1, 7)),
 (datetime.date(2019, 1, 8), datetime.date(2019, 1, 14)),
 (datetime.date(2019, 1, 15), datetime.date(2019, 1, 21)),
 (datetime.date(2019, 1, 22), datetime.date(2019, 1, 31)),
 (datetime.date(2019, 2, 1), datetime.date(2019, 2, 7)),
 (datetime.date(2019, 2, 8), datetime.date(2019, 2, 14)),
 (datetime.date(2019, 2, 15), datetime.date(2019, 2, 21)),
 (datetime.date(2019, 2, 22), datetime.date(2019, 2, 28))]

In [32]:
df_list[-1].shape

# They did!

(1162, 35)

In [35]:
df = get_products_df_for_year(api, footprint, 2019, (0, 10), df_list, date_range_list)

Getting products from 2019-03-01 to 2019-03-07
Products so far: 1246
Getting products from 2019-03-08 to 2019-03-14


Querying products: 100%|██████████████████████████████████████████████████████| 111/111 [00:14<00:00,  7.65 products/s]
Querying products: 100%|██████████████████████████████████████████████████████| 171/171 [00:06<00:00, 26.15 products/s]


Products so far: 1468
Getting products from 2019-03-15 to 2019-03-21
Products so far: 1580
Getting products from 2019-03-22 to 2019-03-31
Products so far: 1662
Getting products from 2019-04-01 to 2019-04-07


Querying products: 100%|██████████████████████████████████████████████████████| 117/117 [00:06<00:00, 17.77 products/s]


Products so far: 1812
Getting products from 2019-04-08 to 2019-04-14


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [36]:
date_range_list

[(datetime.date(2019, 1, 1), datetime.date(2019, 1, 7)),
 (datetime.date(2019, 1, 8), datetime.date(2019, 1, 14)),
 (datetime.date(2019, 1, 15), datetime.date(2019, 1, 21)),
 (datetime.date(2019, 1, 22), datetime.date(2019, 1, 31)),
 (datetime.date(2019, 2, 1), datetime.date(2019, 2, 7)),
 (datetime.date(2019, 2, 8), datetime.date(2019, 2, 14)),
 (datetime.date(2019, 2, 15), datetime.date(2019, 2, 21)),
 (datetime.date(2019, 2, 22), datetime.date(2019, 2, 28)),
 (datetime.date(2019, 3, 1), datetime.date(2019, 3, 7)),
 (datetime.date(2019, 3, 8), datetime.date(2019, 3, 14)),
 (datetime.date(2019, 3, 15), datetime.date(2019, 3, 21)),
 (datetime.date(2019, 3, 22), datetime.date(2019, 3, 31)),
 (datetime.date(2019, 4, 1), datetime.date(2019, 4, 7))]

In [37]:
df = get_products_df_for_year(api, footprint, 2019, (0, 10), df_list, date_range_list)

Getting products from 2019-04-08 to 2019-04-14
Products so far: 1876
Getting products from 2019-04-15 to 2019-04-21
Products so far: 1942
Getting products from 2019-04-22 to 2019-04-30


Querying products: 100%|██████████████████████████████████████████████████████| 134/134 [00:06<00:00, 22.24 products/s]


Products so far: 2116
Getting products from 2019-05-01 to 2019-05-07
Products so far: 2210
Getting products from 2019-05-08 to 2019-05-14


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [38]:
df = get_products_df_for_year(api, footprint, 2019, (0, 10), df_list, date_range_list)

Getting products from 2019-05-08 to 2019-05-14
Products so far: 2286
Getting products from 2019-05-15 to 2019-05-21


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [39]:
df = get_products_df_for_year(api, footprint, 2019, (0, 10), df_list, date_range_list)

Getting products from 2019-05-15 to 2019-05-21
Products so far: 2336
Getting products from 2019-05-22 to 2019-05-31
Products so far: 2420
Getting products from 2019-06-01 to 2019-06-07
Products so far: 2546
Getting products from 2019-06-08 to 2019-06-14


Querying products: 100%|██████████████████████████████████████████████████████| 150/150 [00:04<00:00, 30.01 products/s]


Products so far: 2730
Getting products from 2019-06-15 to 2019-06-21


Querying products: 100%|██████████████████████████████████████████████████████| 131/131 [00:04<00:00, 27.42 products/s]


Products so far: 2872
Getting products from 2019-06-22 to 2019-06-30


Querying products: 100%|██████████████████████████████████████████████████████| 149/149 [00:07<00:00, 18.85 products/s]


Products so far: 3044
Getting products from 2019-07-01 to 2019-07-07


Querying products: 100%|██████████████████████████████████████████████████████| 127/127 [00:04<00:00, 25.91 products/s]


Products so far: 3190
Getting products from 2019-07-08 to 2019-07-14
Products so far: 3248
Getting products from 2019-07-15 to 2019-07-21


Querying products: 100%|██████████████████████████████████████████████████████| 127/127 [00:06<00:00, 18.62 products/s]


Products so far: 3394
Getting products from 2019-07-22 to 2019-07-31


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [40]:
df = get_products_df_for_year(api, footprint, 2019, (0, 10), df_list, date_range_list)

Getting products from 2019-07-22 to 2019-07-31


Querying products: 100%|██████████████████████████████████████████████████████| 108/108 [00:57<00:00,  1.87 products/s]
Querying products: 100%|██████████████████████████████████████████████████████| 169/169 [00:12<00:00, 13.21 products/s]


Products so far: 3610
Getting products from 2019-08-01 to 2019-08-07


Querying products: 100%|██████████████████████████████████████████████████████| 117/117 [00:07<00:00, 16.44 products/s]


Products so far: 3742
Getting products from 2019-08-08 to 2019-08-14
Products so far: 3824
Getting products from 2019-08-15 to 2019-08-21
Products so far: 3946
Getting products from 2019-08-22 to 2019-08-31


Querying products: 100%|██████████████████████████████████████████████████████| 101/101 [00:18<00:00,  5.54 products/s]
Querying products: 100%|██████████████████████████████████████████████████████| 148/148 [00:07<00:00, 18.92 products/s]


Products so far: 4148
Getting products from 2019-09-01 to 2019-09-07


Querying products: 100%|██████████████████████████████████████████████████████| 104/104 [00:07<00:00, 13.11 products/s]


Products so far: 4306
Getting products from 2019-09-08 to 2019-09-14
Products so far: 4340
Getting products from 2019-09-15 to 2019-09-21
Products so far: 4392
Getting products from 2019-09-22 to 2019-09-30
Products so far: 4484
Getting products from 2019-10-01 to 2019-10-07
Products so far: 4506
Getting products from 2019-10-08 to 2019-10-14
Products so far: 4578
Getting products from 2019-10-15 to 2019-10-21
Products so far: 4586
Getting products from 2019-10-22 to 2019-10-31
Products so far: 4610
Getting products from 2019-11-01 to 2019-11-07
Products so far: 4722
Getting products from 2019-11-08 to 2019-11-14
Products so far: 4776
Getting products from 2019-11-15 to 2019-11-21
Products so far: 4842
Getting products from 2019-11-22 to 2019-11-30
Products so far: 4894
Getting products from 2019-12-01 to 2019-12-07
Products so far: 4952
Getting products from 2019-12-08 to 2019-12-14
Products so far: 5040
Getting products from 2019-12-15 to 2019-12-21


Querying products: 100%|██████████████████████████████████████████████████████| 118/118 [00:03<00:00, 30.91 products/s]


Products so far: 5180
Getting products from 2019-12-22 to 2019-12-31


Querying products: 100%|██████████████████████████████████████████████████████| 175/175 [00:07<00:00, 22.78 products/s]
Querying products: 100%|██████████████████████████████████████████████████████| 284/284 [00:10<00:00, 26.71 products/s]

Products so far: 5530





In [41]:
df.shape

(5530, 35)

In [42]:
df.head()

Unnamed: 0,title,link,link_alternative,link_icon,summary,datatakesensingstart,beginposition,endposition,ingestiondate,orbitnumber,...,producttype,platformidentifier,orbitdirection,platformserialidentifier,processinglevel,identifier,uuid,level1cpdiidentifier,granuleidentifier,datastripidentifier
abfdc0b1-a591-4b67-bb38-cd821d82df81,S2A_MSIL1C_20190106T091351_N0207_R050_T33NXD_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T09:13:51.024Z, Instrument: MS...",2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 14:57:33.629,18494,...,S2MSI1C,2015-028A,DESCENDING,Sentinel-2A,Level-1C,S2A_MSIL1C_20190106T091351_N0207_R050_T33NXD_2...,abfdc0b1-a591-4b67-bb38-cd821d82df81,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_DS_SGS__20190106T111455_S2019...
21936107-a810-4486-9f8c-2c8d3301e0c4,S2A_MSIL1C_20190106T091351_N0207_R050_T33NWE_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T09:13:51.024Z, Instrument: MS...",2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 14:53:30.962,18494,...,S2MSI1C,2015-028A,DESCENDING,Sentinel-2A,Level-1C,S2A_MSIL1C_20190106T091351_N0207_R050_T33NWE_2...,21936107-a810-4486-9f8c-2c8d3301e0c4,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_DS_SGS__20190106T111455_S2019...
8466109f-d07d-493f-8dce-2e35e2bdb2e3,S2A_MSIL1C_20190106T091351_N0207_R050_T33NYE_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T09:13:51.024Z, Instrument: MS...",2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 14:40:44.145,18494,...,S2MSI1C,2015-028A,DESCENDING,Sentinel-2A,Level-1C,S2A_MSIL1C_20190106T091351_N0207_R050_T33NYE_2...,8466109f-d07d-493f-8dce-2e35e2bdb2e3,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_DS_SGS__20190106T111455_S2019...
7668aa1d-65f3-4f4a-bb5b-83addea93c58,S2A_MSIL1C_20190106T091351_N0207_R050_T33NWD_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T09:13:51.024Z, Instrument: MS...",2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 14:39:16.545,18494,...,S2MSI1C,2015-028A,DESCENDING,Sentinel-2A,Level-1C,S2A_MSIL1C_20190106T091351_N0207_R050_T33NWD_2...,7668aa1d-65f3-4f4a-bb5b-83addea93c58,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_DS_SGS__20190106T111455_S2019...
79b3bf50-955e-40d0-a0a2-c498a065fc43,S2A_MSIL1C_20190106T091351_N0207_R050_T33NYD_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T09:13:51.024Z, Instrument: MS...",2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 09:13:51.024,2019-01-06 14:38:05.517,18494,...,S2MSI1C,2015-028A,DESCENDING,Sentinel-2A,Level-1C,S2A_MSIL1C_20190106T091351_N0207_R050_T33NYD_2...,79b3bf50-955e-40d0-a0a2-c498a065fc43,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_TL_SGS__20190106T111455_A0184...,S2A_OPER_MSI_L1C_DS_SGS__20190106T111455_S2019...


In [43]:
df.columns

Index(['title', 'link', 'link_alternative', 'link_icon', 'summary',
       'datatakesensingstart', 'beginposition', 'endposition', 'ingestiondate',
       'orbitnumber', 'relativeorbitnumber', 'cloudcoverpercentage',
       'sensoroperationalmode', 'gmlfootprint', 'footprint', 'tileid',
       'hv_order_tileid', 'format', 'processingbaseline', 'platformname',
       'filename', 'instrumentname', 'instrumentshortname', 'size',
       's2datatakeid', 'producttype', 'platformidentifier', 'orbitdirection',
       'platformserialidentifier', 'processinglevel', 'identifier', 'uuid',
       'level1cpdiidentifier', 'granuleidentifier', 'datastripidentifier'],
      dtype='object')

In [44]:
# We only need the unique Tile IDs

df['tileid'].unique()

array(['33NXD', '33NWE', '33NYE', '33NWD', '33NYD', '33NXE', '35NND',
       '35MQV', '33NYC', '34NBH', '34NCJ', '34NBJ', '32NPM', '32NNL',
       '33NVD', '35NKC', '35MLV', '35NKD', '35NLD', '35MLU', '35MLT',
       '35MMV', '34MHD', '34NHJ', '34MBC', '34NBG', '33NZD', '33NZC',
       '34NCH', '33NYB', '33NZB', '33NXB', '33NXC', '35NPA', '35NQB',
       '34NFK', '34NEJ', '34NHH', '34NGJ', '34NGH', '34NEK', '34NFJ',
       '35NKE', '34NHG', '33NTF', '32NQL', '32NPL', '32NRL', '32NNM',
       '34MFD', '34NFH', '34NHL', '34MHE', '34MDA', '34MGE', '34NEF',
       '34MFE', '34MFC', '34NEG', '34NGG', '34NGK', '34NFF', '34MEE',
       '34MED', '34NFG', '34MCA', '34MDE', '34NHF', '34NEH', '34NGL',
       '34MDD', '34NGF', '34MEC', '34NHK', '35NQC', '35NNC', '35MPV',
       '35NQA', '33NVC', '33NVE', '33NWC', '35NMD', '35NMC', '35NMB',
       '34MDV', '33NUE', '33NUF', '35NLC', '35NLE', '35NLB', '34NDF',
       '34NDG', '34NCF', '34NDJ', '34NDH', '34NCG', '34NBF', '33NZA',
       '34MCE', '34M

In [45]:
len(df['tileid'].unique())

208

In [46]:
df['tileid'].unique().tolist()

['33NXD',
 '33NWE',
 '33NYE',
 '33NWD',
 '33NYD',
 '33NXE',
 '35NND',
 '35MQV',
 '33NYC',
 '34NBH',
 '34NCJ',
 '34NBJ',
 '32NPM',
 '32NNL',
 '33NVD',
 '35NKC',
 '35MLV',
 '35NKD',
 '35NLD',
 '35MLU',
 '35MLT',
 '35MMV',
 '34MHD',
 '34NHJ',
 '34MBC',
 '34NBG',
 '33NZD',
 '33NZC',
 '34NCH',
 '33NYB',
 '33NZB',
 '33NXB',
 '33NXC',
 '35NPA',
 '35NQB',
 '34NFK',
 '34NEJ',
 '34NHH',
 '34NGJ',
 '34NGH',
 '34NEK',
 '34NFJ',
 '35NKE',
 '34NHG',
 '33NTF',
 '32NQL',
 '32NPL',
 '32NRL',
 '32NNM',
 '34MFD',
 '34NFH',
 '34NHL',
 '34MHE',
 '34MDA',
 '34MGE',
 '34NEF',
 '34MFE',
 '34MFC',
 '34NEG',
 '34NGG',
 '34NGK',
 '34NFF',
 '34MEE',
 '34MED',
 '34NFG',
 '34MCA',
 '34MDE',
 '34NHF',
 '34NEH',
 '34NGL',
 '34MDD',
 '34NGF',
 '34MEC',
 '34NHK',
 '35NQC',
 '35NNC',
 '35MPV',
 '35NQA',
 '33NVC',
 '33NVE',
 '33NWC',
 '35NMD',
 '35NMC',
 '35NMB',
 '34MDV',
 '33NUE',
 '33NUF',
 '35NLC',
 '35NLE',
 '35NLB',
 '34NDF',
 '34NDG',
 '34NCF',
 '34NDJ',
 '34NDH',
 '34NCG',
 '34NBF',
 '33NZA',
 '34MCE',
 '34MBB',


In [47]:
# Write this list into a text file

with open('tile_ids.txt', 'w') as f:
    for tile_id in df['tileid'].unique().tolist():
        f.write(f'{tile_id}\n')

In [23]:
# Try again, but this time allow any cloudcover value

df_list = []
date_range_list = []

df = get_products_df_for_year(api, footprint, 2019, (0, 100), df_list, date_range_list)

Getting products from 2019-01-01 to 2019-01-07


Querying products: 100%|██████████████████████████████████████████████████████| 766/766 [01:07<00:00, 11.39 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1156/1156 [00:52<00:00, 22.07 products/s]


Products so far: 1532
Getting products from 2019-01-08 to 2019-01-14


Querying products: 100%|██████████████████████████████████████████████████████| 686/686 [00:37<00:00, 18.15 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1074/1074 [00:42<00:00, 25.36 products/s]


Products so far: 2904
Getting products from 2019-01-15 to 2019-01-21


Querying products: 100%|██████████████████████████████████████████████████████| 698/698 [00:39<00:00, 17.84 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1072/1072 [00:45<00:00, 23.71 products/s]


Products so far: 4300
Getting products from 2019-01-22 to 2019-01-31


Querying products: 100%|████████████████████████████████████████████████████| 1120/1120 [01:31<00:00, 12.27 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1696/1696 [01:25<00:00, 19.75 products/s]


Products so far: 6540
Getting products from 2019-02-01 to 2019-02-07


Querying products: 100%|██████████████████████████████████████████████████████| 764/764 [01:00<00:00, 12.59 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1176/1176 [00:53<00:00, 21.82 products/s]


Products so far: 8068
Getting products from 2019-02-08 to 2019-02-14


Querying products: 100%|██████████████████████████████████████████████████████| 804/804 [00:48<00:00, 16.47 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1200/1200 [00:45<00:00, 26.48 products/s]


Products so far: 9676
Getting products from 2019-02-15 to 2019-02-21


Querying products: 100%|██████████████████████████████████████████████████████| 750/750 [00:51<00:00, 14.66 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1132/1132 [00:55<00:00, 20.36 products/s]


Products so far: 11176
Getting products from 2019-02-22 to 2019-02-28


Querying products: 100%|██████████████████████████████████████████████████████| 674/674 [00:52<00:00, 12.92 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1048/1048 [00:55<00:00, 18.87 products/s]


Products so far: 12524
Getting products from 2019-03-01 to 2019-03-07


Querying products: 100%|██████████████████████████████████████████████████████| 708/708 [01:09<00:00, 10.16 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1076/1076 [00:59<00:00, 18.11 products/s]


Products so far: 13940
Getting products from 2019-03-08 to 2019-03-14


Querying products: 100%|██████████████████████████████████████████████████████| 766/766 [01:10<00:00, 10.84 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1150/1150 [01:17<00:00, 14.90 products/s]


Products so far: 15472
Getting products from 2019-03-15 to 2019-03-21


Querying products: 100%|██████████████████████████████████████████████████████| 788/788 [01:02<00:00, 12.64 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1180/1180 [01:02<00:00, 18.90 products/s]


Products so far: 17048
Getting products from 2019-03-22 to 2019-03-31


Querying products: 100%|████████████████████████████████████████████████████| 1146/1146 [01:49<00:00, 10.46 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1734/1734 [01:45<00:00, 16.51 products/s]


Products so far: 19340
Getting products from 2019-04-01 to 2019-04-07


Querying products: 100%|██████████████████████████████████████████████████████| 764/764 [01:14<00:00, 10.27 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1150/1150 [01:08<00:00, 16.71 products/s]


Products so far: 20868
Getting products from 2019-04-08 to 2019-04-14


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [24]:
df = get_products_df_for_year(api, footprint, 2019, (0, 100), df_list, date_range_list)

Getting products from 2019-04-08 to 2019-04-14


Querying products: 100%|██████████████████████████████████████████████████████| 692/692 [01:25<00:00,  8.13 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1066/1066 [01:11<00:00, 14.87 products/s]


Products so far: 22252
Getting products from 2019-04-15 to 2019-04-21


Querying products: 100%|██████████████████████████████████████████████████████| 718/718 [00:58<00:00, 12.26 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1086/1086 [00:55<00:00, 19.70 products/s]


Products so far: 23688
Getting products from 2019-04-22 to 2019-04-30


Querying products: 100%|████████████████████████████████████████████████████| 1009/1009 [01:55<00:00,  8.76 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1528/1528 [01:50<00:00, 13.85 products/s]


Products so far: 25706
Getting products from 2019-05-01 to 2019-05-07


Querying products: 100%|██████████████████████████████████████████████████████| 736/736 [01:00<00:00, 12.26 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1118/1118 [01:05<00:00, 17.05 products/s]


Products so far: 27178
Getting products from 2019-05-08 to 2019-05-14


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [26]:
df = get_products_df_for_year(api, footprint, 2019, (0, 100), df_list, date_range_list)

Getting products from 2019-05-08 to 2019-05-14


Querying products: 100%|██████████████████████████████████████████████████████| 696/696 [00:56<00:00, 12.38 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1070/1070 [01:03<00:00, 16.87 products/s]


Products so far: 28570
Getting products from 2019-05-15 to 2019-05-21


Querying products: 100%|██████████████████████████████████████████████████████| 718/718 [01:13<00:00,  9.70 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1088/1088 [00:59<00:00, 18.42 products/s]


Products so far: 30006
Getting products from 2019-05-22 to 2019-05-31


Querying products: 100%|████████████████████████████████████████████████████| 1018/1018 [01:15<00:00, 13.52 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1574/1574 [01:13<00:00, 21.56 products/s]


Products so far: 32042
Getting products from 2019-06-01 to 2019-06-07


Querying products: 100%|██████████████████████████████████████████████████████| 788/788 [00:46<00:00, 17.08 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1174/1174 [00:49<00:00, 23.73 products/s]


Products so far: 33618
Getting products from 2019-06-08 to 2019-06-14


Querying products: 100%|██████████████████████████████████████████████████████| 800/800 [01:05<00:00, 12.25 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1194/1194 [01:01<00:00, 19.26 products/s]


Products so far: 35218
Getting products from 2019-06-15 to 2019-06-21


Querying products: 100%|██████████████████████████████████████████████████████| 736/736 [00:53<00:00, 13.81 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1112/1112 [00:52<00:00, 21.34 products/s]


Products so far: 36690
Getting products from 2019-06-22 to 2019-06-30


Querying products: 100%|██████████████████████████████████████████████████████| 982/982 [01:33<00:00, 10.53 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1486/1486 [01:31<00:00, 16.25 products/s]


Products so far: 38654
Getting products from 2019-07-01 to 2019-07-07


Querying products: 100%|██████████████████████████████████████████████████████| 803/803 [01:10<00:00, 11.42 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1197/1197 [01:00<00:00, 19.92 products/s]


Products so far: 40260
Getting products from 2019-07-08 to 2019-07-14


Querying products: 100%|██████████████████████████████████████████████████████| 792/792 [01:11<00:00, 11.06 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1186/1186 [01:00<00:00, 19.52 products/s]


Products so far: 41844
Getting products from 2019-07-15 to 2019-07-21


Querying products: 100%|██████████████████████████████████████████████████████| 762/762 [01:31<00:00,  8.29 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1146/1146 [01:25<00:00, 13.39 products/s]


Products so far: 43368
Getting products from 2019-07-22 to 2019-07-31


Querying products: 100%|████████████████████████████████████████████████████| 1102/1102 [02:36<00:00,  7.02 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1682/1682 [02:28<00:00, 11.31 products/s]


Products so far: 45572
Getting products from 2019-08-01 to 2019-08-07


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [32]:
df = get_products_df_for_year(api, footprint, 2019, (0, 100), df_list, date_range_list)

Getting products from 2019-08-01 to 2019-08-07


Querying products: 100%|██████████████████████████████████████████████████████| 710/710 [01:17<00:00,  9.14 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1084/1084 [01:15<00:00, 14.31 products/s]


Products so far: 46992
Getting products from 2019-08-08 to 2019-08-14


Querying products: 100%|██████████████████████████████████████████████████████| 688/688 [01:10<00:00,  9.76 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1054/1054 [01:20<00:00, 13.06 products/s]


Products so far: 48368
Getting products from 2019-08-15 to 2019-08-21


Querying products: 100%|██████████████████████████████████████████████████████| 808/808 [01:17<00:00, 10.42 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1198/1198 [01:08<00:00, 17.56 products/s]


Products so far: 49984
Getting products from 2019-08-22 to 2019-08-31


Querying products: 100%|████████████████████████████████████████████████████| 1196/1196 [02:07<00:00,  9.41 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1788/1788 [02:09<00:00, 13.80 products/s]


Products so far: 52376
Getting products from 2019-09-01 to 2019-09-07


Querying products: 100%|██████████████████████████████████████████████████████| 809/809 [01:23<00:00,  9.65 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1204/1204 [01:24<00:00, 14.30 products/s]


Products so far: 53994
Getting products from 2019-09-08 to 2019-09-14


Querying products: 100%|██████████████████████████████████████████████████████| 748/748 [01:29<00:00,  8.40 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1130/1130 [01:27<00:00, 12.97 products/s]


Products so far: 55490
Getting products from 2019-09-15 to 2019-09-21


Querying products: 100%|██████████████████████████████████████████████████████| 706/706 [01:25<00:00,  8.23 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1080/1080 [01:11<00:00, 15.08 products/s]


Products so far: 56902
Getting products from 2019-09-22 to 2019-09-30


Querying products: 100%|██████████████████████████████████████████████████████| 832/832 [01:29<00:00,  9.33 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1268/1268 [01:23<00:00, 15.14 products/s]


Products so far: 58566
Getting products from 2019-10-01 to 2019-10-07


Querying products: 100%|██████████████████████████████████████████████████████| 802/802 [01:21<00:00,  9.81 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1210/1210 [01:14<00:00, 16.24 products/s]


Products so far: 60170
Getting products from 2019-10-08 to 2019-10-14


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [34]:
df = get_products_df_for_year(api, footprint, 2019, (0, 100), df_list, date_range_list)

Getting products from 2019-10-08 to 2019-10-14


Querying products: 100%|██████████████████████████████████████████████████████| 720/720 [01:02<00:00, 11.51 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1094/1094 [00:55<00:00, 19.65 products/s]


Products so far: 61610
Getting products from 2019-10-15 to 2019-10-21


Querying products: 100%|██████████████████████████████████████████████████████| 622/622 [00:52<00:00, 11.81 products/s]
Querying products: 100%|██████████████████████████████████████████████████████| 940/940 [00:49<00:00, 19.13 products/s]


Products so far: 62854
Getting products from 2019-10-22 to 2019-10-31


Querying products: 100%|████████████████████████████████████████████████████| 1039/1039 [01:28<00:00, 11.74 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1582/1582 [01:27<00:00, 18.04 products/s]


Products so far: 64932
Getting products from 2019-11-01 to 2019-11-07


Querying products: 100%|██████████████████████████████████████████████████████| 708/708 [00:41<00:00, 17.18 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1078/1078 [00:37<00:00, 28.58 products/s]


Products so far: 66348
Getting products from 2019-11-08 to 2019-11-14


Querying products: 100%|██████████████████████████████████████████████████████| 776/776 [01:05<00:00, 11.90 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1170/1170 [01:05<00:00, 17.82 products/s]


Products so far: 67900
Getting products from 2019-11-15 to 2019-11-21


Querying products: 100%|██████████████████████████████████████████████████████| 790/790 [00:45<00:00, 17.25 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1192/1192 [00:47<00:00, 25.22 products/s]


Products so far: 69480
Getting products from 2019-11-22 to 2019-11-30


Querying products: 100%|██████████████████████████████████████████████████████| 774/774 [00:41<00:00, 18.81 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1170/1170 [00:46<00:00, 25.32 products/s]


Products so far: 71028
Getting products from 2019-12-01 to 2019-12-07


Querying products: 100%|██████████████████████████████████████████████████████| 720/720 [00:44<00:00, 16.12 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1090/1090 [00:43<00:00, 24.91 products/s]


Products so far: 72468
Getting products from 2019-12-08 to 2019-12-14


Querying products: 100%|██████████████████████████████████████████████████████| 752/752 [00:38<00:00, 19.55 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1134/1134 [00:38<00:00, 29.77 products/s]


Products so far: 73972
Getting products from 2019-12-15 to 2019-12-21


Querying products: 100%|██████████████████████████████████████████████████████| 802/802 [01:00<00:00, 13.23 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1198/1198 [00:59<00:00, 20.04 products/s]


Products so far: 75576
Getting products from 2019-12-22 to 2019-12-31


Querying products: 100%|████████████████████████████████████████████████████| 1136/1136 [02:20<00:00,  8.06 products/s]
Querying products: 100%|████████████████████████████████████████████████████| 1722/1722 [02:07<00:00, 13.53 products/s]


Products so far: 77848


In [35]:
print(df.shape)

df.head()

(77848, 42)


Unnamed: 0,title,link,link_alternative,link_icon,summary,ingestiondate,beginposition,endposition,orbitnumber,relativeorbitnumber,...,platformname,size,uuid,datatakesensingstart,sensoroperationalmode,tileid,hv_order_tileid,level1cpdiidentifier,granuleidentifier,datastripidentifier
853d2f1f-47af-417e-a2a2-f66eba8ec41e,S2B_MSIL2A_20190106T082329_N0211_R121_T35MNV_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T08:23:29.024Z, Instrument: MS...",2019-01-06 19:28:07.027,2019-01-06 08:23:29.024,2019-01-06 08:23:29.024,9585,121,...,Sentinel-2,1.08 GB,853d2f1f-47af-417e-a2a2-f66eba8ec41e,NaT,,,,,,
358bb51a-0762-484c-b8f7-7f61805bc5fa,S2B_MSIL2A_20190106T082329_N0211_R121_T35MNS_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T08:23:29.024Z, Instrument: MS...",2019-01-06 19:28:05.966,2019-01-06 08:23:29.024,2019-01-06 08:23:29.024,9585,121,...,Sentinel-2,1.07 GB,358bb51a-0762-484c-b8f7-7f61805bc5fa,NaT,,,,,,
7142390e-f3bd-4122-b817-adcfbc72a625,S2B_MSIL2A_20190106T082329_N0211_R121_T35MMV_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T08:23:29.024Z, Instrument: MS...",2019-01-06 19:27:05.039,2019-01-06 08:23:29.024,2019-01-06 08:23:29.024,9585,121,...,Sentinel-2,1011.61 MB,7142390e-f3bd-4122-b817-adcfbc72a625,NaT,,,,,,
823834ef-2c5e-434a-b624-a78f5073af4e,S2B_MSIL2A_20190106T082329_N0211_R121_T35NNB_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T08:23:29.024Z, Instrument: MS...",2019-01-06 19:26:58.254,2019-01-06 08:23:29.024,2019-01-06 08:23:29.024,9585,121,...,Sentinel-2,1.06 GB,823834ef-2c5e-434a-b624-a78f5073af4e,NaT,,,,,,
04de2b67-02a7-4c0c-858c-900bd46e2377,S2B_MSIL2A_20190106T082329_N0211_R121_T35MMU_2...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,https://scihub.copernicus.eu/apihub/odata/v1/P...,"Date: 2019-01-06T08:23:29.024Z, Instrument: MS...",2019-01-06 19:26:31.763,2019-01-06 08:23:29.024,2019-01-06 08:23:29.024,9585,121,...,Sentinel-2,1.09 GB,04de2b67-02a7-4c0c-858c-900bd46e2377,NaT,,,,,,


In [36]:
len(df['tileid'].unique())

235

In [37]:
with open('tile_ids.txt', 'w') as f:
    for tile_id in df['tileid'].unique().tolist():
        f.write(f'{tile_id}\n')

In [38]:
df['tileid'].unique()

array([nan, '33NXD', '33MXU', '33NUA', '33NWE', '33MUV', '33NXA', '33NVC',
       '33NWA', '33NYE', '33NWD', '33NYD', '33NXE', '33MTU', '33NXB',
       '32MRV', '33NUB', '33MTS', '33NXC', '33NVE', '33NVA', '33NUC',
       '33NVB', '33MTT', '35NQC', '35NPB', '35NPA', '35NQB', '35NNC',
       '35NPC', '35NNB', '35NND', '35NNA', '35NQA', '35MMR', '35MNV',
       '35MMV', '35NMC', '35MNT', '35MNS', '35MNU', '35MMT', '35NMA',
       '35MPV', '33NVD', '35MMS', '35MMU', '35NMB', '35MPU', '35MPT',
       '35NMD', '35MPS', '35NLA', '35MPR', '35MQV', '35MLU', '35MLV',
       '35MLS', '35MLT', '33MXV', '33NWB', '33NWC', '33MWV', '33NYC',
       '34MCD', '34NFJ', '34NCG', '34NCH', '34NDJ', '34MBC', '34MBD',
       '34MCE', '34NFK', '34MDD', '34NEG', '34NDH', '34NBF', '34MED',
       '34NEJ', '34NBH', '34NCJ', '34NDF', '34MDC', '34NDG', '34NBJ',
       '34NEH', '34NCF', '34MDE', '34MEE', '34MBE', '33MZT', '33MZV',
       '34NEF', '34MCC', '33NZA', '33MZU', '34NBG', '34MDA', '34MCA',
       '34MDB',

In [5]:
# Now do it for the islands

api = get_api()

footprint2 = geojson_to_wkt(read_geojson('./data/Geometry/congo_basin_boundary/Congo_Basin_Boundary_islands_v4.geojson'))

In [13]:
df_list = []
date_range_list = []

df = get_products_df_for_year(api, footprint2, 2019, (0, 100), df_list, date_range_list)

Getting products from 2019-01-01 to 2019-01-07
Products so far: 0
Getting products from 2019-01-08 to 2019-01-14
Products so far: 0
Getting products from 2019-01-15 to 2019-01-21
Products so far: 0
Getting products from 2019-01-22 to 2019-01-31
Products so far: 0
Getting products from 2019-02-01 to 2019-02-07
Products so far: 0
Getting products from 2019-02-08 to 2019-02-14
Products so far: 0
Getting products from 2019-02-15 to 2019-02-21
Products so far: 0
Getting products from 2019-02-22 to 2019-02-28
Products so far: 0
Getting products from 2019-03-01 to 2019-03-07
Products so far: 0
Getting products from 2019-03-08 to 2019-03-14
Products so far: 0
Getting products from 2019-03-15 to 2019-03-21
Products so far: 0
Getting products from 2019-03-22 to 2019-03-31
Products so far: 0
Getting products from 2019-04-01 to 2019-04-07
Products so far: 0
Getting products from 2019-04-08 to 2019-04-14
Products so far: 0
Getting products from 2019-04-15 to 2019-04-21
Products so far: 0
Getting pr

SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [14]:
df = get_products_df_for_year(api, footprint2, 2019, (0, 100), df_list, date_range_list)

Getting products from 2019-05-08 to 2019-05-14
Products so far: 0
Getting products from 2019-05-15 to 2019-05-21
Products so far: 0
Getting products from 2019-05-22 to 2019-05-31
Products so far: 0
Getting products from 2019-06-01 to 2019-06-07
Products so far: 0
Getting products from 2019-06-08 to 2019-06-14
Products so far: 0
Getting products from 2019-06-15 to 2019-06-21
Products so far: 0
Getting products from 2019-06-22 to 2019-06-30
Products so far: 0
Getting products from 2019-07-01 to 2019-07-07


SentinelAPIError: HTTP status 500 Internal Server Error: SolrServerException : Timeout occured while waiting response from server at: http://172.30.175.89:8983/solr/dhus

In [15]:
df = get_products_df_for_year(api, footprint2, 2019, (0, 100), df_list, date_range_list)

# Got no results for the islands

Getting products from 2019-07-01 to 2019-07-07
Products so far: 0
Getting products from 2019-07-08 to 2019-07-14
Products so far: 0
Getting products from 2019-07-15 to 2019-07-21
Products so far: 0
Getting products from 2019-07-22 to 2019-07-31
Products so far: 0
Getting products from 2019-08-01 to 2019-08-07
Products so far: 0
Getting products from 2019-08-08 to 2019-08-14
Products so far: 0
Getting products from 2019-08-15 to 2019-08-21
Products so far: 0
Getting products from 2019-08-22 to 2019-08-31
Products so far: 0
Getting products from 2019-09-01 to 2019-09-07
Products so far: 0
Getting products from 2019-09-08 to 2019-09-14
Products so far: 0
Getting products from 2019-09-15 to 2019-09-21
Products so far: 0
Getting products from 2019-09-22 to 2019-09-30
Products so far: 0
Getting products from 2019-10-01 to 2019-10-07
Products so far: 0
Getting products from 2019-10-08 to 2019-10-14
Products so far: 0
Getting products from 2019-10-15 to 2019-10-21
Products so far: 0
Getting pr

### Downloading one product per tile ID

To test my results, I download one product per tile ID and then view them in QGIS.

In [2]:
ee_index = pd.read_csv('earth-engine-index.csv')

In [3]:
ee_index.head()

Unnamed: 0,GRANULE_ID,PRODUCT_ID,DATATAKE_IDENTIFIER,MGRS_TILE,SENSING_TIME,TOTAL_SIZE,CLOUD_COVER,GEOMETRIC_QUALITY_FLAG,GENERATION_TIME,NORTH_LAT,SOUTH_LAT,WEST_LON,EAST_LON,BASE_URL
0,L1C_T51HWC_A021621_20190813T014402,S2A_MSIL1C_20190813T013321_N0208_R031_T51HWC_2...,GS2A_20190813T013321_021621_N02.08,51HWC,2019-08-13T01:47:02.634000Z,472312038.0,0.0,,2019-08-13T05:44:52.000000Z,-33.433323,-34.429078,123.192969,124.194586,gs://gcp-public-data-sentinel-2/tiles/51/H/WC/...
1,L1C_T21HYT_A011547_20190523T133233,S2B_MSIL1C_20190523T133239_N0207_R081_T21HYT_2...,GS2B_20190523T133239_011547_N02.07,21HYT,2019-05-23T13:43:06.000000Z,93794242.0,0.0,,2019-05-23T15:10:06.000000Z,-37.894755,-38.160337,-54.58066,-53.464817,gs://gcp-public-data-sentinel-2/tiles/21/H/YT/...
2,L1C_T11SLA_A016512_20180820T184735,S2A_MSIL1C_20180820T183921_N0206_R070_T11SLA_2...,GS2A_20180820T183921_016512_N02.06,11SLA,2018-08-20T18:47:35.340000Z,852706489.0,4.608,,2018-08-20T23:54:18.000000Z,37.042336,36.036258,-119.248493,-118.007274,gs://gcp-public-data-sentinel-2/tiles/11/S/LA/...
3,L1C_T02KMG_A003029_20171004T213912,S2B_MSIL1C_20171004T213909_N0205_R143_T02KMG_2...,GS2B_20171004T213909_003029_N02.05,02KMG,2017-10-04T21:39:12.460000Z,502814591.0,9.4476,PASSED,2017-10-04T21:39:12.000000Z,-16.280273,-17.273285,-171.686702,-170.908268,gs://gcp-public-data-sentinel-2/tiles/02/K/MG/...
4,L1C_T49NHB_A001931_20170720T024456,S2B_MSIL1C_20170720T022549_N0205_R046_T49NHB_2...,GS2B_20170720T022549_001931_N02.05,49NHB,2017-07-20T02:44:56.730000Z,176714634.0,12.6707,PASSED,2017-07-20T02:44:56.000000Z,1.806308,0.814825,114.385102,114.681769,gs://gcp-public-data-sentinel-2/tiles/49/N/HB/...


In [8]:
with open('tile_ids.txt') as f:
    tile_ids = f.readlines()

In [9]:
tile_ids_2 = [tile.split('\n')[0] for tile in tile_ids]

tile_ids_2

['nan',
 '33NXD',
 '33MXU',
 '33NUA',
 '33NWE',
 '33MUV',
 '33NXA',
 '33NVC',
 '33NWA',
 '33NYE',
 '33NWD',
 '33NYD',
 '33NXE',
 '33MTU',
 '33NXB',
 '32MRV',
 '33NUB',
 '33MTS',
 '33NXC',
 '33NVE',
 '33NVA',
 '33NUC',
 '33NVB',
 '33MTT',
 '35NQC',
 '35NPB',
 '35NPA',
 '35NQB',
 '35NNC',
 '35NPC',
 '35NNB',
 '35NND',
 '35NNA',
 '35NQA',
 '35MMR',
 '35MNV',
 '35MMV',
 '35NMC',
 '35MNT',
 '35MNS',
 '35MNU',
 '35MMT',
 '35NMA',
 '35MPV',
 '33NVD',
 '35MMS',
 '35MMU',
 '35NMB',
 '35MPU',
 '35MPT',
 '35NMD',
 '35MPS',
 '35NLA',
 '35MPR',
 '35MQV',
 '35MLU',
 '35MLV',
 '35MLS',
 '35MLT',
 '33MXV',
 '33NWB',
 '33NWC',
 '33MWV',
 '33NYC',
 '34MCD',
 '34NFJ',
 '34NCG',
 '34NCH',
 '34NDJ',
 '34MBC',
 '34MBD',
 '34MCE',
 '34NFK',
 '34MDD',
 '34NEG',
 '34NDH',
 '34NBF',
 '34MED',
 '34NEJ',
 '34NBH',
 '34NCJ',
 '34NDF',
 '34MDC',
 '34NDG',
 '34NBJ',
 '34NEH',
 '34NCF',
 '34MDE',
 '34MEE',
 '34MBE',
 '33MZT',
 '33MZV',
 '34NEF',
 '34MCC',
 '33NZA',
 '33MZU',
 '34NBG',
 '34MDA',
 '34MCA',
 '34MDB',
 '

In [10]:
# I need to limit the EE Index only to congo basin rows.
# So let's put this list into a dataframe and merge it with the EE Index.

tile_df = pd.DataFrame()

tile_df['tile_id'] = tile_ids_2

tile_df.head()

Unnamed: 0,tile_id
0,
1,33NXD
2,33MXU
3,33NUA
4,33NWE


In [16]:
ee_index_2 = ee_index.reset_index()

merged = tile_df.merge(ee_index_2, left_on='tile_id', right_on='MGRS_TILE')
# drop duplicates, since we only need one row per tile ID
merged2 = merged.drop_duplicates(subset=['MGRS_TILE'])
# To download the products, we just need a list of row numbers
rows = merged2['index'].tolist()

In [17]:
len(rows)

234

In [18]:
len(tile_ids_2)

235

In [3]:
# Copy functions from the "Download TCI Files with Google API" notebook and run the second one

def generate_tci_uri(ee_index, row):
    uri = ee_index.loc[row, 'BASE_URL']
    uri += '/GRANULE/'
    granule_id = ee_index.loc[row, 'GRANULE_ID']
    uri += granule_id
    uri += '/IMG_DATA/'
    tile_id = granule_id.split('_')[1]
    date = ee_index.loc[row, 'DATATAKE_IDENTIFIER'].split('_')[1]
    uri += f'{tile_id}_{date}_TCI.jp2'
    
    return uri


def download_tcis(ee_index, rows, dest_folder):
    
    cloud_env = r"C:\Users\David\AppData\Local\Google\Cloud SDK\cloud_env.bat"
    
    for row in rows:
        uri = generate_tci_uri(ee_index, row)
        subprocess.run([cloud_env, '&&', 'gsutil', 'cp', uri, dest_folder])

In [20]:
download_tcis(ee_index, rows, 'D:/canopy_data/full_basin_test/')

### Download new tiles

In [44]:
# After running the code in "mgrs-test", I now have additional tile IDs.
# I want to download these, but only the ones I didn't already download.

ee_index = pd.read_csv('earth-engine-index.csv')

In [26]:
with open('tile_ids.txt') as f:
    tile_ids = f.readlines()
    
with open('tile_ids_2.txt') as f2:
    tile_ids_2 = f2.readlines()

In [27]:
tile_ids = [tile.split('\n')[0] for tile in tile_ids]
tile_ids.pop(0)

tile_ids_2 = [tile.split('\n')[0] for tile in tile_ids_2]

In [30]:
# I need the tile IDs in tile_ids_2 that aren't in tile_ids
# https://stackoverflow.com/a/2104348/12685847

tile_ids_3 = set(tile_ids_2) - set(tile_ids)

In [31]:
assert len(tile_ids_2) - len(tile_ids) - 1 == len(tile_ids_3)

# Hmm...what happened?

AssertionError: 

In [32]:
len(tile_ids)

234

In [33]:
len(tile_ids_2)

335

In [34]:
len(tile_ids_3)

104

In [35]:
tile_ids

['33NXD',
 '33MXU',
 '33NUA',
 '33NWE',
 '33MUV',
 '33NXA',
 '33NVC',
 '33NWA',
 '33NYE',
 '33NWD',
 '33NYD',
 '33NXE',
 '33MTU',
 '33NXB',
 '32MRV',
 '33NUB',
 '33MTS',
 '33NXC',
 '33NVE',
 '33NVA',
 '33NUC',
 '33NVB',
 '33MTT',
 '35NQC',
 '35NPB',
 '35NPA',
 '35NQB',
 '35NNC',
 '35NPC',
 '35NNB',
 '35NND',
 '35NNA',
 '35NQA',
 '35MMR',
 '35MNV',
 '35MMV',
 '35NMC',
 '35MNT',
 '35MNS',
 '35MNU',
 '35MMT',
 '35NMA',
 '35MPV',
 '33NVD',
 '35MMS',
 '35MMU',
 '35NMB',
 '35MPU',
 '35MPT',
 '35NMD',
 '35MPS',
 '35NLA',
 '35MPR',
 '35MQV',
 '35MLU',
 '35MLV',
 '35MLS',
 '35MLT',
 '33MXV',
 '33NWB',
 '33NWC',
 '33MWV',
 '33NYC',
 '34MCD',
 '34NFJ',
 '34NCG',
 '34NCH',
 '34NDJ',
 '34MBC',
 '34MBD',
 '34MCE',
 '34NFK',
 '34MDD',
 '34NEG',
 '34NDH',
 '34NBF',
 '34MED',
 '34NEJ',
 '34NBH',
 '34NCJ',
 '34NDF',
 '34MDC',
 '34NDG',
 '34NBJ',
 '34NEH',
 '34NCF',
 '34MDE',
 '34MEE',
 '34MBE',
 '33MZT',
 '33MZV',
 '34NEF',
 '34MCC',
 '33NZA',
 '33MZU',
 '34NBG',
 '34MDA',
 '34MCA',
 '34MDB',
 '34MCB',


In [36]:
tile_ids_2

['34MEE',
 '35MJV',
 '33MZU',
 '35MLT',
 '34MBV',
 '34MFA',
 '34NAG',
 '32NPH',
 '33NSF',
 '35MPU',
 '34MBE',
 '32MRE',
 '34NFH',
 '35MKP',
 '32NRH',
 '33NTF',
 '32NMN',
 '35NLA',
 '35NME',
 '33MUR',
 '33MWU',
 '33MTV',
 '34MCC',
 '36NSH',
 '32NNF',
 '32NPF',
 '35NPA',
 '35MNR',
 '33MUT',
 '35MKU',
 '35NLE',
 '33NSD',
 '34NAK',
 '33NTD',
 '34MGD',
 '35NMB',
 '35MJR',
 '33NWF',
 '33MZV',
 '33NUD',
 '32NQM',
 '34NFK',
 '33NVA',
 '33MYT',
 '35NLC',
 '34MBC',
 '34MDU',
 '35MPS',
 '34MAD',
 '35NKC',
 '33MUQ',
 '33NSE',
 '35MQR',
 '34NEK',
 '32NML',
 '34NAF',
 '34NAJ',
 '35NKB',
 '33NUA',
 '33NTA',
 '33MXV',
 '34NDJ',
 '33MTU',
 '34MEV',
 '34MEU',
 '34MEC',
 '34NEF',
 '33NVC',
 '34NGK',
 '33NXE',
 '35NMD',
 '34MGE',
 '35NRB',
 '33NXD',
 '33MTQ',
 '33NSG',
 '34NHM',
 '34NGF',
 '35NJE',
 '34MHE',
 '34MCA',
 '33MST',
 '35MKR',
 '34MAC',
 '32MRC',
 '33NWB',
 '35NLB',
 '35MNU',
 '33MSS',
 '34NGL',
 '32NQL',
 '32NQG',
 '35NPB',
 '33MSQ',
 '34NCF',
 '33NZF',
 '33NUE',
 '33MZS',
 '33MUP',
 '35NJA',


In [37]:
len(set(tile_ids))

234

In [38]:
len(set(tile_ids_2))

335

In [40]:
# Try a different method

tile_ids_4 = [tile for tile in tile_ids_2 if tile not in tile_ids]

In [41]:
len(tile_ids_4)

# Still has a weird length

104

In [42]:
335 - 234

# Well whatever...let's just go with it

101

In [45]:
tile_df = pd.DataFrame()

tile_df['tile_id'] = tile_ids_4

ee_index_2 = ee_index.reset_index()

merged = tile_df.merge(ee_index_2, left_on='tile_id', right_on='MGRS_TILE')
merged2 = merged.drop_duplicates(subset=['MGRS_TILE'])
rows = merged2['index'].tolist()

len(rows)

# Only 64 of them are even in the EE Index

64

In [46]:
download_tcis(ee_index, rows, 'D:/canopy_data/full_basin_test/')