In [3]:
# import Libraries
import pandas as pd
import numpy as np
import urllib
import os
import requests
import shutil
from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt
from multiprocessing.pool import ThreadPool
import time
import geopandas as gpd

In [4]:
# Working folders (resolved against the current working directory)
# data_folder   : where the input data (e.g. the area-of-interest shapefile) is read from
# output_folder : where downloaded archives and exported files are written
data_folder = os.path.abspath("data")
output_folder = os.path.abspath("output")
# Create the output folder when missing; exist_ok avoids the check-then-create race
os.makedirs(output_folder, exist_ok=True)

In [5]:
# Credentials
# Read from the environment (SCIHUB_USERNAME / SCIHUB_PASSWORD) so that real
# secrets are never saved inside the notebook. The original placeholder values
# are kept as fallbacks so the cell still runs when the variables are not set.
username = os.environ.get("SCIHUB_USERNAME", "myuser")
password = os.environ.get("SCIHUB_PASSWORD", "mypassword")

# Declarables
# NOTE(review): the Copernicus Open Access Hub (scihub.copernicus.eu) has
# reportedly been retired in favour of the Copernicus Data Space Ecosystem --
# confirm these endpoints are still reachable before running the notebook.
url = "https://scihub.copernicus.eu/apihub/odata/v1"
searchapi = "https://scihub.copernicus.eu/dhus/search"

In [None]:
# Helping Functions

# Authenticate
# Cache of authenticated requests sessions, keyed by credential position.
session_store = {}

# Credential records (dicts with 'username' and 'password'); filled by
# init_session(). Initialised here so that the name always exists: a
# module-level `global` statement does not create the variable.
multicred = []

# MultiCredentials
def init_session(credentials_file='credentials.csv', parallelfactor=2):
    """Load, validate and register the credentials used for downloading.

    Reads ``credentials_file`` (a CSV with ``username`` and ``password``
    columns), checks every credential against the search endpoint and stores
    the accepted ones in the global ``multicred`` list. The accepted list is
    repeated ``parallelfactor`` times, so each credential provides that many
    download slots.

    Args:
        credentials_file: Path of the CSV file holding the credentials.
        parallelfactor: Number of download slots per valid credential
            (values below 1 are treated as 1).

    Side effects:
        Rebinds the global ``multicred`` and rebuilds ``session_store`` so
        that its keys match the positions in the new ``multicred`` list.
    """
    global multicred
    check_url = 'https://scihub.copernicus.eu/dhus/search?q=*'
    candidates = pd.read_csv(credentials_file).to_dict(orient='records')

    # get_search_api_session() looks credentials up by position in
    # `multicred`, so expose the full candidate list while validating and
    # forget sessions cached for a previously loaded list.
    multicred = candidates
    session_store.clear()

    # Collect accepted credentials in a new list instead of deleting from the
    # list being iterated (which skipped the element after each deletion).
    accepted = []
    for i, cred in enumerate(candidates):
        try:
            session = get_search_api_session(i)
            status = session.get(check_url).status_code
        except requests.RequestException as exc:
            print('----Could not validate credential:', cred['username'], '|', exc)
            continue
        if status == 200:
            accepted.append((cred, session))
            # Only the user name is printed; never echo the password.
            print('----Successfully Validated Credential:', cred['username'])
        else:
            print('----Rejected Credential:', cred['username'], '| status', status)

    # Positions shift once rejected credentials are left out, so re-key the
    # session cache to the final positions.
    session_store.clear()
    for slot, (_, session) in enumerate(accepted):
        session_store[slot] = session

    # Repeat exactly `parallelfactor` times (repeated in-place doubling would
    # give 2**(parallelfactor - 1) copies instead).
    valid = [cred for cred, _ in accepted]
    multicred = valid * max(int(parallelfactor), 1)
    print("Total", len(multicred), "Connections Available for Download")


def get_search_api_session(i=0):
    """Return an authenticated ``requests.Session`` for credential slot ``i``.

    A session cached in ``session_store`` is reused when a probe request to
    the search endpoint still answers with HTTP 200; otherwise a new session
    is created with the credential at ``multicred[i]`` and cached.

    Args:
        i: Position of the credential in the global ``multicred`` list.

    Returns:
        The cached or newly created ``requests.Session``.

    Raises:
        IndexError / KeyError: If slot ``i`` does not exist or the record
            lacks the ``username`` / ``password`` keys.
        requests.RequestException: If the request made while creating a new
            session fails.
    """
    # Configuration
    cred = multicred[i]
    username = cred['username']
    password = cred['password']

    cached = session_store.get(i)
    if cached is not None:
        try:
            still_valid = cached.get('https://scihub.copernicus.eu/dhus/search?q=*').status_code == 200
        except requests.RequestException:
            # A failing probe only means the cached session is unusable.
            still_valid = False
        if still_valid:
            print('----using previous session')
            return cached
        # Release the connections held by the stale session before replacing it.
        cached.close()

    print('----creating session with username', username)
    session = requests.Session()
    session.auth = (username, password)
    # Initial request against the search endpoint; the response is not used.
    session.get(searchapi)
    session_store[i] = session
    return session

def download_scenes_multi(chunk):
    """Download every scene of ``chunk`` in parallel, one thread per row.

    Each row is handed to ``download_scenes`` as a dict that additionally
    carries an ``index`` key (0 .. len(chunk) - 1); ``download_scenes`` uses
    that value to pick its credential slot.

    Args:
        chunk: DataFrame with one row per scene to download.

    Returns:
        None. Progress and timing are printed.
    """
    multi_chunk_size = len(chunk)
    if multi_chunk_size == 0:
        # ThreadPool(0) raises ValueError, so an empty chunk is a no-op.
        print("\n----No Scenes to Download")
        return

    # drop=True discards the old index whatever its name is; the second
    # reset_index() then adds the positional 'index' column.
    chunk = chunk.reset_index(drop=True).reset_index()
    print("\n----Downloading %s Scenes" % multi_chunk_size)
    start = time.time()
    pool = ThreadPool(multi_chunk_size)
    try:
        # Results are not needed; iterating waits for completion and re-raises
        # any exception raised inside a worker.
        for _ in pool.imap_unordered(download_scenes, chunk.to_dict(orient='records')):
            pass
    finally:
        # Always shut the pool down, even when a download failed.
        pool.close()
        pool.join()
    print ('----All Files Downloaded Time elapsed: %s' % (time.time() - start))

def download_scenes(row):
    """Download one scene archive into ``output_folder``.

    Args:
        row: Mapping with the keys ``title`` (used as file name), ``link``
            (download URL; ``apihub`` is replaced by ``dhus``) and ``index``
            (credential slot in the global ``multicred`` list).

    Returns:
        The path of the ``.zip`` file when it already existed or was
        downloaded successfully, ``None`` when the server did not answer with
        HTTP 200 (nothing is written in that case).

    Raises:
        Any exception raised while streaming the response; the partially
        written file is removed before the exception propagates.
    """
    filename = row['title'] + ".zip"
    filepath = os.path.join(output_folder, filename)
    url = row['link'].replace('apihub', 'dhus')
    if os.path.exists(filepath):
        print("\n----Using Previouslly downloaded file ", filepath)
        return filepath

    slot = row['index']
    # Only the user name is printed; never echo the password.
    user = multicred[slot]['username']
    print("\n----Downloading File at Url:", url, " | Session", user)
    session = get_search_api_session(slot)

    # Stream into a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated file that looks like a finished .zip.
    partial = filepath + ".part"
    try:
        with session.get(url, stream=True) as r:
            if r.status_code != 200:
                # Do not store an error body as if it were the archive.
                print("\n----Download failed with status", r.status_code, " | Url:", url, " | Session", user)
                return None
            with open(partial, 'wb') as f:
                shutil.copyfileobj(r.raw, f, 10240)
        os.replace(partial, filepath)
    finally:
        # Left over only when the transfer failed before the rename.
        if os.path.exists(partial):
            os.remove(partial)
    print("\n----File successfuly downloaded to ", filepath, " | Url:", url, " | Session", user)
    return filepath

# Cleanup
def cleanup(folder=None, min_size=1000):
    """Delete ``.zip`` files that are too small to be valid downloads.

    Args:
        folder: Directory to scan; defaults to the global ``output_folder``.
        min_size: Files smaller than this many bytes are deleted.

    Returns:
        List with the paths of the deleted files, in directory-listing order.
    """
    if folder is None:
        folder = output_folder
    removed = []
    for name in os.listdir(folder):
        # Case-insensitive match on the real extension.
        if os.path.splitext(name)[1].upper() != '.ZIP':
            continue
        path = os.path.join(folder, name)
        # Skip directories (or other non-files) that merely end in ".zip".
        if not os.path.isfile(path):
            continue
        size = os.path.getsize(path)
        if size < min_size:
            os.remove(path)
            removed.append(path)
            print("----Invalid File", path, " with size ", size, " bytes deleted")
        else:
            print("----#Valid File", path, " with size ", (size/1024/1024), " MB")
    return removed
            
# Initialize sessions: loads credentials.csv, checks each credential against
# the search endpoint (network access required) and fills the global
# `multicred` list that the download helpers index into.
init_session()

In [None]:
# Area of interest: read the shapefile and reproject it to EPSG:4326
aoiFilePath = os.path.join(data_folder, 'Ap_Sentinel_Brow.shp')
aoiDf = gpd.read_file(aoiFilePath).to_crs(epsg=4326)
# Merge all features into one geometry and simplify it with tolerance 0.1
# (presumably to keep the WKT passed to the search query short -- confirm)
aoi = aoiDf.unary_union.simplify(0.1)
aoiWkt = aoi.wkt
# Display the resulting geometry
aoi

In [None]:
# Open the API connection with the notebook-level credentials
api = SentinelAPI(username, password)

# Alternative footprints, kept for reference:
#footprint = geojson_to_wkt(read_geojson('search_polygon.geojson'))
#footprint = "POLYGON ((79.2076887512349 15.7097283184072,81.5605925962714 15.7097283184072,81.5605925962714 17.1532438033589,79.2076887512349 17.1532438033589,79.2076887512349 15.7097283184072))"

# Search filters, passed to the query as keyword arguments
search_filters = {
    'producttype': 'S2MSI2A',
    'platformname': 'Sentinel-2',
    'beginposition': '[2020-01-01T00:00:00.000Z TO 2020-02-01T00:00:00.000Z]',
}

# Query the products intersecting the area of interest
products = api.query(aoiWkt, **search_filters)



In [None]:
# Convert the search result into a GeoDataFrame
product_df = api.to_geodataframe(products)
# Show only a preview instead of dumping the whole table into the notebook
product_df.head()

In [None]:
# Export to File
# Datetime columns are written as 'YYYY-MM-DD' strings (presumably because the
# ESRI Shapefile driver cannot store datetime fields -- confirm).
# select_dtypes also catches timezone-aware and non-nanosecond datetime
# columns, which an exact comparison with 'datetime64[ns]' misses.
time_columns = product_df.select_dtypes(include=['datetime', 'datetimetz']).columns
for column in time_columns:
    # Vectorised .dt.strftime leaves missing timestamps (NaT) as missing
    # values; calling strftime on NaT itself raises.
    product_df[column] = product_df[column].dt.strftime('%Y-%m-%d')
product_df.to_file(driver='ESRI Shapefile', filename=os.path.join(output_folder, 'products_ap.shp'))
# Display the exported table with the index as a regular column
product_df.reset_index()

In [None]:
# Check Previous Files
# Remove archives that are too small to be valid before deciding what is
# still missing.
cleanup()
# Titles (file name without extension) of the archives already on disk
output_files = [
    os.path.splitext(x)[0]
    for x in os.listdir(output_folder)
    if os.path.splitext(x)[1].upper() == '.ZIP'
]
download_df = product_df[~product_df['title'].isin(output_files)]
print("Using ", (product_df.shape[0] - download_df.shape[0]), " already saved files")

# Download Data
# One chunk holds as many scenes as there are credential slots, so every
# parallel download inside a chunk gets its own slot.
chunk_size = len(multicred)
if chunk_size == 0:
    # Without this guard the chunking below divides by zero.
    raise RuntimeError("No validated credentials available; run init_session() with a valid credentials file first")
print('Downloading', len(download_df), 'files using', chunk_size, 'parallel downloads')
df_chunks = download_df.groupby(np.arange(len(download_df))//chunk_size)
for _, _df in df_chunks:
    download_scenes_multi(_df)