In [1]:
from requests import get
import datetime 
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from itertools import permutations, islice
import os
from multiprocessing.pool import ThreadPool
from time import time as timer

In [7]:
def paralise_downloads(params):
    """
    Build the Matroos netCDF request for one variable and download it to disk.

    Intended as a worker function for a thread pool: every exception is
    caught and handed back in the return value instead of being raised.

    params[0]   :  baseimportdir (directory to save the netcdf in)
    params[1]   :  source (model name in matroos)
    params[2]   :  variable (variable name in matroos)
    params[3]   :  from_time (start time matroos)
    params[4]   :  to_time (end time matroos)
    params[5]   :  strnow (current time matroos)
    params[6]   :  stridetime (interval matroos)
    params[7]   :  cellx (cell size x)
    params[8]   :  celly (cell size y)
    params[9]   :  xn (cell count x)
    params[10]  :  yn (cell count y)

    All entries are concatenated into the URL / file path and must be strings.

    Returns
    -------
    (url, out, error) : url is the request URL, out is the target file path
        <baseimportdir>/<source>/<variable>/<variable>_<from>_<to>.nc and
        error is None on success or the caught exception on failure.
        url and/or out are None when the failure happened before they
        could be built.
    """
    # Bind both names up front: the except branch returns them, and without
    # this a failure while building the URL would raise UnboundLocalError.
    url = None
    out = None
    try:
        url = (
            r'http://matroos.rws.nl:80//matroos/scripts/matroos.pl?source=' + params[1]
            + '&anal=000000000000&z=0&xmin=99000.000000&xmax=285400.000000&ymin=540000.000000&ymax=642750.000000&coords=RD&xmin_abs=&xmax_abs=285280&ymin_abs=347988&ymax_abs=679972&color=' + params[2]
            + '&interpolate=size&now=' + params[5]
            + '&to=' + params[4]
            + '&from=' + params[3]
            + '&outputformat=nc&stridex=&stridey=&stridetime=' + params[6]
            + '&xn=' + params[9]
            + '&yn=' + params[10]
            + '&celly=' + params[8]
            + '&cellx=' + params[7]
            + '&fieldoutput=' + params[2]
            + '&format=nc'
        )
        out = os.path.join(params[0], params[1], params[2], params[2]+'_'+params[3]+'_'+params[4]+'.nc')
        print('-----\n',url,'\n-----')
        # open() does not create missing parent directories, so make sure the
        # <base>/<source>/<variable> folder exists before downloading.
        out_dir = os.path.dirname(out)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        download(url, out)
        return url, out, None
    except Exception as e:
        return url, out, e

def download(url, file_name):
    """
    Fetch `url` with a GET request and write the response body to `file_name`.

    The request is made, and its HTTP status checked, before the target file
    is opened, so a failed request does not leave an empty or error-page
    file behind.

    url        :  address to request
    file_name  :  path of the file to write (opened in binary mode)

    Raises the exception produced by `raise_for_status()` for a 4xx/5xx
    response, and whatever `open()` raises when the file cannot be written.
    """
    # get request
    matroos_pl = get(url)
    # refuse to store an HTTP error page as if it were netcdf data
    matroos_pl.raise_for_status()
    # open in binary mode and write to file
    with open(file_name, "wb") as file:
        file.write(matroos_pl.content)
        

def nth(iterable, n, default=None):
    """
    Return the item at zero-based position `n` of `iterable`.

    When the iterable has fewer than n + 1 items, `default` is returned
    instead. Iterators are consumed up to and including the returned item.
    """
    # Skip the first n items; whatever comes out first is the one we want.
    remainder = islice(iterable, n, None)
    for item in remainder:
        return item
    return default

In [3]:
# Root folder the downloaded netcdf files are stored under.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
baseImportDir = r'D:\OMS_Waddenzee\trunk\fews\Import\rws\matroos'
# Matroos model (source) to query.
source = 'hmcn_kustfijn'
# Matroos variables to download; one request is made per variable.
variables = ['VELV', 'VELU', 'SEP'] # 'H'
# Form page that lists the available times and grid settings for `source`.
# The source name is inserted from the variable above so that changing
# `source` cannot leave this URL pointing at a different model.
data_php_url = (
    r'http://matroos.rws.nl/maps/start/data.php?source=' + source
    + r'&addmap=&format=txt&coords=RD&xmin_abs=-71712.4&xmax_abs=285280&ymin_abs=347988&ymax_abs=679972&interpolate=size&xmin=99000.000000&xmax=285400.000000&ymin=540000.000000&ymax=642750.000000&anal=000000000000'
)

In [4]:
## FIRST CHECK WHAT IS THE CURRENT DATE AND END DATES AND USE TO SET START DATE
# SECURITY: credentials do not belong in the notebook. They are read from the
# MATROOS_USER / MATROOS_PASSWORD environment variables; the literals are only
# a backward-compatible fallback and should be removed (and the password
# changed) once the environment is configured.
matroos_auth = (
    os.environ.get('MATROOS_USER', 'HKVtestAccount'),
    os.environ.get('MATROOS_PASSWORD', 'Mi2eez0azeiga'),
)
data_php = get(data_php_url, auth=matroos_auth)
# Stop here with the HTTP status on a failed/unauthorised request instead of
# failing further down on a form field that is missing from an error page.
data_php.raise_for_status()
html = data_php.content

soup = BeautifulSoup(html, "html.parser")


def _form_value(field_name):
    """Return the `value` attribute of the <input> element named `field_name`."""
    field = soup.find('input', attrs={'name': field_name})
    if field is None:
        raise ValueError("input %r not found in the data.php response" % field_name)
    return field.get('value')


# get strnow element from soup
iso_strnow = datetime.strptime(_form_value('strnow'), '%Y-%m-%d %H:%M:%S')
strnow = iso_strnow.strftime('%Y%m%d%H%M')

# get stridetime from soup (value of the option labelled '  0.2 hour')
stridetime = soup.find('option', text='  0.2 hour').get('value')

# get cell size / count size source
cellx = _form_value('cellx')
celly = _form_value('celly')
xn = _form_value('xn')
yn = _form_value('yn')

# get all dates of the from list
soup_from = soup.find('select', attrs={'name':'from'})
if soup_from is None:
    raise ValueError("select 'from' not found in the data.php response")
# Only the element children of the select carry a label; text nodes between
# them are left out explicitly rather than by swallowing every exception.
dates_str = [child.text for child in soup_from.find_all(True, recursive=False)]
# The first entry is skipped - presumably it is not a usable date; TODO confirm.
dates_str = dates_str[1:]

# convert date_str to datetime dates list
dates = [datetime.strptime(date, '%Y-%m-%d %H:%M:%S') for date in dates_str]
if not dates:
    raise ValueError("no dates found in the 'from' list of the data.php response")

# get current time
now = datetime.now()

# get closest date in dates list to current time
iso_from_time = min(dates, key=lambda d: abs(d-now))
from_time = iso_from_time.strftime('%Y%m%d%H%M')
# the first date of the list is used as end time
iso_to_time = dates[0]
to_time = iso_to_time.strftime('%Y%m%d%H%M')

# get parameters as list option: one parameter list per variable, in the
# order expected by paralise_downloads
parameters = [
    [baseImportDir, source, variable, from_time, to_time, strnow, stridetime, cellx, celly, xn, yn]
    for variable in variables
]
print(parameters)

[['D:\\OMS_Waddenzee\\trunk\\fews\\Import\\rws\\matroos', 'hmcn_kustfijn', 'VELV', '201611241320', '201611260420', '201611240000', '1', '197', '255', '942', '402'], ['D:\\OMS_Waddenzee\\trunk\\fews\\Import\\rws\\matroos', 'hmcn_kustfijn', 'VELU', '201611241320', '201611260420', '201611240000', '1', '197', '255', '942', '402'], ['D:\\OMS_Waddenzee\\trunk\\fews\\Import\\rws\\matroos', 'hmcn_kustfijn', 'SEP', '201611241320', '201611260420', '201611240000', '1', '197', '255', '942', '402']]


In [8]:
# retrieve matroos data in parallel
start = timer()
# One worker per download is enough. Cap at 20 threads and keep at least one,
# because a pool cannot be created with zero workers.
n_workers = max(1, min(20, len(parameters)))

# The context manager shuts the pool down when the block is left, so the worker
# threads are not leaked. The lazy result iterator therefore has to be consumed
# inside the block.
with ThreadPool(n_workers) as pool:
    results = pool.imap_unordered(paralise_downloads, parameters)

    # results arrive in completion order, not in the order of `parameters`
    for url, out, error in results:
        if error is None:
            print("%r fetched in %ss" % (out, timer() - start))
        else:
            print("error fetching %r: %s" % (out, error))

print("Elapsed Time: %s" % (timer() - start,))

-----
-----
 http://matroos.rws.nl:80//matroos/scripts/matroos.pl?source=hmcn_kustfijn&anal=000000000000&z=0&xmin=99000.000000&xmax=285400.000000&ymin=540000.000000&ymax=642750.000000&coords=RD&xmin_abs=&xmax_abs=285280&ymin_abs=347988&ymax_abs=679972&color=VELU&interpolate=size&now=201611240000&to=201611260420&from=201611241320&outputformat=nc&stridex=&stridey=&stridetime=1&xn=942&yn=402&celly=255&cellx=197&fieldoutput=VELU&format=nc 
-----
-----
 http://matroos.rws.nl:80//matroos/scripts/matroos.pl?source=hmcn_kustfijn&anal=000000000000&z=0&xmin=99000.000000&xmax=285400.000000&ymin=540000.000000&ymax=642750.000000&coords=RD&xmin_abs=&xmax_abs=285280&ymin_abs=347988&ymax_abs=679972&color=SEP&interpolate=size&now=201611240000&to=201611260420&from=201611241320&outputformat=nc&stridex=&stridey=&stridetime=1&xn=942&yn=402&celly=255&cellx=197&fieldoutput=SEP&format=nc 
-----
 http://matroos.rws.nl:80//matroos/scripts/matroos.pl?source=hmcn_kustfijn&anal=000000000000&z=0&xmin=99000.000000&x