In [1]:
import hkvsobekpy as hkv
import pandas as pd
import os
import zipfile
import requests

In [2]:
def read_his_from_zip(zip_path, his_file, locs, params):
    """Extract one HIS file from a model zip and read the requested series.

    Args:
        zip_path: Path of the zip archive that contains the HIS file.
        his_file: Name of the archive member to extract (e.g. 'reachseg.his').
        locs: Location ids passed to ``MultiWaardenArray`` as ``locations``.
        params: Parameter names passed to ``MultiWaardenArray`` as ``parameters``.

    Returns:
        Whatever ``MultiWaardenArray`` returns for the requested locations and
        parameters (the notebook merges it on its index, so presumably a
        pandas DataFrame -- confirm against the hkvsobekpy documentation).

    Side effects:
        * The member is extracted into the current working directory, because
          no target path is given to ``extract``; a file of the same name
          from an earlier archive is overwritten.
        * The parameter names available in the HIS file are printed.
    """
    # The context manager closes the archive handle even when extraction
    # fails; the original left the ZipFile open.
    with zipfile.ZipFile(zip_path, 'r') as archive:
        his_path = archive.extract(his_file)

    his_object = hkv.read_his.LeesMetadata(his_path)

    # Show which parameters the file offers; useful when `params` does not match.
    his_params = his_object.KrijgParameters()
    print(his_params)

    his_data = his_object.MultiWaardenArray(locations=locs,
                                            parameters=params,
                                            jaarmax_as='none',
                                            drop_lege_jaren=False)

    return his_data

def validate_locations(zip_path, his_file, locs, sequence_simularity):
    """Compare requested location ids with the locations present in a HIS file.

    Args:
        zip_path: Path of the zip archive that contains the HIS file.
        his_file: Name of the archive member to extract.
        locs: Location ids to check; they become the 'locs_his' column.
        sequence_simularity: Forwarded unchanged to
            ``hkv.core.utils.compare_df_column_his_list``.

    Returns:
        The result of ``compare_df_column_his_list`` for the 'locs_his' column
        and the locations reported by the HIS file.

    Side effects:
        The member is extracted into the current working directory, because
        no target path is given to ``extract``.
    """
    locs_column = 'locs_his'
    # Build the frame from the same name that is handed to the comparison
    # helper, so the two cannot drift apart.
    df_locs = pd.DataFrame({locs_column: locs})

    # The context manager closes the archive handle even when extraction
    # fails; the original left the ZipFile open.
    with zipfile.ZipFile(zip_path, 'r') as archive:
        his_path = archive.extract(his_file)

    his_object = hkv.read_his.LeesMetadata(his_path)
    his_locs = his_object.KrijgLokaties()

    df_locs = hkv.core.utils.compare_df_column_his_list(df_locs,
                                                        locs_column,
                                                        his_locs,
                                                        include_simularity=True,
                                                        sequence_simularity=sequence_simularity)
    return df_locs

def create_subfolder(werkmap, folder):
    """Make sure ``folder`` exists below ``werkmap`` and return its path.

    Args:
        werkmap: Base directory.
        folder: Name (or relative path) of the sub-folder to create.

    Returns:
        The joined path ``os.path.join(werkmap, folder)``.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    path = os.path.join(werkmap, folder)
    # exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test and also creates missing parent directories.
    os.makedirs(path, exist_ok=True)
    return path

In [3]:
# Working directory: holds the two Excel inputs below and receives the
# 'zip' and 'csv' sub-folders created further down.
werkmap = r'T:\2Lexy\waterbalans100j'
# Excel index of the available model runs and their zip file names
excel_idfile = os.path.join(werkmap, 'OpenDapFileList_WABES_NWM1440.xlsx')
# Excel workbook whose 'id_def' sheet lists the Sobek ids to extract
excel_in = os.path.join(werkmap, 'ids_waterbalans_Waal.xlsx')

# Model and scenario select the rows of the run index to process and are
# also used in the name of the resulting CSV file.
model = 'LSM_LT'
scenario = 'REF2015S1'
# Download URLs are built as '<baseurl>/<model>/<zip file name>'
baseurl = 'http://opendap-dm1.knmi.nl:8080/thredds/fileServer/deltamodel/Archief/ZW'

In [4]:
# Create the working sub-folders below werkmap:
# locpad receives the downloaded zip files, csvpad the exported CSV result.
locpad = create_subfolder(werkmap, 'zip')
csvpad = create_subfolder(werkmap, 'csv')

In [5]:
# Read the Excel index of model runs into a DataFrame; the 'Model',
# 'Scenario' and 'Filename Modelzip' columns are used further down.
df_knmi = pd.read_excel(excel_idfile)
df_knmi.head()

Unnamed: 0,Run,Model,Scenario,Year,T0,Filename Modelzip
0,LHM REF2015S1 1911 (1911-2011),LHM,REF2015S1;,1911,191201010000,ZW_LHM_191201010000_NLKDMC00_000095159_REF2015...
1,LHM REF2015S1Z0 1911 (1911-2011),LHM,REF2015S1Z0;,1911,191201010000,ZW_LHM_191201010000_NLKDMC00_000075014_REF2015...
2,LHM REF2015S2 1911 (1911-2011),LHM,REF2015S2;,1911,191201010000,ZW_LHM_191201010000_NLKDMC00_000095192_REF2015...
3,LHM REF2015S2Z0 1911 (1911-2011),LHM,REF2015S2Z0;,1911,191201010000,ZW_LHM_191201010000_NLKDMC00_000075115_REF2015...
4,LHM W2050S1 1911 (1911-2011),LHM,W2050S1;,1911,191201010000,ZW_LHM_191201010000_NLKDMC00_000095239_W2050S1...


In [6]:
# Select the runs of the configured model and scenario and renumber them
# from 0, so the positional index can be used to limit the download loop.
# reset_index is chained (not inplace) because the filtered frame is a slice
# of df_knmi; modifying a slice in place is the SettingWithCopy pattern.
# NOTE(review): the df_knmi preview shows LHM scenarios stored with a
# trailing ';' (e.g. 'REF2015S1;'); this exact match would not find those
# rows -- confirm before switching `model` to 'LHM'.
fileset = df_knmi[(df_knmi['Model'] == model) & (df_knmi['Scenario'] == scenario)].reset_index(drop=True)

In [7]:
# Preview the first rows of the selected runs
fileset.head()

Unnamed: 0,Run,Model,Scenario,Year,T0,Filename Modelzip
0,LSMLT REF2015S1 1911 (1911-2011),LSM_LT,REF2015S1,1911,191112310100,ZW_LSM_LT_191112310100_NLKDMC00_000125375_REF2...
1,LSMLT REF2015S1 1912 (1911-2011),LSM_LT,REF2015S1,1912,191212310100,ZW_LSM_LT_191212310100_NLKDMC00_000125376_REF2...
2,LSMLT REF2015S1 1913 (1911-2011),LSM_LT,REF2015S1,1913,191312310100,ZW_LSM_LT_191312310100_NLKDMC00_000125377_REF2...
3,LSMLT REF2015S1 1914 (1911-2011),LSM_LT,REF2015S1,1914,191412310100,ZW_LSM_LT_191412310100_NLKDMC00_000125378_REF2...
4,LSMLT REF2015S1 1915 (1911-2011),LSM_LT,REF2015S1,1915,191512310100,ZW_LSM_LT_191512310100_NLKDMC00_000125379_REF2...


In [8]:
# Show only the first selected run (row slice, so the result stays a DataFrame)
fileset[0:1]

Unnamed: 0,Run,Model,Scenario,Year,T0,Filename Modelzip
0,LSMLT REF2015S1 1911 (1911-2011),LSM_LT,REF2015S1,1911,191112310100,ZW_LSM_LT_191112310100_NLKDMC00_000125375_REF2...


In [9]:
# Read the 'id_def' sheet into a DataFrame; its 'SobekID' and 'SobekType'
# columns determine which ids are read from which HIS file further down.
df_config = pd.read_excel(excel_in, sheet_name='id_def')
df_config.head()

Unnamed: 0,SobekID,SobekType,Plus,Reservoir,Type,Beschrijving
0,R_RT_002_1,Reach,IN,Waal,afvoer_in,Waal Pannerdense Kop
1,R_MS_014_1,Reach,UIT,Waal,afvoer_uit,Maas Waalkanaal
2,R_ARK_6146_1,Reach,UIT,Waal,afvoer_uit,Amsterdam Rijnkanaal
3,R_NDB_23019_1,Reach,UIT,Waal,afvoer_uit,Boven Merwede Sleeuwijk
4,LatPrcp_DM6023,Lat,IN,Waal,neerslag,


In [10]:
# Split the configured Sobek ids into one list per SobekType.
# Known gap: 'R_RT_002_2' and 'R_NDB_23019_2' not existing
sids_reach = df_config.loc[df_config['SobekType'] == 'Reach', 'SobekID'].tolist()
sids_lat = df_config.loc[df_config['SobekType'] == 'Lat', 'SobekID'].tolist()
sids_wl = df_config.loc[df_config['SobekType'] == 'WL', 'SobekID'].tolist()

In [14]:
# Download (when not cached), read and merge the model results per run, then
# write everything to one CSV file.
#
# One merged frame is collected per run and concatenated once after the loop:
# growing a DataFrame with DataFrame.append inside the loop copies all data on
# every iteration, and DataFrame.append no longer exists in pandas >= 2.0.
yearly_frames = []

for idx, row in fileset.iterrows():

    # Only the first three runs are processed;
    # remove this guard to download all years.
    if idx not in [0, 1, 2]:
        continue

    file = row['Filename Modelzip']

    url = '{}/{}/{}'.format(baseurl, model, file)
    zip_path = os.path.join(locpad, file)
    if not os.path.exists(zip_path):
        print('Downloading '+file+' ('+str(idx)+'/'+str(len(fileset))+')')
        r = requests.get(url)
        # Stop on HTTP errors before anything is written: otherwise an error
        # page would be stored as the zip and reported as "already downloaded"
        # on the next run.
        r.raise_for_status()
        with open(zip_path, 'wb') as f:
            f.write(r.content)
    else:
        print('{} already downloaded'.format(file))

    # validate_locations(zip_path=zip_path, his_file = 'reachseg.his', locs=sids_reach, sequence_simularity=1)
    data_reach = read_his_from_zip(zip_path = zip_path,
                                   his_file = 'reachseg.his',
                                   locs = sids_reach,
                                   params = ['Discharge mean(m³/s)']
                                  )

    # validate_locations(zip_path=zip_path, his_file = 'QLAT.HIS', locs=sids_lat, sequence_simularity=1)
    data_lat = read_his_from_zip(zip_path = zip_path,
                                 his_file = 'QLAT.HIS',
                                 locs = sids_lat,
                                 params = ['Lat.disch.mean(m3/s)']
                                )

    # sids_wl locations are not existing, so water levels are not read yet:
    # validate_locations(zip_path=zip_path, his_file='measstat.his', locs=sids_wl, sequence_simularity=1)
    # data_wl = read_his_from_zip(zip_path = zip_path,
    #                             his_file = 'measstat.his',
    #                             locs = sids_wl,
    #                             params = ['Water Level (m AD)  ']
    #                            )

    # Join reach and lateral series on their shared index
    data_merged = data_reach.merge(data_lat, left_index=True, right_index=True)
    # only possible if data_wl returns data
    # data_merged = data_merged.merge(data_wl, left_index=True, right_index=True)

    yearly_frames.append(data_merged)

# pd.concat raises on an empty list; fall back to the empty frame the original
# code started from when no run was processed.
data_all = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

data_all.to_csv(os.path.join(csvpad,'all_{}_{}.csv'.format(model, scenario)))

ZW_LSM_LT_191112310100_NLKDMC00_000125375_REF2015S1.zip already downloaded
['Discharge mean(m³/s)', 'Velocity mean (m/s) ']
['Lat.disch.mean(m3/s)', 'Defined Lateral Mean', 'Diff. Actual-Defined']
ZW_LSM_LT_191212310100_NLKDMC00_000125376_REF2015S1.zip already downloaded
['Discharge mean(m³/s)', 'Velocity mean (m/s) ']
['Lat.disch.mean(m3/s)', 'Defined Lateral Mean', 'Diff. Actual-Defined']
ZW_LSM_LT_191312310100_NLKDMC00_000125377_REF2015S1.zip already downloaded
['Discharge mean(m³/s)', 'Velocity mean (m/s) ']
['Lat.disch.mean(m3/s)', 'Defined Lateral Mean', 'Diff. Actual-Defined']


In [15]:
# Preview the first rows of the combined result
data_all.head()

parameters,Discharge mean(m³/s),Discharge mean(m³/s),Discharge mean(m³/s),Discharge mean(m³/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s),Lat.disch.mean(m3/s)
locations,R_RT_002_1,R_MS_014_1,R_ARK_6146_1,R_NDB_23019_1,LatPrcp_DM6023,LatPrcp_DM6020,LatPrcp_DM6021,LatPrcp_DM6022,LatEvap_DM6021,LatEvap_DM6023,...,ER_62804,RWZI220206,Onl_DM6021,Onl_DM6022,Onl_DM6023_54d,Onl_DM6020,ER_62819,ER_62932,RWZI220226,ER_69073
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1911-01-01,5570.5,-2.0,1.805414,4842.900391,0.03824,0.102919,0.096067,0.034598,-0.001359,-0.005238,...,0.0,0.045,1.09,0.0,2.64,0.0,0.0,0.0,0.035,0.0
1911-01-02,2691.737549,-2.0,-2.305328,4406.526367,0.038477,0.102753,0.095933,0.034697,-0.001357,-0.005232,...,0.0,0.045,1.09,0.0,2.64,0.0,0.0,0.0,0.035,0.0
1911-01-03,2390.390137,-2.0,-0.509345,2786.27417,0.139902,0.031162,0.038088,0.077327,-0.000679,-0.002619,...,0.0,0.045,1.09,0.0,2.64,0.0,0.0,0.0,0.035,0.0
1911-01-04,2251.71875,-2.0,0.386871,2474.795898,0.0,0.027947,0.030339,0.032595,-0.000679,-0.002619,...,0.0,0.045,1.09,0.0,2.64,0.0,0.0,0.0,0.035,0.0
1911-01-05,2133.328369,-2.0,0.605815,2242.54126,0.000418,0.007141,0.002428,0.00042,-0.000682,-0.002628,...,0.0,0.045,1.09,0.0,2.64,0.0,0.0,0.0,0.035,0.0
