In [47]:
import h5py
import numpy as np
from IU_scrape_func import scrape_dwd
from IU_scrape_func import scrape_wetter_com
from IU_scrape_func import scrape_wetter_de
from IU_scrape_func import scrape_sites_append
from IU_scrape_func import scrape_sites_open
import datetime

# --- Configuration -----------------------------------------------------------
file_name_sites = 'IU_scrape_sites.csv'
file_name_data = 'IU_scrape_data'
ds_name_fl = 'data_float'
ds_name_st = 'data_string'

# One-time setup: create the csv file with the websites to scrape.
#websites = ['https://www.dwd.de/DE/leistungen/klimadatendeutschland/klimadatendeutschland.html',
#            'https://www.wetter.com/wetter_aktuell/wettervorhersage/morgen/deutschland/berlin/berlin-tempelhof/DE2823538.html',
#            'https://www.wetter.de/wetter/r/162894/diese-woche'
#           ]
#for x in websites:
#    scrape_sites_append(file_name_sites, x)

# Open csv file with websites to scrape (use the configured name rather than a
# second hard-coded copy of it).
sites = scrape_sites_open(file_name_sites)

# Each scraper is paired with the label stored next to its values. The position
# in this list selects the entry of `sites` that is passed to the scraper and
# is also stored (as a string) as the source id.
scrapers = [
    (scrape_dwd, 'DWD'),
    (scrape_wetter_com, 'Wetter.com'),
    (scrape_wetter_de, 'Wetter.de'),
]

# --- Scrape ------------------------------------------------------------------
# Scraping happens before the HDF5 file is opened, so a failing scraper can
# neither leave the data file open nor leave half-written rows behind.
data_fl = np.array([scraper(sites[idx]) for idx, (scraper, _) in enumerate(scrapers)])

# A single timestamp is shared by all rows of this run.
timestamp = datetime.datetime.now().strftime("%Y%m%d %H:%M:%S")
data_st = np.array([[timestamp, label, str(idx)] for idx, (_, label) in enumerate(scrapers)])

n_new = len(scrapers)

# --- Store -------------------------------------------------------------------
# Mode 'a' opens the file read/write and creates it if it does not exist; the
# context manager closes it even if one of the steps below raises.
with h5py.File(file_name_data, 'a') as file:
    # Create the datasets if they do not exist yet (resizable along axis 0).
    if ds_name_fl in file:
        dset_fl = file[f'/{ds_name_fl}']
    else:
        dset_fl = file.create_dataset(ds_name_fl, (0,4), maxshape=(None,4), dtype = 'float64')

    if ds_name_st in file:
        dset_st = file[f'/{ds_name_st}']
    else:
        dt = h5py.string_dtype(encoding='utf-8')
        dset_st = file.create_dataset(ds_name_st, (0,3), maxshape=(None,3), dtype = dt)

    if dset_fl.shape[0] != dset_st.shape[0]:
        print('Dataset-lengths do not correspond!')

    # Validate before resizing: otherwise a failed write would leave empty
    # rows appended to the datasets.
    expected_shape = (n_new, dset_fl.shape[1])
    if data_fl.shape != expected_shape:
        raise ValueError(
            f'Scraped float data has shape {data_fl.shape}, expected {expected_shape}'
        )

    # Append the new rows at the end of each dataset.
    start_st = dset_st.shape[0]
    start_fl = dset_fl.shape[0]
    dset_st.resize(start_st + n_new, axis=0)
    dset_fl.resize(start_fl + n_new, axis=0)
    dset_st[start_st:start_st + n_new, :] = data_st
    dset_fl[start_fl:start_fl + n_new, :] = data_fl

    #To get rid of b: .decode("utf-8")
    #for x in range(0,len(dset_st)):
    #    print(dset_st[x,:], dset_fl[x,:])

    print(dset_fl[...])
    print(dset_st[...])


[[2.02309180e+07 0.00000000e+00 1.65000000e+01 2.62999992e+01]
 [2.02309200e+07 1.00000000e+00 1.50000000e+01 2.40000000e+01]
 [2.02309200e+07 2.00000000e+00 1.50000000e+01 2.50000000e+01]
 [2.02309190e+07 0.00000000e+00 1.58000000e+01 2.11000000e+01]
 [2.02309210e+07 1.00000000e+00 1.50000000e+01 2.70000000e+01]
 [2.02309210e+07 2.00000000e+00 1.40000000e+01 2.80000000e+01]]
[[b'20230919 23:41:54' b'DWD' b'0']
 [b'20230919 23:41:54' b'Wetter.com' b'1']
 [b'20230919 23:41:54' b'Wetter.de' b'2']
 [b'20230920 12:57:33' b'DWD' b'0']
 [b'20230920 12:57:33' b'Wetter.com' b'1']
 [b'20230920 12:57:33' b'Wetter.de' b'2']]


In [46]:
# Restore the live data file from the backup: shrink/grow both datasets to the
# backup's row count and overwrite their contents with the backup's contents.
# The backup is opened read-only since it is never written here, and the
# context manager closes both files even if a step raises.
with h5py.File('IU_scrape_data','r+') as file1, h5py.File('IU_scrape_data_BACKUP_1','r') as file2:
    # Take the target length from the backup instead of hard-coding it, so the
    # full-dataset assignments below always have matching shapes.
    file1['data_float'].resize(file2['data_float'].shape[0],axis=0)
    file1['data_string'].resize(file2['data_string'].shape[0],axis=0)

    # Contents after resizing, before the restore.
    print(file1['data_float'][...])
    print(file1['data_string'][...])

    file1['data_float'][...] = file2['data_float'][...]
    file1['data_string'][...] = file2['data_string'][...]

    # Contents after the restore.
    print(file1['data_float'][...])
    print(file1['data_string'][...])

[[2.02309180e+07 0.00000000e+00 1.65000000e+01 2.62999992e+01]
 [2.02309200e+07 1.00000000e+00 1.50000000e+01 2.40000000e+01]
 [2.02309200e+07 2.00000000e+00 1.50000000e+01 2.50000000e+01]]
[[b'20230920 12:45:27' b'DWD' b'0']
 [b'20230920 12:45:27' b'Wetter.com' b'1']
 [b'20230920 12:45:27' b'Wetter.de' b'2']]
[[2.02309180e+07 0.00000000e+00 1.65000000e+01 2.62999992e+01]
 [2.02309200e+07 1.00000000e+00 1.50000000e+01 2.40000000e+01]
 [2.02309200e+07 2.00000000e+00 1.50000000e+01 2.50000000e+01]]
[[b'20230919 23:41:54' b'DWD' b'0']
 [b'20230919 23:41:54' b'Wetter.com' b'1']
 [b'20230919 23:41:54' b'Wetter.de' b'2']]
