# Preprocessing of real data

This notebook allows to create hourly data with precipitation, streamflow, potential evapotranspiration and date from the following files:

- MeteoSwiss combiprecip product
- Hourly streamflow data
- Daily minimum, maximal and mean temperature to compute the daily potential evapotranspiration, which is then distributed evenly over 24 hours and saved at an hourly resolution.
- Daily precipitation and streamflow data is also used to fill some possible gaps in the hourly ones.

/!\ The Pyeto Python package should be downloaded from [here](https://github.com/woodcrafty/PyETo) if it is not already present in the root folder of your repository.

# START by dowloading the file 'data.zip' using the link below and extract it. Save the resulting 'data' folder in the folder 'experiments'.

https://www.dropbox.com/scl/fi/mo4xg95ktj7yt09e1o78a/data.zip?rlkey=9m8e4nshv831g7q129c1vsy53&st=rgy8wp05&dl=0

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import math
from datetime import datetime, timedelta, date
import sys
sys.path.append('../../')
import pyeto


ALL_GIS_IDs = ['44']


def toYearFraction(date):
    def sinceEpoch(date): # returns seconds since epoch
        return (date.timestamp())
    s = sinceEpoch

    year = date.year
    startOfThisYear = datetime(year=year, month=1, day=1)
    startOfNextYear = datetime(year=year+1, month=1, day=1)

    yearElapsed = s(date) - s(startOfThisYear)
    yearDuration = s(startOfNextYear) - s(startOfThisYear)
    fraction = yearElapsed/yearDuration

    return date.year + fraction

In [2]:
pd.read_csv('../data/real_data/polygons_CH1903_LV95_area_weighted_combiprecip/44.csv')

Unnamed: 0,GIS_ID,min,max,sum,count,mean,std,datetime
0,44,0.082580,0.962813,13.472302,58.72076,0.229430,0.162004,20050101000000
1,44,0.000000,0.354984,4.575424,58.72076,0.077918,0.066220,20050101010000
2,44,0.044690,0.354984,5.555367,58.72076,0.094607,0.056280,20050101020000
3,44,0.060749,1.039622,15.512222,58.72076,0.264169,0.178919,20050101030000
4,44,0.035498,0.708286,10.640811,58.72076,0.181210,0.127028,20050101040000
...,...,...,...,...,...,...,...,...
155824,44,0.000000,0.000000,0.000000,58.72076,0.000000,0.000000,20221106190000
155825,44,0.000000,0.006075,0.002030,58.72076,0.000035,0.000457,20221106200000
155826,44,0.000000,0.015260,0.425136,58.72076,0.007240,0.004407,20221106210000
155827,44,0.000000,0.000000,0.000000,58.72076,0.000000,0.000000,20221106220000


In [3]:
path_J = '../data/real_data/polygons_CH1903_LV95_area_weighted_combiprecip/'
path_daily = '../data/real_data/Daily_Data/'
path_Q  = '../data/real_data/hourly_streamflow/'
file_catchprop = '../data/CH_Catchments_Geodata_MF_20221209.csv'

In [4]:
df = pd.read_csv(file_catchprop, header=None,  engine='python')
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,149,150,151,152,153,154,155,156,157,158
0,,,,,,,QUALITY,,,,...,,Quaternary Deposits CH 1:25000,,,,,,,,
1,gridcode,gridcode,Gewaesser,Station,Betreiber,Flaeche [km2],LAGE,WaterBalance,Hydrograph,Datenqualität,...,Alps_Sediments,TOTAL,artificial,other,alluvial,glacial,swamp,debris,landslide,waters
2,GIS_ID,org_ID,Gewaesser,Station,Betreiber,EZG,1 = Alpine / 2 = Midlands / 3 = Other,1 = good / 2 = ok / 3 = poor,1 = good / 2 = ok / 3 = poor,INFO,...,sedimentary rocks,total,artificial,other,alluvial,glacial,swamp,debris,landslide,waters
3,0,2019,Aare,Brienzwiler,BAFU,5540,1,3,,hydropeaking,...,165,556,01,00,21,330,02,175,13,14
4,1,2034,Broye,"Payerne, Caserne d'aviation",BAFU,4178,2,1,,,...,130,759,07,37,47,540,35,60,31,00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,407,,Durach,Schaffhausen,SCHAFFHAUSEN,449,3,2,,,...,00,349,02,02,126,39,00,176,04,00
393,408,,Durach,Merishausen,SCHAFFHAUSEN,106,3,2,,,...,00,344,00,02,109,10,00,213,10,00
394,409,,Fochtelgraben,Neunkirch,SCHAFFHAUSEN,65,3,2,,,...,00,827,00,00,265,00,01,512,49,00
395,410,,Halbach,Hallau,SCHAFFHAUSEN,107,3,1,,,...,00,739,01,00,210,82,00,415,30,00


In [5]:
df = pd.read_csv(file_catchprop, header=None,  engine='python')

data_type = df.iloc[0,:]
description = df.iloc[1,:]
IDs = df.iloc[2,:]
df = df.drop([0,1,2])
rows_idx = list(df.iloc[:,0])

df_info = df.iloc[:,[0,9]]
df_info.columns = ['GIS_ID', 'INFO']

categories = ['Area', 'Quality', 'Response', 'Climate', 'Altitude', 'Slope', 'runoff accumulation', 
              'storage capacity', 'permeability', 'waterlogging', 'thoroughness', 'land use',
             'ground cover' ] + 5*['Geology'] + ['Quaternary Deposits']
category = None
category2feature = {'Area': []}
count = 0

for i in range(len(data_type)):
    if type(data_type[i])!=str:
        category2feature[categories[count]].append(IDs[i])
    else:
        count += 1
        try:
            category2feature[categories[count]].append(IDs[i])
        except:
            category2feature[categories[count]] = [IDs[i]]
        
features2idxcategory = {}
for i,cat in enumerate(categories):
    for feature in category2feature[cat]:
        features2idxcategory[feature] = i
df.columns = IDs


df_catchment_names = df[list(IDs[:5])+['H_MIN','H_MAX', 'H_MEAN']]#df[list(IDs[:5])]
df_catchment_names.head()


df.columns = IDs
gis_id = df['GIS_ID']
df = df.drop(columns=['GIS_ID'])
df.index = rows_idx

df_infos = df.iloc[:,1:4]
df_data = df.iloc[:,4:]
df_data = df_data.drop(df_data.columns[[3, 4]],axis = 1)


Betreiber2acronym = {'BAFU':'BAFU-',
                     'AARGAU':'AG',
                     'SOLOTHURN':'SO-',
                     'BERN':'BE-',
                     'BASEL LANDSCHAFT': 'BL-',
                     'LUZERN':'LU-',
                     'ZUERICH':'ZH-'}

def get_acronym(serie):
    res = []
    for name in serie:
        res.append(Betreiber2acronym[name])
    return res
df_catchment_names = df_catchment_names.dropna(subset=['org_ID'])
df_catchment_names['catchment_name'] = get_acronym(df_catchment_names['Betreiber'].astype(str)) + df_catchment_names["org_ID"]
df_catchment_names.index = range(len(df_catchment_names['catchment_name']))
df_catchment_names['catchment_name'] = df_catchment_names['catchment_name'].apply(lambda x: x.replace('_','-'))

In [6]:
df_catchment_names

2,GIS_ID,org_ID,Gewaesser,Station,Betreiber,H_MIN,H_MAX,H_MEAN,catchment_name
0,0,2019,Aare,Brienzwiler,BAFU,570,4273,2131,BAFU-2019
1,1,2034,Broye,"Payerne, Caserne d'aviation",BAFU,440,1514,721,BAFU-2034
2,2,2053,Drance,"Martigny, Pont de Rossettan",BAFU,478,4312,2247,BAFU-2053
3,3,2070,Emme,"Emmenmatt, nur Hauptstation",BAFU,639,2221,1070,BAFU-2070
4,4,2078,Poschiavino,"Le Prese, stazione principale",BAFU,965,3891,2126,BAFU-2078
...,...,...,...,...,...,...,...,...,...
268,336,4329,Hintere Frenke,Reigoldswil,BASEL LANDSCHAFT,486,1168,735,BL-4329
269,389,FG_0346,Aabach,Lenzburg,AARGAU,392,878,563,AGFG-0346
270,391,4502,Stüsslingerbach,Lostorf,SOLOTHURN,416,957,575,SO-4502
271,401,A093,Weisse Luetschine,Grindelwald,BERN,992,4107,2686,BE-A093


In [7]:
df_catchment_names = df_catchment_names[df_catchment_names['GIS_ID'].apply(lambda x : (x in ALL_GIS_IDs))]

# PET

In [8]:
import pickle
if True:
    from geopy.geocoders import Nominatim
    geolocator = Nominatim(user_agent='myapplication')

    def get_latitude(name_station):
        location = geolocator.geocode(name_station)
        try:
            return location.raw['lat']
        except:
            print(name_station)
            return 'NAN'
    latitudes = list(map(lambda x: get_latitude(x), df_catchment_names['Gewaesser']))
    dic_latitudes = {}
    for i in range(len(df_catchment_names['Gewaesser'])):
        dic_latitudes[df_catchment_names['GIS_ID'].iloc[i]] = latitudes[i]
    with open('../data/real_data/dic_latitudes.pkl', 'wb') as handle:
        pickle.dump(dic_latitudes, handle, protocol=pickle.HIGHEST_PROTOCOL)
else:
    with open('../data/real_data/dic_latitudes.pkl', 'rb') as handle:
        dic_latitudes = pickle.load(handle)
    #latitudes = np.load('/mydata/watres/quentin/code/FLOW/data/latitudes.npy')

In [9]:
print(len(dic_latitudes))
df_catchment_names

1


2,GIS_ID,org_ID,Gewaesser,Station,Betreiber,H_MIN,H_MAX,H_MEAN,catchment_name
43,44,2300,Minster,"Euthal, Rüti",BAFU,890,2282,1351,BAFU-2300


In [10]:
df_catchment_names['latitude'] = [dic_latitudes[GISID] for GISID in df_catchment_names['GIS_ID']]
print('Number of catchments: ', df_catchment_names.shape[0])
print('Add Latitudes')
df_catchment_names = df_catchment_names.drop(df_catchment_names[df_catchment_names['latitude'] == 'NAN'].index)
print('Number of catchments: ', df_catchment_names.shape[0])

# Filter catchment with data issues
locations_pb = df_info.dropna(subset=['INFO'])['GIS_ID'].to_numpy()
print('Remove catchments with data issues')
df_catchment_names = df_catchment_names[df_catchment_names['GIS_ID'].apply(lambda x: not(x in locations_pb))]
print('Number of catchments: ', df_catchment_names.shape[0])

Number of catchments:  1
Add Latitudes
Number of catchments:  1
Remove catchments with data issues
Number of catchments:  1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_catchment_names['latitude'] = [dic_latitudes[GISID] for GISID in df_catchment_names['GIS_ID']]


In [11]:
def get_PET_hargreaves(tmin,tmean,tmax,Date,latitude):
    lat = pyeto.deg2rad(float(latitude))  # Convert latitude to radians
    day_of_year = Date.timetuple().tm_yday
    sol_dec = pyeto.sol_dec(day_of_year)            # Solar declination
    sha = pyeto.sunset_hour_angle(lat, sol_dec)
    ird = pyeto.inv_rel_dist_earth_sun(day_of_year)
    et_rad = pyeto.et_rad(lat, sol_dec, sha, ird)   # Extraterrestrial radiation
    tmax = max(tmax,tmin)
    tmin = min(tmin,tmax)
    tmean = max(min(tmean,tmax),tmin)
    return pyeto.hargreaves(tmin, tmax, tmean, et_rad)

# Precipitation

In [12]:
def process_precip(GISID):
    path_fluxes = path_J + '{0}.csv'.format(GISID)
    path_hydro = path_Q + '{0}.csv'.format(GISID)
    fluxes = pd.read_csv(path_fluxes, sep=",")
    fluxes['datetime'] = fluxes['datetime'].apply(lambda x: datetime.strptime(str(x), '%Y%m%d%H%M%S'))
    fluxes = fluxes.sort_values(by='datetime', ascending=True)
    fluxes['t'] = np.array([toYearFraction(t) for t in fluxes['datetime']])
    fluxes = fluxes.sort_values(by=['datetime'])
    fluxes = fluxes.reset_index()
    fluxes = fluxes[['mean', 'datetime', 't']]
    fluxes = fluxes.rename(columns={"mean": "precip"})

    dayfluxes = pd.read_csv(path_daily+'sw_rainfall_timeseries/psw_{0}.csv'.format(GISID), sep=",")
    dayfluxes['datetime'] = dayfluxes['datetime'].apply(lambda x: datetime.strptime(str(x), '%Y-%m-%d'))

    diffhour = (fluxes['datetime'].diff() > pd.to_timedelta('1 hour')).to_numpy()
    idxs = np.where(diffhour>0.5)[0]

    missing_dates_as_day = []
    for idx in idxs:
        current_date_hour = fluxes.iloc[idx-1]['datetime']
        next_date = (fluxes.iloc[idx]['datetime']+ pd.to_timedelta('24 hour')).date()
        while current_date_hour.date()!=next_date:
            date = current_date_hour.date()
            day_idx = np.where(dayfluxes['datetime'].apply(lambda x:x.date())==date)[0]
            if len(day_idx)==1:
                precipday = dayfluxes.iloc[day_idx[0]]['precip']
                missing_dates_in_hour = []
                date_in_hour = datetime.strptime(str(date), '%Y-%m-%d')
                for j in range(24):
                    idx_date_in_hour = np.where(fluxes['datetime']==date_in_hour)[0]
                    if len(idx_date_in_hour)==0:
                        missing_dates_in_hour.append(date_in_hour)
                    else:
                        precipday = precipday - fluxes.iloc[idx_date_in_hour[0]]['precip']
                        precipday = max(0,precipday) 
                    date_in_hour = pd.to_timedelta('1 hour') + date_in_hour

                for date_in_hour in missing_dates_in_hour:
                    idx_date_in_hour = np.where(fluxes['datetime']==date_in_hour)[0]
                    new_row = {'datetime':date_in_hour, 't':toYearFraction(date_in_hour), 'precip': precipday/len(missing_dates_in_hour) }
                    fluxes = pd.concat([fluxes, pd.DataFrame([new_row])], ignore_index=True)
            else:
                missing_dates_as_day.append(date)


            current_date_hour = current_date_hour + pd.to_timedelta('24 hour')

    fluxes = fluxes.sort_values(by=['datetime']) 
    fluxes = fluxes.reset_index(drop=True)

    idxlow = np.where([date<=datetime.strptime('2010-01-01', '%Y-%m-%d').date() for date in missing_dates_as_day])[0]
    if len(idxlow)!=0:
        datelow = np.array(missing_dates_as_day)[idxlow][-1]
        datelow = datelow + pd.to_timedelta('24 hour')
        fluxes = fluxes[fluxes['datetime']>=datetime.strptime(str(datelow), '%Y-%m-%d')]
        fluxes = fluxes.sort_values(by=['datetime']) 
        fluxes = fluxes.reset_index(drop=True)

    idxup = np.where([date>=datetime.strptime('2010-01-01', '%Y-%m-%d').date() for date in missing_dates_as_day])[0]
    if len(idxup)!=0:
        dateup = np.array(missing_dates_as_day)[idxup][0]
        dateup = dateup - pd.to_timedelta('24 hour')
        fluxes = fluxes[fluxes['datetime']<=datetime.strptime(str(dateup), '%Y-%m-%d')]
        fluxes = fluxes.sort_values(by=['datetime']) 
        fluxes = fluxes.reset_index(drop=True)
    
    return fluxes

# Streamflow

In [13]:
def process_hydro(GISID):
    files = os.listdir(path_Q)
    cat_name = df_catchment_names[df_catchment_names['GIS_ID']==GISID]['catchment_name'].values[0]

    idx_file = np.where([(cat_name in file) for file in files])[0][0]
    file = files[idx_file]

    hydro = pd.read_csv(path_Q+file, sep=",")
    hydro['datetime'] = hydro['datetime'].apply(lambda x: datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S'))
    hydro = hydro.sort_values(by='datetime', ascending=True)
    hydro['t'] = np.array([toYearFraction(t) for t in hydro['datetime']])
    hydro = hydro.sort_values(by=['datetime'])
    hydro = hydro.reset_index()
    hydro = hydro[['cms', 'datetime', 't']]
    hydro = hydro.rename(columns={"cms": "discharge"})


    dayhydro = pd.read_csv(path_daily+'sw_hydrographs/sw_{0}.csv'.format(GISID), sep=",")
    dayhydro['datetime'] = dayhydro['datetime'].apply(lambda x: datetime.strptime(str(x), '%Y-%m-%d'))

    diffhour = (hydro['datetime'].diff() > pd.to_timedelta('1 hour')).to_numpy()
    idxs = np.where(diffhour>0.5)[0]
    
    missing_dates_as_day = []
    for idx in idxs:
        current_date_hour = hydro.iloc[idx-1]['datetime']
        next_date = (hydro.iloc[idx]['datetime']+ pd.to_timedelta('24 hour')).date()
        while current_date_hour.date()!=next_date:
            date = current_date_hour.date()
            day_idx = np.where(dayhydro['datetime'].apply(lambda x:x.date())==date)[0]
            if len(day_idx)==1:
                dischargeday = dayhydro.iloc[day_idx[0]]['discharge']
                missing_dates_in_hour = []
                date_in_hour = datetime.strptime(str(date), '%Y-%m-%d')
                for j in range(24):
                    idx_date_in_hour = np.where(hydro['datetime']==date_in_hour)[0]
                    if len(idx_date_in_hour)==0:
                        missing_dates_in_hour.append(date_in_hour)
                    else:
                        dischargeday = dischargeday - hydro.iloc[idx_date_in_hour[0]]['discharge']
                        dischargeday = max(0,dischargeday) 
                    date_in_hour = pd.to_timedelta('1 hour') + date_in_hour

                for date_in_hour in missing_dates_in_hour:
                    idx_date_in_hour = np.where(hydro['datetime']==date_in_hour)[0]
                    new_row = {'datetime':date_in_hour, 't':toYearFraction(date_in_hour), 'discharge': dischargeday/len(missing_dates_in_hour) }
                    hydro = pd.concat([hydro, pd.DataFrame([new_row])], ignore_index=True)
            else:
                missing_dates_as_day.append(date)


            current_date_hour = current_date_hour + pd.to_timedelta('24 hour')

    hydro = hydro.sort_values(by=['datetime']) 
    hydro = hydro.reset_index(drop=True)

    idxlow = np.where([date<=datetime.strptime('2010-01-01', '%Y-%m-%d').date() for date in missing_dates_as_day])[0]
    if len(idxlow)!=0:
        datelow = np.array(missing_dates_as_day)[idxlow][-1]
        datelow = datelow + pd.to_timedelta('24 hour')
        hydro = hydro[hydro['datetime']>=datetime.strptime(str(datelow), '%Y-%m-%d')]
        hydro = hydro.sort_values(by=['datetime']) 
        hydro = hydro.reset_index(drop=True)

    idxup = np.where([date>=datetime.strptime('2010-01-01', '%Y-%m-%d').date() for date in missing_dates_as_day])[0]
    if len(idxup)!=0:
        dateup = np.array(missing_dates_as_day)[idxup][0]
        dateup = dateup - pd.to_timedelta('24 hour')
        hydro = hydro[hydro['datetime']<=datetime.strptime(str(dateup), '%Y-%m-%d')]
        hydro = hydro.sort_values(by=['datetime']) 
        hydro = hydro.reset_index(drop=True)

    return hydro

# Merging the dataset

In [14]:
def merging_dfs(fluxes, hydro):
    mindate = hydro['datetime'].to_numpy()[0]
    mindate = max(mindate, fluxes['datetime'].to_numpy()[0])

    maxdate = hydro['datetime'].to_numpy()[-1]
    maxdate = min(maxdate, fluxes['datetime'].to_numpy()[-1])
    hydro = hydro[hydro['datetime']>=mindate]
    hydro = hydro[hydro['datetime']<=maxdate]
    hydro = hydro.sort_values(by=['datetime']) 
    hydro = hydro.reset_index(drop=True)

    fluxes = fluxes[fluxes['datetime']>=mindate]
    fluxes = fluxes[fluxes['datetime']<=maxdate]
    fluxes = fluxes.sort_values(by=['datetime']) 
    fluxes = fluxes.reset_index(drop=True)

    dfdata = {'discharge':hydro['discharge'].to_numpy(),
              'precip': fluxes['precip'].to_numpy(),
              't': fluxes['t'].to_numpy(),
              'datetime': fluxes['datetime'].to_numpy()}
    dfdata = pd.DataFrame(dfdata)
    return dfdata

# Processing all stations

In [15]:
from tqdm import tqdm
for index, row in tqdm(df_catchment_names.iterrows()):
        GISID = row['GIS_ID']
        hydro = process_hydro(GISID)
        fluxes = process_precip(GISID)
        data = merging_dfs(fluxes, hydro)
    
        # Temperature
        Tmax = pd.read_csv(path_daily+'Tmax/TmaxD_GIS_ID-{0}.csv'.format(GISID), header=0,  engine='python')[['mean','datetime']]
        Tmax['datetime'] = Tmax['datetime'].apply(lambda x: datetime.strptime(str(x), '%Y-%m-%d'))
        Tmax = Tmax.rename(columns={'mean': 'tmax'})
        Tmax.sort_values(by=['datetime'])
        Tmin = pd.read_csv(path_daily+'Tmin/TminD_GIS_ID-{0}.csv'.format(GISID), header=0,  engine='python')[['mean','datetime']]
        Tmin['datetime'] = Tmin['datetime'].apply(lambda x: datetime.strptime(str(x), '%Y-%m-%d'))
        Tmin.sort_values(by=['datetime'])
        Tmin = Tmin.rename(columns={'mean': 'tmin'})
        Tabs = pd.read_csv(path_daily+'Tabs/TabsD_GIS_ID-{0}.csv'.format(GISID), header=0,  engine='python')[['mean','datetime']]
        Tabs['datetime'] = Tabs['datetime'].apply(lambda x: datetime.strptime(str(x), '%Y-%m-%d'))
        Tabs.sort_values(by=['datetime'])
        Tabs = Tabs.rename(columns={'mean': 'tabs'})
    
        data_daily = Tmin
        data_daily = pd.merge(data_daily, Tmax, on="datetime", how="inner")
        data_daily = pd.merge(data_daily, Tabs, on="datetime", how="inner")
    
        # PET
        lst = np.arange(0,len(data_daily),1)
        PET = np.array(list(map(lambda t: get_PET_hargreaves(data_daily.iloc[t]['tmin'], data_daily.iloc[t]['tabs'], data_daily.iloc[t]['tmax'], data_daily['datetime'][t], dic_latitudes[GISID]), lst) ))
        data_daily['pet'] = PET
        data_daily.set_index('datetime', inplace=True)
        data_hourly = data_daily.resample('H').ffill()
        data = pd.merge(data, data_hourly, on="datetime", how="inner")
        data.to_csv('../data/real_data/GISID2hourly_data_withPET/{0}.csv'.format(GISID), index=False, header=True)    

1it [00:14, 14.62s/it]


In [16]:
data

Unnamed: 0,discharge,precip,t,datetime,tmin,tmax,tabs,pet
0,0.350167,0.229430,2005.000114,2005-01-01 00:00:00,-2.403909,3.026066,0.379802,0.540212
1,0.351000,0.077918,2005.000228,2005-01-01 01:00:00,-2.403909,3.026066,0.379802,0.540212
2,0.351833,0.094607,2005.000342,2005-01-01 02:00:00,-2.403909,3.026066,0.379802,0.540212
3,0.352500,0.264169,2005.000457,2005-01-01 03:00:00,-2.403909,3.026066,0.379802,0.540212
4,0.353167,0.181210,2005.000571,2005-01-01 04:00:00,-2.403909,3.026066,0.379802,0.540212
...,...,...,...,...,...,...,...,...
136580,0.618667,0.000000,2020.581626,2020-07-31 20:00:00,15.530965,26.443121,21.434227,4.725457
136581,0.611667,0.000000,2020.581740,2020-07-31 21:00:00,15.530965,26.443121,21.434227,4.725457
136582,0.607000,0.000000,2020.581853,2020-07-31 22:00:00,15.530965,26.443121,21.434227,4.725457
136583,0.605000,0.000000,2020.581967,2020-07-31 23:00:00,15.530965,26.443121,21.434227,4.725457
