In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import datetime as dt
import os

import scheme_mar2023 as scheme
import mytools as mt
import baikalfunctions as bf

In [None]:
# Plot appearance for the notebook ('classic' is another style worth trying).
plt.style.use('Solarize_Light2')
plt.rcParams.update({'figure.figsize': [15, 15]})

# Data locations, all relative to one root (alternative root: '~/').
driver = 'G:/'
pwd = os.path.join(driver, "1_Data1/sea/")
destData, obrisData, batimetrFile = (
    os.path.join(pwd, "AllSeaGisOut.txt"),
    os.path.join(driver, "1_Data1/obris/obris.dat"),
    os.path.join(driver, "1_Data1/batimetria/b1.txt"),
)
pwd

In [None]:
# Load the obris table (drawn by its 'long'/'lat' columns in later cells):
# tab-separated, header in the first row, ',' as decimal mark, '--' = missing.
obris = pd.read_csv(obrisData, sep='\t', header=0, decimal=',', na_values='--')
obris.head(3)

In [None]:
# Latitude / longitude steps in degrees (km equivalents are the original author's estimates).
DLAT = 0.01  # 0.01 deg of latitude => 1.11 km
DLNG = 0.02  # 0.02 deg of longitude => 1.4 km .. 1.24 km (at lat = 51 .. 56)


In [None]:
# Read the grid table (tab-separated) from the working directory.
gdf = pd.read_csv('wholeBaikalGrid001002.dat', sep='\t')

# Scale the stored 'long'/'lat' values down by 1000 into Longitude/Latitude.
gdf = gdf.assign(Longitude=gdf['long'] / 1000, Latitude=gdf['lat'] / 1000)

# Relative position between the west/east and south/north distances,
# binned into 25 and 400 integer steps respectively (truncated, not rounded).
gdf['norm_x'] = gdf['toWest'].div(gdf['toWest'].add(gdf['toEast'])).mul(25).astype('int')
gdf['norm_y'] = gdf['toSud'].div(gdf['toSud'].add(gdf['toNord'])).mul(400).astype('int')

# Spot check: the grid node at long == 106000, lat == 52000.
gdf[gdf['long'].eq(106000) & gdf['lat'].eq(52000)]

In [None]:
# Load the main measurement table: tab-separated, ',' as decimal mark,
# '--' marks missing values, and 'DateTime' is parsed month-first.
# (Add nrows=10000 here for a quick partial load.)
df = pd.read_csv(
    os.path.join(pwd, "AllSeaO1.txt"),
    sep='\t',
    header=0,
    decimal=',',
    na_values='--',
    skipinitialspace=True,
    parse_dates=['DateTime'],
    dayfirst=False,
)

In [None]:
# Discard water temperature readings taken while WaterFlow1 is below 1.5.
df['TempWtr'] = df['TempWtr'].mask(df['WaterFlow1'] < 1.5)

In [None]:
# Quick sanity check: frame dimensions and the column names that were read.
df.shape, df.columns

In [None]:
## Work with only a few columns
cols_stay = [
    'DateTime', 'Longitude', 'Latitude',
    'pCO2Air', 'pCH4Air',
    'pCO2Wtr', 'pCH4Wtr',
    'cCO2Wtr', 'cCH4Wtr',
    'TempWtr', 'WaterFlow1',
]
# Everything that is not explicitly listed above is removed from df in place.
df.drop(columns=df.columns.difference(cols_stay), inplace=True)

In [None]:
# Calendar year of every record.
df['year'] = df['DateTime'].dt.year
# Timestamp as whole seconds: the int64 value of DateTime is divided by 10**9
# and truncated back to int64.
df['dateSec'] = (df['DateTime'].astype('int64') / 10**9).astype('int64')
df

In [None]:
# Name of the df column analysed by the cells below; uncomment one alternative to switch.
# NOTE(review): some later cells hard-code 'cCO2Wtr' instead of using `param`.
param = 'cCO2Wtr'
#param = 'TempWtr'
#param = 'WaterFlow1'

In [None]:
# Display-only preview of the rows that have a value for `param`;
# the result is not assigned, so df itself is left unchanged.
df.dropna(subset=param)

In [None]:
# [start, end] timestamp pairs of the intervals to be flagged as blooming = 1.
bloome_time = [
    ['2021-06-09 13:40:00', '2021-06-12 16:50:00'],
    ['2022-06-03 07:30:00', '2022-06-04 22:45:00'],
    ['2022-06-05 16:50:00', '2022-06-05 23:30:00'],
    ['2022-06-09 12:40:00', '2022-06-09 21:30:00'],
    ['2022-06-13 15:00:00', '2022-06-14 03:20:00'],
    ['2023-06-05 03:40:00', '2023-06-07 20:00:00'],
    ['2024-06-04 16:40:00', '2024-06-07 02:30:00'],
    ['2024-06-11 13:50:00', '2024-06-12 18:40:00'],
]

# Flag: 0 everywhere, 1 inside any of the intervals (both ends inclusive).
df.loc[:, 'blooming'] = 0
for start, end in bloome_time:
    in_event = df['DateTime'].between(
        pd.to_datetime(start, yearfirst=True),
        pd.to_datetime(end, yearfirst=True),
    )
    df.loc[in_event, 'blooming'] = 1

# Squared, x10-scaled difference between the 5-row rolling means of WaterFlow1
# taken one row ahead and one row behind.
flow = df['WaterFlow1']
ahead = flow.shift(-1).rolling(5).mean()
behind = flow.shift(1).rolling(5).mean()
df.loc[:, 'blooming_an'] = ((ahead - behind) * 10) ** 2

df['blooming_an'].describe()

In [None]:
# Number of non-missing `param` values per year, 2013 through 2025.
for yr in range(2013, 2026):
    n_valid = df.loc[df['year'] == yr, param].count()
    print(f'{yr}: {n_valid}')

In [None]:
# Full time series of `param` as small dots on a wide canvas.
plt.rcParams.update({'figure.figsize': [15, 5]})
ax = plt.gca()
ax.scatter('DateTime', param, data=df, s=1)
ax.grid(True)

In [None]:
# Show the ten rows with the largest `param` values (ascending order, NaNs removed first).
df.dropna(subset=param).sort_values(by=param).tail(10)

## Blooming test

In [None]:
# One map per survey year: `param` along the track, saved as '<param>_<year>.png'.
years = [2013, 2016, 2017, 2018, 2021, 2022, 2023, 2024]
plt.style.use('default')   # alternative style: 'classic'
plt.rcParams['figure.figsize'] = [15, 15]
for y in years:
    data = df[df['year'] == y]
    fig, ax = plt.subplots()
    ax.plot('long', 'lat', '-', data=obris, alpha=1)
    points = ax.scatter('Longitude', 'Latitude', data=data, c=data[param], s=10, alpha=1, cmap='rainbow')
    # Fixed colour range so that the yearly maps are comparable.
    points.set_clim(0.2, 2.0)
    fig.colorbar(points, ax=ax)
    ax.set_title(f'{param}_{y}')
    fig.savefig(f'{param}_{y}.png')
    # Close each figure so they do not accumulate while looping.
    plt.close(fig)

## Next (ensuite)

In [None]:
# Centre of every entry in scheme.central, keyed by the entry's name.
central_stations = {
    key: scheme.get_center(coor)
    for key, coor in scheme.central.items()
}
central_stations

In [None]:
# Station centres as a frame indexed by station name.
dfc = pd.DataFrame.from_dict(
    central_stations, orient='index', columns=['Latitude', 'Longitude']
)
# scheme.points as a frame, with the point name moved into a 'place' column.
dfp = pd.DataFrame.from_dict(
    scheme.points, orient='index', columns=['Latitude', 'Longitude']
).reset_index(names='place')

In [None]:
# Overview map: obris line, the 2013 track (faint), station centres and
# scheme points (red), each labelled with its name; saved to 'track_2_3.png'.
plt.rcParams['figure.figsize'] = [15, 17]
plt.plot('long', 'lat', 'b-', data=obris, alpha=1)
plt.scatter('Longitude', 'Latitude', data=df[df['year'] == 2013], s=2, alpha=0.1)
plt.scatter('Longitude', 'Latitude', data=dfc, s=30, c='red', alpha=1)
plt.scatter('Longitude', 'Latitude', data=dfp, s=20, c='red', alpha=1)
# Zoom in with plt.xlim / plt.ylim here if needed, e.g. (106.8, 107) x (52.6, 52.8).

# Coordinates are (lat, lon); labels are nudged slightly off the marker.
for labelled in (central_stations, scheme.points):
    for name, coor in labelled.items():
        plt.annotate(name, (coor[1]+0.001, coor[0]+0.002))
plt.savefig('track_2_3.png')

In [None]:
##  distance routines

In [None]:
# Tag every record with the central station whose centre lies within
# +/-DLAT in latitude and +/-DLNG in longitude; all other rows keep ''.
df['Station'] = ''
DLAT = 0.01  # 0.01 => 1.11km
DLNG = 0.02  # 0.02 => 1.4km.. 1.24km (@ lat = 51.. 56)
# Reference value: bf.dist for a 0.01 step in the second coordinate.
print(bf.dist((82, 51), (82, 51.01)))
for station, centre in central_stations.items():
    lat0, lng0 = centre[0], centre[1]
    near = (
        df['Latitude'].between(lat0-DLAT, lat0+DLAT)
        & df['Longitude'].between(lng0-DLNG, lng0+DLNG)
    )
    # If windows overlap, the station processed last wins.
    df.loc[near, 'Station'] = station

In [None]:
# Summary statistics of `param` per (year, station); rows with Station == ''
# form their own group.
station_stats = ['count', 'mean', 'median', 'std', 'min', 'max']
dfg = df.groupby(['year', 'Station'])[param].agg(station_stats)
dfg

In [None]:
# Save the per-station summary next to the input data as '<param>CentralStation.dat'.
station_file = os.path.join(pwd, f'{param}CentralStation.dat')
dfg.to_csv(station_file, sep='\t')

In [None]:
# Load the meteo table (tab-separated, '.' as decimal mark, month-first dates).
meteo = pd.read_csv(
    os.path.join(pwd, 'meteo.txt'),
    sep='\t', decimal='.',
    parse_dates=['DateTime'], dayfirst=False,
)

# Keep only the timestamp and the wind-related columns used below,
# and only the records dated before 2024-07-01.
colsneeded = ['DateTime', 'vx', 'vy', 'Wh', 'Wdir']
meteo = meteo.drop(columns=meteo.columns.difference(colsneeded))
meteo = meteo[meteo['DateTime'] < pd.to_datetime('2024-07-01')]


In [None]:
## Some data are wrong: in severe wind the ultrasonic threshold lost synchronisation.
## vy values above 20 are treated as such and blanked out.
vy_spike = meteo['vy'] > 20
meteo.loc[vy_spike, 'vy'] = np.nan

In [None]:
# Raw wind components for the afternoon of 2021-06-06: vx in red, vy in blue.
plt.rcParams['figure.figsize'] = [12,5]
for component, colour in (('vx', 'red'), ('vy', 'blue')):
    plt.scatter('DateTime', component, data=meteo, alpha=0.4, s=1, c=colour)
plt.xlim(pd.to_datetime('2021-06-06 12:00'), pd.to_datetime('2021-06-07'))

In [None]:
# Average the meteo records into 1-minute bins and turn the DateTime bin
# labels back into an ordinary column.
meteo = meteo.resample('1min', on='DateTime').mean().reset_index()
meteo

In [None]:
# Drop the minutes that have no vx or no vy value.
meteo = meteo.dropna(subset=['vx', 'vy'])

In [None]:
meteo

In [None]:
# Timestamp as whole seconds, used as the key for the merge with df.
# The cast must be an explicit 'int64': plain 'int' is platform-dependent and
# can resolve to a 32-bit integer on Windows, which cannot hold nanosecond
# timestamps. This is the same conversion as the one used for df['dateSec'].
meteo['dateSec'] = (meteo['DateTime'].astype('int64') / 10**9).astype('int64')
# DateTime is no longer needed once dateSec exists; errors='ignore' keeps the
# cell re-runnable after the column has already been removed.
meteo = meteo.drop(['DateTime'], axis='columns', errors='ignore')
meteo

In [None]:
df

In [None]:
# Outer-join the measurements with the 1-minute meteo averages on the shared
# second-resolution key, then restore chronological order. Rows that exist on
# only one side are kept, with NaN in the other side's columns.
df = df.merge(meteo, on='dateSec', how='outer').sort_values('dateSec')

In [None]:
df

In [None]:
## vesx / vesy: signed offset of the vessel between consecutive rows, along
## longitude and along latitude respectively. The bf.dist result is multiplied
## by 1000 -- presumably km -> m; TODO confirm the units returned by bf.dist.
lon, lat = df['Longitude'], df['Latitude']
prev_lon, prev_lat = lon.shift(1), lat.shift(1)
df['vesx'] = bf.dist((lon, lat), (prev_lon, lat)) * np.sign(lon - prev_lon) * 1000
df['vesy'] = bf.dist((lon, lat), (lon, prev_lat)) * np.sign(lat - prev_lat) * 1000
## dt: difference of dateSec (seconds) between consecutive rows.
df['dt'] = df['dateSec'].diff()
## vesSpeed: length of the offset divided by dt, i.e. offset units per second.
df['vesSpeed'] = np.sqrt(np.power(df['vesx'], 2) + np.power(df['vesy'], 2)) / df['dt']

In [None]:
# Wind speed magnitude from vx and a vy that has the vessel's own y-velocity
# (vesy / dt) subtracted.
# NOTE(review): only the y component is corrected for vessel motion; vx is used
# as measured -- confirm this matches the sensor orientation.
vy_corrected = df['vy'] - df['vesy']/df['dt']
df['winSpeed'] = np.sqrt(np.power(df['vx'],2) + np.power(vy_corrected,2))

In [None]:
# Inspect position, vessel motion and wind columns for a 20-minute window
# (both end points excluded).
in_window = df['DateTime'].between(
    pd.to_datetime('2023-06-01 01:10'),
    pd.to_datetime('2023-06-01 01:30'),
    inclusive='neither',
)
preview_cols = ['DateTime', 'Longitude', 'vesx', 'Latitude', 'vesy', 'vesSpeed', 'vx', 'vy', 'winSpeed', 'Wh']
df.loc[in_window, preview_cols]

In [None]:
# Vessel speed, corrected wind speed and Wh for 2023, zoomed to
# 2023-06-01 00:00 .. 2023-06-02 15:00 and to the 0..10 value range.
plt.rcParams['figure.figsize'] = [25, 10]
rows_2023 = df.loc[df['year']==2023,:]
series_look = (
    ('vesSpeed', dict(s=1, alpha=0.5)),
    ('winSpeed', dict(s=6, alpha=0.5, c='green')),
    ('Wh', dict(s=3, alpha=0.2, c='blue')),
)
for column, look in series_look:
    plt.scatter('DateTime', column, data=rows_2023, **look)
plt.legend()
plt.ylim(0, 10)
plt.xlim(pd.to_datetime('2023-06-01 00:00'), pd.to_datetime('2023-06-02 15:00'))

In [None]:
# Keep only the rows that have a position (this also removes the meteo-only
# rows introduced by the outer merge).
df = df.dropna(subset=['Longitude', 'Latitude'])

In [None]:
# Grid nodes (green dots) under the obris line.
plt.rcParams.update({'figure.figsize': [8, 10]})
plt.scatter(x='Longitude', y='Latitude', data=gdf, c='green', s=0.1)
plt.plot('long', 'lat', 'b-', data=obris, linewidth=1)
plt.grid(alpha=0.1)

In [None]:
# Time of day of every record: minutes since midnight and the plain hour.
hour_of_day = df['DateTime'].dt.hour
df.loc[:,'TimeMinute'] = hour_of_day * 60 + df['DateTime'].dt.minute
df.loc[:,'TimeHour'] = hour_of_day
df

In [None]:
# Per-(year, blooming) statistics of cCO2Wtr for the rows whose depth lies
# between -100 and 0; the table is also copied to the clipboard.
# NOTE(review): no visible cell adds a 'depth' column to df (it is not in
# cols_stay and not among the merged meteo columns), so this presumably raises
# KeyError on a fresh run -- confirm where depth is supposed to come from.
res=df.loc[df['depth'].between(-100, 0),:].groupby(['year', 'blooming'], as_index=False).agg({'cCO2Wtr': ['mean','std', 'median', 'var', 'min', 'max']})
#res.columns = res.columns.get_level_values(0)
#res=res.groupby(['TimeHour'], as_index=False).agg({'cCO2Wtr': 'mean'})
res.to_clipboard()
res

In [None]:
# cCO2Wtr values of the rows whose depth lies between -2000 and -400,
# copied to the clipboard and displayed.
# NOTE(review): relies on a 'depth' column that no visible cell creates -- confirm.
res=df.loc[df['depth'].between(-2000, -400),['cCO2Wtr']]
res.to_clipboard()
res

In [None]:
# Display the year and depth columns side by side.
# NOTE(review): relies on a 'depth' column that no visible cell creates -- confirm.
df.loc[:,['year', 'depth']]

In [None]:
# cCO2Wtr of the selected rows against hour of day.
# `res` (built in the preceding cell) only carries the 'cCO2Wtr' column, so
# 'TimeHour' cannot be resolved through data=res. Take the hours from df for
# the same row labels instead; this assumes res is a row subset of df.
plt.scatter(df.loc[res.index, 'TimeHour'], res['cCO2Wtr'], s=10, alpha=1)

In [None]:
# Illustration of the spatial averaging: the raw track points (shaded by year)
# together with two example 0.02 x 0.01 degree cells (dashed green squares) and
# their centres (green dots). Saved to 'howtoaveraged.png'.
longt = [105.02, 105.04, 105.06, 105.08, ]

plt.style.use('default')   # alternative style: 'classic'
plt.rcParams['figure.figsize'] = [7, 7]
plt.fill('long', 'lat', data=obris, alpha=0.1)
# 'Greys' is the canonical Matplotlib name of this colormap; the 'Grays'
# spelling is not recognised by older Matplotlib releases.
plt.scatter('Longitude', 'Latitude', data=df, s=10, c=df['year'], cmap='Greys', marker='o', linewidth=0, alpha=0.5)
# First example cell, centred on (105.06, 51.90).
plt.plot([105.05, 105.07, 105.07, 105.05, 105.05], [51.905, 51.905, 51.895, 51.895, 51.905], 'g--', alpha=0.5)
plt.scatter([105.06], [51.9], s=100, c='g', alpha=0.5)
# Second example cell, centred on (105.08, 51.89).
plt.plot([105.07, 105.09, 105.09, 105.07, 105.07], [51.895, 51.895, 51.885, 51.885, 51.895], 'g--', alpha=0.5)
plt.scatter([105.08], [51.89], s=100, c='g', alpha=0.5)
plt.plot(obris['long'], obris['lat'], 'b-')

plt.xlim(105.02, 105.12)
plt.ylim(51.86, 51.91)
plt.grid(True, 'major', 'both', c='lightgrey', linestyle='dotted')
plt.savefig('howtoaveraged.png')

In [None]:
# Snap every position to the nearest node of a 0.02 (longitude) x 0.01
# (latitude) degree grid; the snapped coordinates go to 'long' and 'lat'.
longStep = 0.02
latStep = 0.01

df.loc[:, 'long'] = df['Longitude'].div(longStep).round().mul(longStep)
df.loc[:, 'lat'] = df['Latitude'].div(latStep).round().mul(latStep)

In [None]:
# Aggregate the selected columns per (year, grid cell).
# Use ['mean', 'count'] to get the number of samples per cell as well.
func_list = ['mean']
# Columns to aggregate. Other candidates: 'pCO2Wtr', 'pCH4Wtr', 'cCH4Wtr',
# 'pCO2Air', 'pCH4Air', 'TempWtr' (with func_list) and 'depth' (with ['mean']).
cols_to_calc = {column: func_list for column in ['cCO2Wtr']}
res = df.groupby(['year', 'long', 'lat'], as_index=False).agg(cols_to_calc)
res

In [None]:
res.info()

In [None]:
# Number of grid cells per year that have a mean value of `param` (2013..2024).
for yr in range(2013, 2025):
    n_cells = res.loc[res['year'] == yr, (param, 'mean')].count()
    print(f'{yr}: {n_cells}')

In [None]:
# Map of the gridded yearly means of `param` (all years overlaid, faint
# markers) on top of the obris line; colour range fixed to 0..2.
plt.rcParams['figure.figsize'] = [9,10]
grid_mean = res[(param, 'mean')]
plt.plot('long', 'lat', data=obris, markersize=1, alpha=1)
plt.scatter('long', 'lat', data=res, s=20, alpha=0.1, c=grid_mean, cmap='rainbow')
plt.colorbar()
plt.clim(0,2)

In [None]:
# NOTE(review): this assigns the whole `res` frame to a single new column
# 'Time', which pandas is expected to reject with a ValueError; the intended
# right-hand side is unclear -- TODO fix or remove this cell.
res['Time'] = res

In [None]:
# Second-level aggregation: collapse the per-year grid means held in `res`
# into one row per (long, lat) cell, with the mean, the spread and the number
# of yearly values.
func_list = ['mean', 'std', 'count']
# Aggregate only the columns that `res` really contains: ('depth', 'mean') is
# present only when 'depth' is aggregated in the cell that builds `res`, and
# requesting a missing column makes .agg() raise.
wanted_cols = [(param, 'mean'), ('depth', 'mean')]
cols_to_calc = {col: func_list for col in wanted_cols if col in res.columns}
res1 = res.groupby(['long', 'lat'], as_index=False).agg(cols_to_calc)
res1

In [None]:
destData

In [None]:
# Write the per-(year, cell) aggregate to destData as a tab-separated file.
res.to_csv(destData, sep='\t')

In [None]:
# Write df back as a tab-separated file with ',' as the decimal mark.
# NOTE(review): the target is the very file this notebook loads df from, so
# the input is replaced by the processed frame (filtered rows, extra columns,
# index column) -- confirm that overwriting the source data is intended.
source_file = os.path.join(pwd, 'AllSeaO1.txt')
df.to_csv(source_file, sep='\t', decimal=',')

In [None]:
%matplotlib inline
valMax = res[(param, 'mean')].max

fig, ax = plt.subplots()
ax.set_xlim(106.2, 107.8)
ax.set_ylim(52, 53)

# wether this or those
ax.scatter(res['long'], res['lat'], c=res[('cCH4Wtr', 'mean', 'mean')], s=40, marker='s', linewidth=0, cmap='rainbow', alpha=0.1)
ax.plot(obris['long'], obris['lat'], 'b-')

fig.savefig('gis.png')

In [None]:
df.info()

In [None]:
# Dump the complete working frame to 'all_.txt' (tab-separated) in the data directory.
all_file = os.path.join(pwd, 'all_.txt')
df.to_csv(all_file, sep='\t')

In [None]:
pwd