In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import datetime as dt
import os

import baikalfunctions as bfunc
import scheme_mar2023 as scheme


In [None]:
# Global plotting defaults for the notebook.
plt.style.use('Solarize_Light2')   # other built-in styles can be tried here, e.g. 'classic'
plt.rcParams['figure.figsize'] = [15, 15]
# Base directory from which every data path below is built.
driver = '~/'
# Directory holding the sea measurement files that are read and written later on.
pwd = os.path.join(driver, "1_Data1/sea/")
# Destination of the gridded aggregate (`res`) saved near the end of the notebook.
destData = os.path.join(pwd, "AllSeaGisOut.txt")
# Table with 'long'/'lat' columns that is drawn as a line on the maps (presumably the lake outline).
obrisData = os.path.join(driver, "1_Data1/obris/obris.dat")
# Bathymetry grid file: six header lines followed by the grid values.
batimetrFile = os.path.join(driver, "1_Data1/batimetria/b1.txt")

In [None]:
# Outline table: tab-separated, comma as decimal mark, '--' marks missing values.
obris = pd.read_csv(obrisData, sep='\t', decimal=',', na_values='--', header=0)
obris.head(3)

In [None]:
# Load the accumulated sea data set and discard the unnamed leading column
# (presumably the row index written by an earlier export of this file).
df = pd.read_csv(
    os.path.join(pwd, "AllSea.txt"),
    sep='\t',
    decimal='.',
    header=0,
    na_values='',
    skipinitialspace=True,
    parse_dates=['DateTime'],
    dayfirst=False,
    # nrows=100000,   # enable for a quick partial load
).drop(columns=['Unnamed: 0'])

In [None]:
# Load BaikalJul2024Sea.dat from the sea data directory.
# NOTE: this rebinds `df`; when both loading cells are run, the frame read from
# AllSea.txt is replaced by this one.
# The directory and the file name are passed to os.path.join as separate
# components, so the path no longer depends on `pwd` ending with a separator.
df = pd.read_csv(os.path.join(pwd, 'BaikalJul2024Sea.dat'),
                 sep='\t',
                 skiprows=[1],   # skip the second physical line (presumably a units row - confirm)
                 parse_dates=['DateTime'],
                 dayfirst=False,
                 na_values='',
                 decimal='.',
                )

In [None]:
# Quick check of the frame's dimensions and column labels.
df.shape, df.columns

In [None]:
## Keep only the columns needed for the analysis
cols_stay = [
    'DateTime', 'Longitude', 'Latitude',
    'pCO2Air', 'pCH4Air',
    'pCO2Wtr', 'pCH4Wtr',
    'dpCO2', 'dpCH4',
    'cCO2Wtr', 'cCH4Wtr',
]
df = df.drop(columns=[name for name in df.columns if name not in cols_stay])

In [None]:
# Summary of column dtypes and non-null counts.
df.info()

In [None]:
# Average every column into 1-minute bins keyed on DateTime; DateTime becomes the index
# and bins without any samples come out as NaN rows.
df = df.resample('1min', on='DateTime').mean()

In [None]:
# Discard rows that have no position; gas columns are allowed to stay NaN.
# (Stricter variants that also required cCO2Wtr / cCH4Wtr, or that split the data
# into separate CO2 and CH4 frames, were tried earlier and are disabled.)
df = df.dropna(subset=['Longitude', 'Latitude'])

In [None]:
# Turn the DateTime index back into an ordinary column and renumber the rows.
df = df.reset_index()
df

In [None]:
# Calendar year of each row; used below for per-year filtering and grouping.
df['year'] = df['DateTime'].dt.year

In [None]:
# Display the frame, now including the 'year' column.
df

In [None]:
# Number of rows recorded in each year (rows are counted whether or not cCO2Wtr is present).
for yr in range(2013, 2026):
    rows_in_year = df.loc[df['year'] == yr, ['cCO2Wtr']]
    print(f'{yr}: {len(rows_in_year)}')

In [None]:
# The ten largest non-missing cCO2Wtr values (in ascending order); useful for spotting outliers.
df.dropna(subset='cCO2Wtr').sort_values(by='cCO2Wtr').tail(10)

In [None]:
## Blank out known-bad readings (e.g. wrong data while starting measurements).
## np.nan is used throughout: the np.NaN alias was removed in NumPy 2.0.

# Time windows (inclusive at both ends) in which the listed columns are invalid.
filter_list = [{'date_start': '2024-05-20 00:00', 'date_stop': '2024-05-30 04:48', 'cols': ['pCO2Air', 'pCH4Air'], 'fill_with': np.nan},   # Wrong data
               {'date_start': '2021-06-02 18:50', 'date_stop': '2021-06-02 18:54', 'cols': ['pCO2Wtr', 'dpCO2'], 'fill_with': np.nan},   # Wrong data
               {'date_start': '2021-06-02 20:56', 'date_stop': '2021-06-02 20:57', 'cols': ['pCO2Wtr', 'dpCO2'], 'fill_with': np.nan},   # Wrong data
               {'date_start': '2021-06-12 07:41', 'date_stop': '2021-06-12 07:41', 'cols': ['pCO2Wtr', 'dpCO2'], 'fill_with': np.nan},   # Wrong data
               {'date_start': '2024-06-03 03:33', 'date_stop': '2024-06-03 03:33', 'cols': ['pCO2Wtr', 'dpCO2'], 'fill_with': np.nan},   # Wrong data
              ]

for cycle in filter_list:
    # Parse the window and build the row mask once per entry, not once per column.
    date_start = pd.to_datetime(cycle['date_start'])
    date_stop = pd.to_datetime(cycle['date_stop'])
    in_window = (df['DateTime'] >= date_start) & (df['DateTime'] <= date_stop)
    for col in cycle['cols']:
        df.loc[in_window, col] = cycle['fill_with']

# Year-specific dpCO2 outlier thresholds; all three CO2 water columns are blanked together.
co2_cols = ['dpCO2', 'cCO2Wtr', 'pCO2Wtr']
df.loc[(df['year'] == 2018) & (df['dpCO2'] >= 600), co2_cols] = np.nan
df.loc[(df['year'] == 2021) & (df['dpCO2'] <= -350), co2_cols] = np.nan
df.loc[(df['year'] == 2023) & (df['dpCO2'] >= 600), co2_cols] = np.nan
df.loc[(df['year'] == 2024) & (df['dpCO2'] >= 600), co2_cols] = np.nan
# Global upper limit on cCO2Wtr, applied to every year.
df.loc[df['cCO2Wtr'] >= 3.25, co2_cols] = np.nan


In [None]:
plt.rcParams['figure.figsize'] = [10,5]
#plt.ylim(-450, -250)
# Draw the full record first and the 2024 subset afterwards: later artists are painted
# on top, so drawing the red 2024 points first left them hidden under the full data set.
plt.scatter('DateTime', 'cCO2Wtr', s=1, data=df)
plt.scatter('DateTime', 'cCO2Wtr', s=1, data=df.loc[df['year']==2024, :], c='red')

#plt.scatter('DateTime', 'vCO2Air', s=1, data=df.loc[(df['DateTime'] > pd.to_datetime('30.05.2024 00:00', dayfirst=True)) & (df['DateTime'] < pd.to_datetime('30.05.2024 07:00', dayfirst=True)), :])

In [None]:
# Centre of every entry in scheme.central, keyed by the entry's name.
central_stations = {
    key: scheme.get_center(coor)
    for key, coor in scheme.central.items()
}
central_stations

In [None]:
# Station centres and scheme points as frames with (Latitude, Longitude) columns;
# for the points, the dictionary key is kept as a 'place' column.
dfc = (
    pd.DataFrame.from_dict(central_stations, orient='index')
    .set_axis(['Latitude', 'Longitude'], axis='columns')
)
dfp = (
    pd.DataFrame.from_dict(scheme.points, orient='index')
    .set_axis(['Latitude', 'Longitude'], axis='columns')
    .reset_index(names='place')
)

In [None]:
plt.rcParams['figure.figsize'] = [30, 30]

# 2013 track as faint dots, then station centres and scheme points on top.
track_2013 = df.loc[df['year'] == 2013, :]
plt.scatter('Longitude', 'Latitude', data=track_2013, s=2, alpha=0.1)
for frame, size, colour in ((dfc, 20, 'red'), (dfp, 10, 'darkred')):
    plt.scatter('Longitude', 'Latitude', data=frame, s=size, c=colour, alpha=1)
plt.plot('long', 'lat', 'b-', data=obris, alpha=1)

# Optional zoom windows (disabled):
#plt.xlim(106.8, 107); plt.ylim(52.6, 52.8)
#plt.xlim(104, 105);   plt.ylim(51.4, 52)

# Label every centre and point slightly offset from its marker;
# the stored coordinates are (latitude, longitude), the plot wants (x=longitude, y=latitude).
for labelled in (central_stations, scheme.points):
    for name, coor in labelled.items():
        plt.annotate(name, (coor[1]+0.001, coor[0]+0.002))
plt.savefig('track_2_3.png')

In [None]:
# Half-widths of the box around a station centre within which a row belongs to that station.
DLAT = 0.01
DLNG = 0.02

# Rows outside every box keep the empty-string label.
df['Station'] = ''
for station, center in central_stations.items():
    lat_ok = df['Latitude'].between(center[0] - DLAT, center[0] + DLAT)
    lng_ok = df['Longitude'].between(center[1] - DLNG, center[1] + DLNG)
    df.loc[lat_ok & lng_ok, 'Station'] = station

In [None]:
# Per-year, per-station statistics of cCO2Wtr. Rows that fall outside every station box
# carry Station == '' and therefore form their own group.
dfg = df.groupby(by=['year', 'Station'])['cCO2Wtr'].agg(['count', 'mean', 'median', 'std'])
dfg

In [None]:
# Save the per-station statistics as a tab-separated file in the current working directory.
dfg.to_csv('CO2CentralStationAllSpring.dat', sep='\t')

In [None]:
# Meteo columns used in this notebook; everything else is discarded after loading.
colsneeded = ['DateTime', 'vx', 'vy', 'Wh', 'Wdir']

meteo = pd.read_csv(
    os.path.join(pwd, 'meteo.txt'),
    sep='\t',
    decimal='.',
    parse_dates=['DateTime'],
    dayfirst=False,
)
meteo = meteo.drop(columns=[name for name in meteo.columns if name not in colsneeded])
# meteo = meteo.loc[meteo['DateTime'] < pd.to_datetime('2024-07-01'), :]
meteo

In [None]:
## Some data are wrong: severe wind desynchronized the ultrasonic threshold.
## vy values above 20 are blanked.
## NOTE(review): only vy is filtered, and only on the positive side - confirm this is intended.
meteo.loc[meteo['vy'] > 20, 'vy'] = np.nan

In [None]:
plt.rcParams['figure.figsize'] = [12,5]
# Both wind components against time: vx in red, vy in blue.
for component, colour in (('vx', 'red'), ('vy', 'blue')):
    plt.scatter('DateTime', component, data=meteo, alpha=0.4, s=1, c=colour)
# Zoom in on the second half of 2021-06-06.
plt.xlim(pd.to_datetime('2021-06-06 12:00'), pd.to_datetime('2021-06-07'))

In [None]:
# Average the meteo record into 1-minute bins and restore DateTime as a regular column.
meteo = meteo.resample('1min', on='DateTime').mean().reset_index()
meteo

In [None]:
# Keep only rows where both wind components are present (modifies meteo in place).
meteo.dropna(axis='index', subset=['vx', 'vy'], inplace=True)

In [None]:
# Display the cleaned meteo table.
meteo

In [None]:
# Outer join on DateTime keeps rows that exist in only one of the two tables;
# the result is put in chronological order.
df = (
    pd.merge(df, meteo, on='DateTime', how='outer')
    .sort_values(by='DateTime')
)

In [None]:
# Display the merged frame.
df

In [None]:
## vesx: vessel offset along longitude, vesy: vessel offset along latitude, both measured
## between consecutive rows and signed by the direction of travel (meters).
## NOTE(review): `dist` is not defined or imported in the visible cells - presumably it comes
## from baikalfunctions (imported as bfunc); confirm, and confirm that it returns kilometres
## (the factor 1000 below assumes so).
prev_lon = df['Longitude'].shift(1)
prev_lat = df['Latitude'].shift(1)
df['vesx'] = dist((df['Longitude'], df['Latitude']), (prev_lon, df['Latitude'])) * np.sign(df['Longitude'] - prev_lon) * 1000
df['vesy'] = dist((df['Longitude'], df['Latitude']), (df['Longitude'], prev_lat)) * np.sign(df['Latitude'] - prev_lat) * 1000
# Seconds since 1970-01-01, derived from a timedelta so the value does not depend on the
# storage unit of the datetime column (casting to int64 yields seconds*1e9 only for
# nanosecond-resolution data).
df['dateSec'] = (df['DateTime'] - pd.Timestamp('1970-01-01')).dt.total_seconds()  ## seconds
# Time step to the previous row, in seconds.
df['dt'] = (df['dateSec'] - df['dateSec'].shift(1))
# Vessel speed over ground: offset magnitude divided by the time step (meters per second).
df['vesSpeed'] = np.sqrt(np.power(df['vesx'], 2) + np.power(df['vesy'], 2)) / df['dt']

In [None]:
# Wind speed magnitude from vx and vy, with the vessel's latitudinal velocity (vesy/dt)
# subtracted from vy.
# NOTE(review): vx is not corrected by vesx/dt in the same way - confirm that this asymmetry
# is intended.
df['winSpeed'] = np.sqrt(np.power(df['vx'],2) + np.power((df['vy'] - df['vesy']/df['dt']),2))

In [None]:
# Inspect the motion and wind columns for 2023-06-01 between 01:10 and 01:30 (exclusive bounds).
inspect_cols = ['DateTime', 'Longitude', 'vesx', 'Latitude', 'vesy', 'Speed', 'vesSpeed', 'vx', 'vy', 'winSpeed', 'Wh']
inspect_window = (df['DateTime'] > pd.to_datetime('2023-06-01 01:10')) & (df['DateTime'] < pd.to_datetime('2023-06-01 01:30'))
# 'Speed' is removed by the cols_stay selection and never re-created in the visible cells,
# so requesting it unconditionally raises KeyError; show only the columns that exist.
df.loc[inspect_window, [col for col in inspect_cols if col in df.columns]]

In [None]:
plt.rcParams['figure.figsize'] = [25, 10]
# Select the 2023 rows once and reuse them for every layer.
df_2023 = df.loc[df['year'] == 2023, :]
#plt.scatter('DateTime', 'vesSpeed', data=df_2023, s=1, alpha=0.5)
# (column, marker size, alpha, colour) for each scatter layer.
layers_2023 = [('Speed', 1, 0.5, 'red'),
               ('winSpeed', 6, 0.5, 'green'),
               ('Wh', 3, 0.2, 'blue')]
for column, size, alpha, colour in layers_2023:
    # 'Speed' is removed by the cols_stay selection in the visible cells; a column name
    # missing from `data` is passed to matplotlib as a literal string and makes the
    # call fail, so layers whose column is absent are skipped.
    if column not in df_2023.columns:
        continue
    plt.scatter('DateTime', column, data=df_2023, s=size, alpha=alpha, c=colour)
plt.plot('DateTime', 'winSpeed', data=df_2023, c='green')
plt.legend()
plt.ylim(0, 10)
#plt.xlim(pd.to_datetime('2023-06-01 00:00'), pd.to_datetime('2023-06-02 15:00'))

In [None]:
# The first six lines of the bathymetry file are "name value" pairs. Read them with the
# name as index and transpose, so that each name becomes a column.
batlegend = pd.read_csv(
    batimetrFile,
    header=None,
    index_col=0,
    nrows=6,
    sep=r'\s+',
    decimal='.',
).T
# Marker used in the grid for cells without data.
na_val = batlegend['nodata_value']

In [None]:
# Grid values follow the six header lines; cells equal to the no-data marker become NaN.
batimetria = pd.read_csv(
    batimetrFile,
    header=None,
    index_col=False,
    skiprows=6,
    sep=r'\s+',
    decimal='.',
    na_values=na_val,
)

# Georeferencing values from the header (the transposed header has a single row labelled 1).
xllcorner, yllcorner, cellsize = (
    batlegend.loc[1, field] for field in ('xllcorner', 'yllcorner', 'cellsize')
)

In [None]:
# Show the yllcorner value read from the grid header.
yllcorner

In [None]:
# Display the bathymetry grid.
batimetria

In [None]:
## plt.rcParams['figure.figsize'] = [4, 4]
# Render the bathymetry grid as an image for a visual sanity check.
plt.imshow(batimetria)

In [None]:
#df['depth'] = batimetria.iloc[((df['Latitude'] - yllcorner) / cellsize).astype('int32'),((df['Longitude'] - xllcorner) / cellsize).astype('int32')]
# Despite its name, yllcorner_m is yllcorner plus the grid height (number of rows * cellsize),
# i.e. the coordinate of the grid's upper edge. It is the origin for the row index, which is
# computed later as (yllcorner_m - Latitude) / cellsize.
yllcorner_m = yllcorner + batimetria.shape[0] * cellsize
yllcorner_m

In [None]:
# Drop rows without a position (for example rows that came only from the meteo side of the
# outer merge); the integer grid indices computed next cannot be derived from NaN.
df = df.dropna(axis='index', subset=['Longitude', 'Latitude'], inplace=False)

In [None]:
# Column (batix) and row (batiy) indices of each position in the bathymetry grid.
# Columns are counted from xllcorner, rows downwards from the grid's upper edge (yllcorner_m).
# astype('int32') truncates toward zero.
# NOTE(review): positions outside the grid extent are not checked here.
df['batix'] = ((df['Longitude'] - xllcorner) / cellsize).astype('int32')
df['batiy'] = ((yllcorner_m - df['Latitude']) / cellsize).astype('int32')
df.columns

In [None]:
#df = (pd.merge(df, batimetria.stack().rename("depth"), left_on=["batiy", "batix"], right_index=True, how="left"))
# Look up the grid value under every position.
# Indices outside the grid are masked out first: NumPy would silently wrap negative indices
# around to the opposite edge (wrong depth) and raise IndexError for indices past the end.
# Such rows get NaN instead.
bat_grid = batimetria.to_numpy()
bat_rows = df['batiy'].to_numpy()
bat_cols = df['batix'].to_numpy()
in_grid = ((bat_rows >= 0) & (bat_rows < bat_grid.shape[0])
           & (bat_cols >= 0) & (bat_cols < bat_grid.shape[1]))
depth = np.full(len(df), np.nan)
depth[in_grid] = bat_grid[bat_rows[in_grid], bat_cols[in_grid]]
df['depth'] = depth
df

In [None]:
plt.rcParams['figure.figsize'] = [10, 10]
# Map of all positions coloured by year, with the obris line drawn on top.
plt.scatter(df['Longitude'], df['Latitude'], s=10, c=df['year'], marker='o', linewidth=0, alpha=0.5)
plt.plot(obris['long'], obris['lat'], 'b-')
plt.colorbar()

In [None]:
# Bin sizes for spatial gridding, in the units of the Longitude / Latitude columns.
longStep = 0.02
latStep = 0.01

# Snap every position to the nearest multiple of the bin size; rows that share
# a (long, lat) pair fall into the same grid cell.
df['long'] = (df['Longitude']/longStep).round()*longStep
df['lat'] = (df['Latitude']/latStep).round()*latStep

In [None]:
# Distance between two points that differ by 0.02 in the first coordinate - presumably a
# check of the longitude bin width at latitude 56.
# NOTE(review): `dist` is not defined or imported in the visible cells; confirm its origin.
dist((80, 56), (80.02, 56))

In [None]:
# Mean and sample count per (year, grid cell) for every gas column; depth only needs the mean.
func_list = ['mean', 'count']
#func_list = ['mean']
gas_cols = ['pCO2Wtr', 'pCH4Wtr', 'cCO2Wtr', 'cCH4Wtr',
            'dpCO2', 'dpCH4', 'pCO2Air', 'pCH4Air']
cols_to_calc = {col: func_list for col in gas_cols}
cols_to_calc['depth'] = ['mean']

res = df.groupby(['year', 'long', 'lat'], as_index=False).agg(cols_to_calc)
res

In [None]:
# Number of occupied grid cells in each year.
for yr in range(2013, 2026):
    cells_in_year = res.loc[res['year'] == yr]
    print(f'{yr}: {len(cells_in_year)}')

In [None]:
plt.rcParams['figure.figsize'] = [8,10]
# The obris line first, then the binned cells coloured by their mean dpCO2.
plt.plot('long', 'lat', data=obris, markersize=1, alpha=1)
plt.scatter('long', 'lat', data=res, s=20, alpha=0.1, c=res[('dpCO2', 'mean')], cmap='rainbow')
plt.colorbar()
# Clamp the colour scale to [-200, 200].
plt.clim(-200,200)
# Optional per-year variant (disabled):
#for yr in [2013, 2016, 2017, 2018, 2020, 2021, 2022, 2023, 2024]:
#for yr in [2024]:
#    plt.scatter('long', 'lat', data=res.loc[res['year']==yr, :], s=20, alpha=0.1, c=res.loc[res['year']==yr, ('dpCO2', 'mean')], cmap='Paired')
    #plt.colorbar()

In [None]:
# Summary statistics of the binned table.
res.describe()

In [None]:
# Second aggregation: collapse the per-year rows into one row per (long, lat) cell.
# The inputs are the per-year cell means, so 'mean' is an unweighted mean over years,
# 'std' is the spread between years, and 'count' is the number of years with a
# non-missing mean in that cell.
func_list = ['mean', 'std', 'count']

# Keys are the two-level column labels produced by the first aggregation; the result
# therefore has three-level labels such as ('cCO2Wtr', 'mean', 'mean').
cols_to_calc = {('cCH4Wtr', 'mean'): func_list,
                ('cCO2Wtr', 'mean'): func_list,
                ('depth', 'mean'): ['mean'],
               }
res = res.groupby(['long', 'lat'], as_index=False).agg(cols_to_calc)
res

In [None]:
# Show the output path used for the gridded result.
destData

In [None]:
# Write the gridded multi-year aggregate as a tab-separated file.
res.to_csv(destData, sep='\t')

In [None]:
# Save the full processed frame as a tab-separated file.
# NOTE(review): the target is the same AllSea.txt that an earlier cell reads as input, so the
# source file is overwritten with the processed data. The row index is written as an unnamed
# first column.
df.to_csv(os.path.join(pwd, 'AllSea.txt'),
           sep='\t'
          )

In [None]:
%matplotlib inline
#valMax = res[('cCO2Wtr','mean')].max
plt.rcParams['figure.figsize'] = [10,10]
fig, ax = plt.subplots()
# Fixed map extent.
ax.set_xlim(103.2, 110)
ax.set_ylim(51, 56)

# Grid cells as square markers coloured by the multi-year mean of the per-year cCO2Wtr means,
# with the obris line drawn on top.
ax.scatter(res['long'], res['lat'], c=res[('cCO2Wtr', 'mean', 'mean')], s=40, marker='s', linewidth=0, cmap='rainbow', alpha=0.1)
ax.plot(obris['long'], obris['lat'], 'b-')

# Save the map to the current working directory.
fig.savefig('gis.png')

In [None]:
# Column and dtype summary of the processed frame.
df.info()

In [None]:
# Second export of the processed frame, to all.txt in the sea data directory (tab-separated).
df.to_csv(os.path.join(pwd, 'all.txt'), sep='\t')

In [None]:
# Show the sea data directory path.
pwd