In [None]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
from pykrige import OrdinaryKriging

import os

## Data reading    
 - [ ] read original data
 - [ ] read Baikal outline (obris)
 - [ ] read Baikal normalized matrix

In [None]:
# Plot theme for the figures below (another built-in option: 'classic').
plt.style.use('Solarize_Light2')

# Root of the data tree (alternative root: "G:/").
driver = "~/"
pwd = os.path.join(driver, '1_Data1/sea/')

# Input files, all located under `driver`.
batimetrFilePath = os.path.join(driver, "1_Data1/batimetria/b1.txt")
obrisDataPath = os.path.join(driver, "1_Data1/obris/obris.dat")
srcDataPath = os.path.join(pwd, "AllSea.txt")  # alternative source: "~/methaneSea.dat"

# Echo the resolved source path as the cell output.
srcDataPath

In [None]:
# Outline table: tab-separated, comma as the decimal mark, '--' marks missing values.
obris = pd.read_csv(
    obrisDataPath,
    sep='\t',
    decimal=',',
    na_values='--',
    header=0,
)

In [None]:
# Grid cell size used when snapping coordinates: (longitude step, latitude step).
# Presumably in degrees, matching the Longitude/Latitude columns — TODO confirm.
longStep, latStep = 0.02, 0.01

In [None]:
# Load the precomputed grid and discard the unnamed leading column
# (presumably an index saved by an earlier export — TODO confirm).
gdf = pd.read_csv('wholeBaikalGrid.dat', sep='\t').drop(columns='Unnamed: 0')

# Snap coordinates to the grid and keep them as integer thousandths, so that they
# can serve as exact join keys against the sample table.
for axis_col, step in (('long', longStep), ('lat', latStep)):
    snapped = (gdf[axis_col] / step).round() * step * 1000
    gdf[axis_col] = snapped.round().astype('int')

# Snapped coordinates back in their original units, used for plotting.
gdf['Longitude'] = gdf['long'] / 1000
gdf['Latitude'] = gdf['lat'] / 1000

# Integer cell indices derived from the relative west/east and south/north position:
# the west share is scaled by 25, the south share by 400, both truncated to int.
west_share = gdf['toWest'] / (gdf['toWest'] + gdf['toEast'])
south_share = gdf['toSud'] / (gdf['toSud'] + gdf['toNord'])
gdf['norm_x'] = (west_share * 25).astype('int')
gdf['norm_y'] = (south_share * 400).astype('int')

# Spot-check a single grid node.
gdf.loc[(gdf['long'] == 104000) & (gdf['lat'] == 51630), :]

In [None]:
# Display the prepared grid table.
gdf

In [None]:
# Columns read from the source table; everything else in the file is ignored.
wanted_cols = ['Longitude', 'Latitude', 'DateTime', 'cCH4Wtr', 'cCO2Wtr', 'dpCO2', 'dpCH4']

# Tab-separated text with '.' as the decimal mark; DateTime is parsed month-first.
df = pd.read_csv(
    os.path.join(pwd, "BaikalJul2024Sea.dat"),  # alternative input: "AllSea.txt"
    sep='\t',
    decimal='.',
    header=0,
    usecols=wanted_cols,
    skipinitialspace=True,
    skiprows=[],
    na_values='',
    parse_dates=['DateTime'],
    dayfirst=False,
)

In [None]:
# Calendar year of each sample; used further down for per-year grouping and filtering.
df['year'] = df['DateTime'].dt.year

In [None]:
# Renumber rows 0..n-1, discarding the previous index.
df = df.reset_index(drop=True)

In [None]:
# Sanity check: show the two rows with the largest dpCO2 to spot outliers.
dpco2_present = df.dropna(subset=['dpCO2'])
dpco2_present.sort_values(by='dpCO2').tail(2)

In [None]:
# Number of rows that carry a dpCO2 value, per year.
# The mask is an explicit not-null test: `& df['dpCO2']` would use the float values
# themselves as truth values, so a reading of exactly 0.0 would be left out of the count.
has_dpco2 = df['dpCO2'].notna()
for yr in range(2013, 2025):
    n_rows = int(((df['year'] == yr) & has_dpco2).sum())
    print(f'{yr}: {n_rows}')

In [None]:
# Snap every sample position to the grid; the keys are integer thousandths of the
# snapped coordinate, the same encoding as gdf['long'] / gdf['lat'].
lon_cells = (df['Longitude'] / longStep).round()
lat_cells = (df['Latitude'] / latStep).round()
df['long'] = (lon_cells * longStep * 1000).round().astype('int')
df['lat'] = (lat_cells * latStep * 1000).round().astype('int')
df

In [None]:
# Aggregations applied per (year, grid cell); use ['mean', 'count'] to also get counts.
func_list = ['mean']
# Only dpCO2 is aggregated; cCH4Wtr, cCO2Wtr and dpCH4 can be added with the same list.
cols_to_calc = {'dpCO2': func_list}

cell_groups = df.groupby(['year', 'long', 'lat'], as_index=False)
res = cell_groups.agg(cols_to_calc)
# A list of functions yields two-level column labels; keep only the top level.
res.columns = res.columns.get_level_values(0)
res

In [None]:
# Attach the grid attributes to every aggregated cell; cells without a grid match keep NaN.
res = res.merge(gdf, how='left', on=['long', 'lat'])

In [None]:
# Overwrite the integer cell indices merged in from gdf with the untruncated
# relative positions (west share and south share).
span_we = res['toWest'] + res['toEast']
span_sn = res['toSud'] + res['toNord']
res['norm_x'] = res['toWest'] / span_we
res['norm_y'] = res['toSud'] / span_sn

# Keep only rows that have a value and both relative coordinates.
res = res.dropna(subset=['dpCO2', 'norm_x', 'norm_y'])

In [None]:
# Summary statistics of the aggregated table after filtering.
res.describe()

In [None]:
plt.rcParams['figure.figsize'] = [9, 9]

# Cell-averaged dpCO2 at the snapped positions (keys are thousandths, hence / 1000).
lon_vals = res['long'] / 1000
lat_vals = res['lat'] / 1000
plt.scatter(lon_vals, lat_vals, c=res['dpCO2'], s=2, alpha=0.2, cmap='rainbow')
plt.colorbar()
plt.clim(-300, 300)
# Optional zoom: plt.xlim(103.6, 105) / plt.ylim(51.4, 52)
# Outline drawn on top of the points.
plt.plot('long', 'lat', data=obris)

In [None]:
# Kriging inputs as plain arrays: relative coordinates and the values at those points.
long = res['norm_x'].to_numpy()
lat = res['norm_y'].to_numpy()
dpCO2 = res['dpCO2'].to_numpy()

# Target grid in relative coordinates: 25 nodes along x, 400 along y.
gridy = np.linspace(0, 1, num=400)
gridx = np.linspace(0, 1, num=25)

In [None]:
# Small figure for the variogram plot produced by enable_plotting.
plt.rcParams['figure.figsize'] = [3, 3]

# Fit a spherical variogram to all samples pooled together.
# NOTE(review): the inputs are relative 0..1 coordinates, yet coordinates_type is
# 'geographic', which interprets them as longitude/latitude in degrees — confirm
# this is intended.
ok = OrdinaryKriging(
    long,
    lat,
    dpCO2,
    variogram_model='spherical',
    coordinates_type='geographic',
    enable_plotting=True,
    verbose=True,
)

In [None]:
# Evaluate the fitted model on the gridx x gridy grid.
# `ss` is the second return value (presumably the kriging variance — TODO confirm).
z_interp, ss = ok.execute('grid', gridx, gridy)

In [None]:
# Shape check of the interpolated array.
z_interp.data.shape

In [None]:
# Flatten the kriged surface into a long table keyed by integer grid indices.
# z_interp is indexed [row, column]: the column index is stored as norm_x and the
# row index as norm_y, with rows emitted in row-major order.
# The sizes are read from z_interp itself, so the table always matches the grid that
# was passed to execute() instead of relying on hard-coded 400 x 25.
n_rows, n_cols = z_interp.shape
row_idx, col_idx = np.indices((n_rows, n_cols))
surdf = pd.DataFrame({
    'norm_x': col_idx.ravel().astype('int64'),
    'norm_y': row_idx.ravel().astype('int64'),
    'dpCO2All': np.asarray(z_interp, dtype=float).ravel(),
})

In [None]:
# Tall, narrow figure for the 25 x 400 index grid.
plt.rcParams['figure.figsize'] = [3, 10]

# Quick look at the kriged surface in index space.
plt.scatter('norm_x', 'norm_y', c=surdf['dpCO2All'], cmap='rainbow', data=surdf)
plt.colorbar()

In [None]:
# Attach the interpolated value to every grid node via its integer cell indices.
gdf = gdf.merge(surdf, how='left', on=['norm_x', 'norm_y'])

In [None]:
# Display the grid table with the interpolated column attached.
gdf

In [None]:
plt.rcParams['figure.figsize'] = [12, 12]

color_limits = (-200, 200)
marker_style = dict(cmap='rainbow', linewidth=0)

# Interpolated surface on the grid nodes.
plt.scatter('Longitude', 'Latitude', data=gdf, c=gdf['dpCO2All'], s=20, alpha=0.5, **marker_style)
plt.clim(*color_limits)
plt.colorbar(alpha=1)

# Raw samples drawn on top with the same colour scale.
plt.scatter('Longitude', 'Latitude', data=df, c=df['dpCO2'], s=5, alpha=0.1, **marker_style)
plt.clim(*color_limits)

plt.xlim(103.6, 110)
plt.ylim(51.2, 56)

# Outline, then a white fill over the outline rows from position 4845 onward
# (presumably a separate closed contour such as an island — TODO confirm).
plt.plot('long', 'lat', data=obris, alpha=0.5, linewidth=0.5)
plt.fill('long', 'lat', data=obris.iloc[4845:, :], c='white', alpha=1, linewidth=0.5)

plt.title('dpCO2 Jul-Aug 2024, mkatm')
plt.savefig('surf_dpCO2_jul2024.png')


In [None]:
# Krige each year separately and attach the surface to gdf as column 'dpCO2<year>'.
# Years for the full archive: [2013, 2016, 2017, 2018, 2021, 2022, 2023, 2024]

# The target grid is identical for every year, so it is built once.
gridx = np.linspace(0, 1, 25)
gridy = np.linspace(0, 1, 400)

for year in [2024]:
    year_col = f'dpCO2{year}'

    # Filter once and take the three input arrays from the same subset.
    year_rows = res.loc[res['year'] == year]
    lat = year_rows['norm_y'].values
    long = year_rows['norm_x'].values
    value = year_rows['dpCO2'].values

    # NOTE(review): inputs are relative 0..1 coordinates while coordinates_type is
    # 'geographic' (degrees of longitude/latitude) — confirm this is intended.
    ok = OrdinaryKriging(long,
                         lat,
                         value,
                         variogram_model='spherical',
                         coordinates_type='geographic',
                         verbose=False,
                         enable_plotting=False)
    z_interp, ss = ok.execute('grid', gridx, gridy)

    # Flatten the surface in row-major order; z_interp is indexed [row, column], the
    # column index is stored as norm_x and the row index as norm_y. Sizes come from
    # the array itself so they always match the grid above.
    n_rows, n_cols = z_interp.shape
    row_idx, col_idx = np.indices((n_rows, n_cols))
    surdf = pd.DataFrame({
        'norm_x': col_idx.ravel().astype('int64'),
        'norm_y': row_idx.ravel().astype('int64'),
        year_col: np.asarray(z_interp, dtype=float).ravel(),
    })

    # Drop a column left over from an earlier run of this cell first; otherwise the
    # merge would produce '<col>_x' / '<col>_y' duplicates and later lookups of
    # gdf['dpCO2<year>'] would fail.
    gdf = gdf.drop(columns=[year_col], errors='ignore')
    gdf = pd.merge(left=gdf, right=surdf, on=['norm_x', 'norm_y'], how='left')

In [None]:
# Mean of the per-year surfaces, stored as 'dpCO2All'.
cycles = ['dpCO22013', 'dpCO22016', 'dpCO22017', 'dpCO22018', 'dpCO22021', 'dpCO22022', 'dpCO22023', 'dpCO22024']

# Only years that were actually kriged have a column in gdf; indexing a missing one
# would raise KeyError, so the average is taken over the columns that exist.
available_cycles = [cycle for cycle in cycles if cycle in gdf.columns]
missing_cycles = [cycle for cycle in cycles if cycle not in gdf.columns]
if missing_cycles:
    print(f'not in gdf, left out of the average: {missing_cycles}')

if available_cycles:
    # Plain running sum: a NaN in any yearly surface makes the mean NaN for that node.
    gdf['dpCO2All'] = 0
    for cycle in available_cycles:
        gdf['dpCO2All'] += gdf[cycle]
    gdf['dpCO2All'] /= len(available_cycles)
else:
    print('no yearly surfaces in gdf; dpCO2All left unchanged')
gdf

In [None]:
# Export the grid with all attached surfaces as a tab-separated file.
surfer_path = os.path.join(pwd, 'pdCO2Surfer.txt')
gdf.to_csv(surfer_path, sep='\t')

In [None]:
# Switch to the default theme for the exported maps, then set the figure size
# (the size must be set after the style, which resets rcParams).
plt.style.use('default')
plt.rcParams.update({'figure.figsize': [12, 12]})

In [None]:
# One map per cycle: raw samples first, the kriged surface on top, saved as a PNG.
# 'All' uses every sample and the 'dpCO2All' surface; a year uses that year's samples
# and the matching 'dpCO2<year>' surface.
cycles = ['All', 2013, 2016, 2017, 2018, 2021, 2022, 2023, 2024]
for cycle in cycles:
    surface_col = f'dpCO2{cycle}'
    if surface_col not in gdf.columns:
        # No surface was kriged for this cycle; skip it instead of failing with KeyError.
        print(f'{surface_col}: not in gdf, map skipped')
        continue

    # Pick the samples and their opacity once, then draw them with a single call.
    if cycle == 'All':
        samples, sample_alpha = df, 0.1
    else:
        samples, sample_alpha = df.loc[df['year'] == cycle, :], 0.3
    plt.scatter('Longitude', 'Latitude', data=samples, c=samples['dpCO2'], cmap='rainbow', s=5, alpha=sample_alpha, linewidth=0)
    plt.clim(-200, 200)

    plt.scatter('Longitude', 'Latitude', data=gdf, c=gdf[surface_col], cmap='rainbow', s=5, alpha=0.5, linewidth=0)
    plt.colorbar(alpha=1)
    plt.clim(-200, 200)
    plt.xlim(103.6, 110)
    plt.ylim(51.2, 56)

    # Outline, then a white fill over the outline rows from position 4845 onward
    # (presumably a separate closed contour such as an island — TODO confirm).
    plt.plot('long', 'lat', data=obris, alpha=0.5, linewidth=0.5)
    plt.fill('long', 'lat', data=obris.iloc[4845:,:], c='white', alpha=1, linewidth=0.5)

    plt.title(f'dpCO2_{cycle}, mkatm')
    plt.savefig(f'surf_dpCO2_{cycle}.png')
    # Close the figure so the maps do not accumulate in memory or overlay each other.
    plt.close()


In [None]:
# Summary statistics of the final grid table, including the interpolated columns.
gdf.describe()