In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import datetime as dt

In [None]:
from numpy import radians, sin, cos, arcsin, sqrt
def dist(a, b):
    """
    Great-circle (haversine) distance in kilometres between two points.

    Parameters
    ----------
    a, b : sequence
        Points given as ``(longitude, latitude)`` in decimal degrees.
        Elements may be scalars or NumPy arrays; arrays are broadcast
        element-wise.

    Returns
    -------
    float or numpy.ndarray
        Distance in km on a sphere of radius 6372.795 km.
    """
    lon1, lat1, lon2, lat2 = map(radians, [a[0], a[1], b[0], b[1]])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    h = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Floating-point rounding can push h marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN.
    h = np.clip(h, 0.0, 1.0)
    c = 2 * arcsin(sqrt(h))
    # Earth's radius in kilometers
    km = 6372.795 * c
    return km

In [None]:
plt.style.use('classic')   # matplotlib style sheet; replace the name here to try another style

# Root folder of the local data tree; change this one value to relocate
# every input and output path below.
dataRoot = "G:/1_Data1"

srcData = f"{dataRoot}/sea/AllSea.txt"            # main input table (read into `df`)
destData = f"{dataRoot}/sea/AllSeaGisOut.txt"     # aggregated output (written with to_csv)
obrisData = f"{dataRoot}/obris/obris.dat"         # table read into `obris`
batimetrFile = f"{dataRoot}/batimetria/b1.txt"    # grid file read into `batimetria`
# Alternative input kept from an earlier run: srcData = "~/methaneSea.dat"

In [None]:
# Load the main measurement table from the tab-separated text file.
df = pd.read_csv(
    srcData,
    sep='\t',
    decimal=',',                # numbers are written with a decimal comma
    header=0,
    skiprows=[1, 2, 3, 4],      # drop the four lines directly below the header row
    skipinitialspace=True,
    na_values='--',             # '--' marks a missing value
    parse_dates=['DateTime'],
    dayfirst=True,              # dates are parsed day-first
    # nrows=100000,             # enable to load only a subset while experimenting
)

In [None]:
# Calendar year of each record, taken from the parsed DateTime column.
df['year'] = df['DateTime'].dt.year

In [None]:
# Load the separate jun2022sea.dat table (tab-separated, decimal comma).
df22 = pd.read_csv(
    "G:/1_Data1/90_BaikalJun2022Sea/jun2022sea.dat",
    sep='\t',
    decimal=',',
    header=0,
    skiprows=[0],               # the first physical line is skipped; the header is the next one
    na_values='--',
    parse_dates=['DateTime'],
    dayfirst=True,
)
# Rescale the concentration column by 1e9.
# NOTE(review): presumably a unit conversion to match `df` — confirm the units.
df22['cCH4Wtr'] = df22['cCH4Wtr'].mul(1e9)

In [None]:
# Number of rows per year that carry a cCH4Wtr measurement.
# The mask must be boolean: AND-ing the year mask with the raw float column
# is not a valid "value is present" test, so use notna() explicitly.
has_ch4 = df['cCH4Wtr'].notna()
for yr in range(2013, 2024):
    n_rows = int(((df['year'] == yr) & has_ch4).sum())
    print(f'{yr}: {n_rows}')

In [None]:
# Eyeball a random subset; cap the size so short tables (e.g. when loading
# with a small nrows) do not raise "larger sample than population".
df.sample(n=min(100, len(df)))

In [None]:
# Load the `obris` table; its long/lat columns are later drawn as a line on the map.
# NOTE(review): presumably the shoreline outline — confirm.
obris = pd.read_csv(
    obrisData,
    sep='\t',
    decimal=',',
    header=0,
    na_values='--',
)

In [None]:
# The grid file starts with six whitespace-separated "key value" header lines,
# followed by the grid rows.
# NOTE(review): the keys used below look like an Esri ASCII grid header — confirm.
batlegend = pd.read_csv(batimetrFile,
                        nrows=6,
                        decimal='.',
                        sep=r'\s+',      # raw string: '\s' is not a valid str escape
                        index_col=0,
                        header=None,
                        )
# After transposing: a single row with one column per header key.
batlegend = batlegend.T

# Pull header entries out as plain floats. Keeping them as one-element Series
# would make later arithmetic with df columns align on the index label
# instead of broadcasting.
na_val = float(batlegend['nodata_value'].iloc[0])
batimetria = pd.read_csv(batimetrFile,
                         skiprows=6,
                         decimal='.',
                         sep=r'\s+',
                         na_values=[na_val],
                         index_col=False,
                         header=None,
                         )

xllcorner = float(batlegend['xllcorner'].iloc[0])
yllcorner = float(batlegend['yllcorner'].iloc[0])
cellsize = float(batlegend['cellsize'].iloc[0])

In [None]:
# Show the parsed grid (values equal to the header's nodata marker were read as NaN).
batimetria

In [None]:
# Scratch check of the coordinate -> grid offset arithmetic.
# TODO: wrap this in a coor2index(x, y) helper once it is settled.
x, y = 110.5, 56

# Coerce the header values to plain floats; this works whether they are
# scalars or one-element Series, and keeps the df arithmetic below from
# aligning on a Series index.
x0 = float(np.ravel(xllcorner)[0])
y0 = float(np.ravel(yllcorner)[0])
cell = float(np.ravel(cellsize)[0])

ix = int((x - x0) / cell)
# NOTE(review): iy counts cells upward from yllcorner; if the grid rows are
# stored top-down the row index has to be flipped — confirm before using it.
iy = int((y - y0) / cell)
print(ix, iy)

# Fractional column offset of every record's longitude.
l = (df['Longitude'] - x0) / cell
l

In [None]:
# Placeholder: every row receives the single grid value at row 1, column 3.
# (The former `df['depth'] = 0` was immediately overwritten and is dropped.)
# TODO: replace with a per-row lookup of the grid cell under each position.
df['depth'] = batimetria.iloc[1, 3]
df

In [None]:
# Grid spacing, in the same units as the Longitude / Latitude columns.
longStep = 0.01
latStep = 0.005

# Snap each position to the nearest grid node and tag the record with its year.
df['long'] = df['Longitude'].div(longStep).round().mul(longStep)
df['lat'] = df['Latitude'].div(latStep).round().mul(latStep)
df['year'] = df['DateTime'].dt.year

In [None]:
# Per (year, long, lat) grid cell: mean and number of non-missing values
# for every gas column listed below.
func_list = ['mean', 'count']
cols_to_calc = dict.fromkeys(
    ('pCO2Wtr', 'pCH4Wtr',
     'cCO2Wtr', 'cCH4Wtr',
     'dpCO2', 'dpCH4',
     'pCO2Air', 'pCH4Air'),
    func_list,
)
grouped_cells = df.groupby(['year', 'long', 'lat'], as_index=False)
res = grouped_cells.agg(cols_to_calc)
# Inspect the resulting (column, statistic) labels.
res.columns

In [None]:
# Second-stage aggregation: one row per (long, lat) cell, holding the mean of
# the ('cCH4Wtr', 'mean') column and the count of non-missing values behind it.
# Assumes `res` is the per-(year, long, lat) table built in the previous cell.
# NOTE(review): this rebinds `res`. Only the grouping keys and the cCH4Wtr
# statistics are requested, so `year` and the other gas columns are not
# carried over, and re-running the cell would operate on its own output.
# Later cells still read res['year'] and res['pCH4Wtr'] — confirm the
# intended run order.
cols_to_calc = {('cCH4Wtr', 'mean'): ['mean', 'count']}
res = res.groupby(['long', 'lat'], as_index=False).agg(cols_to_calc)
res

In [None]:
# Number of occupied (long, lat) grid cells per year.
# Counted from `df` rather than `res`: `res` has no `year` column once it has
# been re-aggregated over (long, lat). Distinct non-missing (year, long, lat)
# combinations are exactly the rows of the per-year groupby.
# Optional pre-filter kept from an earlier run: df = df.loc[df['year'] < 2022]
grid_cells = df[['year', 'long', 'lat']].dropna().drop_duplicates()
for yr in range(2013, 2024):
    n_cells = int((grid_cells['year'] == yr).sum())
    print(f'{yr}: {n_cells}')

In [None]:
# Distance (km) from every row of `res` to its nearest `obris` point.
# Distances are capped at max_dist_km, and NaN distances are ignored, so a row
# with no valid distance simply keeps the cap.
max_dist_km = 100.0

obris_lon = obris['long'].to_numpy(dtype=float)
obris_lat = obris['lat'].to_numpy(dtype=float)
cell_lon = np.asarray(res['long'], dtype=float).ravel()
cell_lat = np.asarray(res['lat'], dtype=float).ravel()

# Float buffer from the start: writing float distances into an int column
# cell by cell is an incompatible-dtype assignment in pandas.
nearest = np.full(len(cell_lon), max_dist_km)
for i, (lon, lat) in enumerate(zip(cell_lon, cell_lat)):
    # dist() is built on NumPy ufuncs, so one call covers all obris points.
    d = dist((lon, lat), (obris_lon, obris_lat))
    # fmin skips NaN; `initial` applies the cap and covers an empty obris.
    nearest[i] = np.fmin.reduce(d, initial=max_dist_km)

res['toobris'] = nearest

In [None]:
# Write the aggregated table as tab-separated text; the index is written too
# (to_csv default).
res.to_csv(destData, sep='\t')

In [None]:
%matplotlib inline
# Largest gridded pCH4Wtr mean. `.max` needs to be called, and the column is
# only present while `res` is the per-year table, so guard the lookup instead
# of letting the whole plotting cell fail.
pch4_mean = ('pCH4Wtr', 'mean')
valMax = res[pch4_mean].max() if pch4_mean in res.columns else None

fig, ax = plt.subplots()

# Grid cells with data as red squares, the obris line in blue.
ax.scatter(res['long'], res['lat'], c='r', s=10, marker='s')
ax.plot(obris['long'], obris['lat'], 'b-')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

fig.savefig('gis.png')

print(df)
print(res)