In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import datetime as dt

In [None]:
from numpy import radians, sin, cos, arcsin, sqrt
def dist(a, b):
    """
    Great-circle (haversine) distance in kilometers between two points.

    Both points are indexable pairs given in decimal degrees. Element 0 is
    used as the longitude and element 1 as the latitude: the cosine factors
    of the haversine formula are taken from element 1.
    """
    lon_a, lat_a = radians(a[0]), radians(a[1])
    lon_b, lat_b = radians(b[0]), radians(b[1])
    delta_lon = lon_b - lon_a
    delta_lat = lat_b - lat_a
    # Haversine of the central angle between the two points.
    hav = sin(delta_lat/2)**2 + cos(lat_a) * cos(lat_b) * sin(delta_lon/2)**2
    central_angle = 2 * arcsin(sqrt(hav))
    # Earth's radius in kilometers times the central angle.
    return 6372.795 * central_angle

In [None]:
# Plot appearance applied to the figures drawn after this cell.
plt.style.use('classic')   # other style names are listed in plt.style.available
plt.rcParams['figure.figsize'] = [15, 15]

# Input and output locations. These are absolute paths on a G: drive and
# must be adjusted when the notebook runs on another machine.
#   srcData      - tab-separated measurement table loaded into `df`
#   destData     - destination of the aggregated table written by to_csv
#   obrisData    - table with 'long'/'lat' columns drawn as a line on the maps
#   batimetrFile - depth grid file: six header lines followed by the grid
srcData = "G:/1_Data1/sea/AllSea.txt"
destData = "G:/1_Data1/sea/AllSeaGisOut.txt"
obrisData = "G:/1_Data1/obris/obris.dat"
batimetrFile = "G:/1_Data1/batimetria/b2.txt"
# Alternative input path (disabled): srcData = "~/methaneSea.dat"

In [None]:
# Load the raw measurement table.
df = pd.read_csv(
    srcData,
    # --- file layout ---
    sep='\t',
    header=0,                 # column names come from the first line
    skiprows=[1, 2, 3, 4],    # the four lines right below the header are not data rows
    skipinitialspace=True,
    # skip_blank_lines=True,
    # nrows=100000,           # enable to load only a sample while experimenting
    # --- value parsing ---
    decimal=',',
    na_values='--',
    parse_dates=['DateTime'],
    dayfirst=True,
)

In [None]:
# (rows, columns) of the table exactly as loaded, before any filtering.
df.shape

In [None]:
# Remove, in place, every row that lacks a longitude, a latitude or a
# cCH4Wtr value. Because `df` itself is modified, re-running the load cell
# is the only way to get the dropped rows back.
df.dropna(axis='index', subset=['Longitude', 'Latitude', 'cCH4Wtr'], inplace=True)
# Shape after filtering, for comparison with the shape printed before it.
df.shape

In [None]:
# Calendar year of each record, extracted from the parsed DateTime column;
# used later as a grouping key.
df['year'] = df['DateTime'].dt.year

In [None]:
# Renumber the rows 0..n-1 in place and discard the previous index labels
# (drop=True keeps them from being added back as a column).
df.reset_index(drop=True, inplace=True)
df

In [None]:
# Number of records per year that carry a cCH4Wtr value.
# The value column is turned into an explicit boolean mask with notna().
# AND-ing the raw float column instead relies on an implicit float -> bool
# cast, under which a concentration of exactly 0.0 is treated as "no value"
# and silently left out of the count.
has_ch4 = df['cCH4Wtr'].notna()
for yr in range(2013, 2024):
    t = df.loc[(df['year'] == yr) & has_ch4].shape
    print(f'{yr}: {t[0]}')

In [None]:
# Table with 'long' / 'lat' columns that the map cells draw as a line.
# Tab-separated, first line is the header, ',' is the decimal mark and
# '--' marks a missing value.
obris = pd.read_csv(obrisData, sep='\t', header=0, decimal=',', na_values='--')
obris.head(3)

In [None]:
# Header of the depth grid file: its first six lines are read as
# whitespace-separated "key value" pairs, with the key used as the index.
batlegend = pd.read_csv(batimetrFile,
                        nrows=6,
                        decimal='.',
                        # Raw string: '\s' inside a plain literal is an invalid
                        # escape sequence and triggers a warning at compile time.
                        sep=r'\s+',
                        index_col=0,
                        header=None,
                        )
# Transpose so that every header key becomes a column; the values end up in
# a row labelled 1 (the positional label of the original value column).
batlegend = batlegend.T
# Column holding the file's no-data marker; it is passed as `na_values`
# when the grid body is read.
na_val = batlegend['nodata_value']

In [None]:
# Body of the depth grid: everything after the six header lines, one
# DataFrame row per line of the file. Cells equal to the file's no-data
# marker are read as NaN.
batimetria = pd.read_csv(batimetrFile,
                         skiprows=6,
                         decimal='.',
                         # Raw string: '\s' inside a plain literal is an invalid
                         # escape sequence and triggers a warning at compile time.
                         sep=r'\s+',
                         na_values=na_val,
                         index_col=False,
                         header=None,
                         )

# Georeferencing values taken from the header table (its values sit in the
# row labelled 1): coordinates of the lower-left corner and the cell size.
xllcorner = batlegend.loc[1, 'xllcorner']
yllcorner = batlegend.loc[1, 'yllcorner']
cellsize = batlegend.loc[1, 'cellsize']

In [None]:
# Display the depth grid as a table to check that it was parsed as expected.
batimetria

In [None]:
# Quick-look raster rendering of the depth grid (axes are row/column
# indices, not coordinates).
plt.imshow(batimetria)

In [None]:
# Earlier attempt at a direct depth lookup, kept for reference:
#df['depth'] = batimetria.iloc[((df['Latitude'] - yllcorner) / cellsize).astype('int32'),((df['Longitude'] - xllcorner) / cellsize).astype('int32')]
# Despite its name, this is the y coordinate of the grid's UPPER edge:
# the lower-left y plus (number of grid rows) * cellsize. Row indices are
# later measured downward from this value.
yllcorner_m = yllcorner + batimetria.shape[0] * cellsize
yllcorner_m

In [None]:
# Convert every record's position into (row, column) indices of the depth
# grid: rows are counted downward from the grid's upper edge (yllcorner_m),
# columns rightward from xllcorner, both in units of cellsize.
# NOTE(review): astype('int32') truncates toward zero, so an offset in
# (-1, 0) becomes index 0 rather than a negative (out-of-grid) index, and
# nothing here checks the upper bounds - confirm that all records lie
# inside the grid.
df['batiy'] = ((yllcorner_m - df['Latitude']) / cellsize).astype('int32')
df['batix'] = ((df['Longitude'] - xllcorner) / cellsize).astype('int32')
df

In [None]:
# Look up the depth under every record from its grid indices
# ('batiy' = row, 'batix' = column).
# Only indices that fall inside the grid are read. NumPy would silently wrap
# a negative index around to the opposite edge of the grid and raise
# IndexError for an index past the end, so records outside the grid get a
# NaN depth instead of a wrong value or a crash.
grid = batimetria.to_numpy()
rows = df['batiy'].to_numpy()
cols = df['batix'].to_numpy()
inside = (
    (rows >= 0) & (rows < grid.shape[0])
    & (cols >= 0) & (cols < grid.shape[1])
)
depth = np.full(len(df), np.nan)
depth[inside] = grid[rows[inside], cols[inside]]
df['depth'] = depth
df

In [None]:
# Every record as a faint dot coloured by the depth looked up for it,
# with the obris line drawn on top in blue.
plt.scatter(
    x=df['Longitude'],
    y=df['Latitude'],
    c=df['depth'],
    s=30,
    marker='o',
    alpha=0.1,
    linewidth=0,
)
plt.plot(obris['long'], obris['lat'], 'b-')

In [None]:
# Size of one averaging cell, in the units of the Longitude / Latitude columns.
longStep = 0.01
latStep = 0.005

# Snap each coordinate to the nearest multiple of its step, so that all
# records falling into the same cell share identical 'long' / 'lat' keys.
df['long'] = df['Longitude'].div(longStep).round().mul(longStep)
df['lat'] = df['Latitude'].div(latStep).round().mul(latStep)
# Year key for the grouping below (recomputed from DateTime).
df['year'] = df['DateTime'].dt.year

In [None]:
#func_list = ['mean', 'count']
func_list = ['mean']
# Every measured quantity gets the statistics in func_list; depth is always
# averaged, whatever func_list holds.
cols_to_calc = dict.fromkeys(
    ['pCO2Wtr', 'pCH4Wtr', 'cCO2Wtr', 'cCH4Wtr',
     'dpCO2', 'dpCH4', 'pCO2Air', 'pCH4Air'],
    func_list,
)
cols_to_calc['depth'] = ['mean']
# One output row per (year, long, lat) cell; passing lists of functions
# yields two-level column labels such as ('cCH4Wtr', 'mean').
res = df.groupby(['year', 'long', 'lat'], as_index=False).agg(cols_to_calc)
res

In [None]:
# Number of (long, lat) cells that hold data in each year.
for yr in range(2013, 2024):
    n_cells = len(res.loc[res['year'] == yr])
    print(f'{yr}: {n_cells}')

In [None]:
func_list = ['mean', 'std', 'count']

# Second pass: collapse the per-year cell means over the years, giving the
# mean, spread and number of contributing years for every (long, lat) cell.
# `res` is rebound to the new table, so this cell expects the per-year table
# as input and is not meant to be re-run on its own output.
cols_to_calc = {
    key: func_list
    for key in (('cCH4Wtr', 'mean'), ('depth', 'mean'))
}
res = res.groupby(['long', 'lat'], as_index=False).agg(cols_to_calc)
res

In [None]:
# Write the aggregated table to destData as tab-separated text.
res.to_csv(destData, sep='\t')

In [None]:
# Display the aggregated table that was just written to disk.
res

In [None]:
# List the style names that plt.style.use() accepts.
plt.style.available

In [None]:
%matplotlib inline
plt.style.use('bmh')   # try another styles: 'classic'
valMax = res[('cCH4Wtr','mean')].max

fig = plt.figure()

# wether this or those
plt.scatter(res['long'], res['lat'], c=res[('cCH4Wtr', 'mean', 'mean')], s=10, marker='s', linewidth=0)
plt.plot(obris['long'], obris['lat'], 'b-')

fig.savefig('gis.png')