In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import datetime as dt

In [None]:
from numpy import radians, sin, cos, arcsin, sqrt
def dist(a, b):
    """
    Great-circle (haversine) distance in kilometres between two points.

    Parameters
    ----------
    a, b : sequence
        Points given as ``(longitude, latitude)`` in decimal degrees. Only
        elements 0 and 1 are read. Each element may be a scalar or a NumPy
        array; arrays are combined with the usual NumPy broadcasting, so a
        single point can be compared against many points in one call.

    Returns
    -------
    float or numpy.ndarray
        Distance in km on a sphere of radius 6372.795 km. NaN inputs give NaN.
    """
    lon1, lat1, lon2, lat2 = map(radians, [a[0], a[1], b[0], b[1]])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    # Haversine term, kept under its own name so the argument `a` is not shadowed.
    h = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can push h marginally above 1 for (near-)antipodal points, which
    # would make arcsin return NaN; cap it at 1 (NaN still propagates).
    h = np.minimum(h, 1.0)
    c = 2 * arcsin(sqrt(h))
    # Earth's radius in kilometers
    km = 6372.795 * c
    return km

In [None]:
# Notebook-wide configuration: plotting style and data file locations.
plt.style.use('classic')   # other style names are listed in plt.style.available

# Input measurements table (tab-separated text, loaded into `df`).
srcData = "G:/1_Data1/sea/AllSea.txt"
# Destination of the aggregated table written with res.to_csv(...).
destData = "G:/1_Data1/sea/AllSeaGisOut.txt"
# Outline table with 'long'/'lat' columns, loaded into `obris`.
obrisData = "G:/1_Data1/obris/obris.dat"
# Alternative input location: srcData = "~/methaneSea.dat"

In [42]:
# Load the measurements table.
# Layout: tab-separated, decimal comma, '--' marks a missing value. File line 0
# is the header; file lines 1-4 (the four lines right after it) are skipped.
# 'DateTime' is parsed as day-first dates.
df = pd.read_csv(
    srcData,
    sep='\t',
    decimal=',',
    header=0,
    skiprows=[1, 2, 3, 4],
    skipinitialspace=True,
    na_values='--',
    parse_dates=['DateTime'],
    dayfirst=True,
    # nrows=100000,  # enable to load only the first rows while experimenting
)

In [43]:
# Calendar year of each row's timestamp; the per-year counts and the
# (year, long, lat) grouping further down rely on this column.
df['year'] = df['DateTime'].dt.year

In [None]:
# Separate table read from jun2022sea.dat (tab-separated, decimal comma,
# '--' = missing). The first file line is dropped, so the second line supplies
# the column names; 'DateTime' is parsed as day-first dates.
df22 = pd.read_csv(
    "G:/1_Data1/90_BaikalJun2022Sea/jun2022sea.dat",
    sep='\t',
    decimal=',',
    header=0,
    skiprows=[0],
    na_values='--',
    parse_dates=['DateTime'],
    dayfirst=True,
)
# Rescale cCH4Wtr by 1e9 -- presumably to match the units of the same column
# in `df`; TODO confirm.
df22['cCH4Wtr'] = df22['cCH4Wtr'].mul(1e9)

In [68]:
# Number of rows per year that carry a non-missing cCH4Wtr value.
# The mask uses notna() explicitly: and-ing the raw float column into a boolean
# mask only works through implicit float-to-bool coercion (NaN and 0.0 both
# become False), which hides the intent and silently drops genuine zeros.
has_cch4 = df['cCH4Wtr'].notna()
for yr in range(2013, 2024):
    n_rows = int((has_cch4 & (df['year'] == yr)).sum())
    print(f'{yr}: {n_rows}')

2013: 11890
2014: 0
2015: 0
2016: 72915
2017: 15972
2018: 30268
2019: 0
2020: 0
2021: 32634
2022: 33051
2023: 0


In [59]:
# Spot-check 100 randomly chosen rows; no random_state is passed, so the
# selection differs from run to run.
df.sample(100)

Unnamed: 0,DateTime,Longitude,Latitude,Tair,Pair,Twater,pCO2Air,pCH4Air,pCO2Wtr,pCH4Wtr,dpCO2,dpCH4,cCO2Wtr,cCH4Wtr,year,long,lat
45170,2016-05-29 07:08:19,105.954506,52.027733,5.600000,95626.00000,3.60,,,,,,,,,2016,105.95,52.030
120888,2017-05-28 07:28:58,105.448900,51.710960,6.272727,96947.27000,3.19,,,,,,,,,2017,105.45,51.710
306317,2022-06-11 18:58:33,108.232455,53.975590,9.560000,95974.10000,2.94,394.174624,1.942642,389.010372,2.285117,-5.164252,0.342475,1.166749,85.888949,2022,108.23,53.975
166703,2018-05-28 05:02:03,105.102042,51.840500,8.060000,96281.79924,2.74,,,501.111363,3.221545,98.611363,1.376545,1.513428,118.456545,2018,105.10,51.840
52599,2016-05-30 02:48:32,106.752510,52.618759,6.200000,95159.00000,,,,,,,,,,2016,106.75,52.620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133801,2017-05-31 07:20:55,108.588000,53.379290,3.600000,95812.80000,2.44,,,,,,,,,2017,108.59,53.380
60907,2016-05-31 00:48:31,107.692001,53.150299,6.500000,95902.00000,4.30,,,,,,,,,2016,107.69,53.150
120110,2017-05-28 03:09:29,105.500100,51.666290,4.400000,97015.00000,3.25,,,,,,,,,2017,105.50,51.665
292124,2022-06-08 11:16:36,108.705851,54.530796,10.980000,96426.30000,2.44,402.979387,1.927950,371.634905,2.710666,-31.344481,0.782716,1.133587,103.370999,2022,108.71,54.530


In [60]:
# Load the outline table: tab-separated, decimal comma, '--' = missing,
# first line is the header.
obris = pd.read_csv(obrisData, sep='\t', decimal=',', header=0, na_values='--')

In [61]:
# Snap every position to a regular grid: each coordinate is rounded to the
# nearest multiple of its step (0.01 for longitude, 0.005 for latitude).
longStep = 0.01
latStep = 0.005

df['long'] = df['Longitude'].div(longStep).round().mul(longStep)
df['lat'] = df['Latitude'].div(latStep).round().mul(latStep)
# Year of the timestamp, used as the first grouping key.
df['year'] = df['DateTime'].dt.year

In [62]:
# For every (year, long, lat) bin compute the mean and the number of
# non-missing samples of each measured quantity. The result has two-level
# column labels: (quantity, statistic).
func_list = ['mean', 'count']
cols_to_calc = {
    column: func_list
    for column in (
        'pCO2Wtr', 'pCH4Wtr',
        'cCO2Wtr', 'cCH4Wtr',
        'dpCO2', 'dpCH4',
        'pCO2Air', 'pCH4Air',
    )
}
res = (
    df
    .groupby(['year', 'long', 'lat'], as_index=False)
    .agg(cols_to_calc)
)
res.columns

MultiIndex([(   'year',      ''),
            (   'long',      ''),
            (    'lat',      ''),
            ('pCO2Wtr',  'mean'),
            ('pCO2Wtr', 'count'),
            ('pCH4Wtr',  'mean'),
            ('pCH4Wtr', 'count'),
            ('cCO2Wtr',  'mean'),
            ('cCO2Wtr', 'count'),
            ('cCH4Wtr',  'mean'),
            ('cCH4Wtr', 'count'),
            (  'dpCO2',  'mean'),
            (  'dpCO2', 'count'),
            (  'dpCH4',  'mean'),
            (  'dpCH4', 'count'),
            ('pCO2Air',  'mean'),
            ('pCO2Air', 'count'),
            ('pCH4Air',  'mean'),
            ('pCH4Air', 'count')],
           )

In [55]:
# Collapse the per-year bins into one row per (long, lat) cell: the mean of the
# yearly cCH4Wtr means and the number of years that have a value in that cell.
#
# The result is bound to its own name instead of overwriting `res`. This
# aggregation keeps neither 'year' nor 'pCH4Wtr', yet later cells still read
# res['year'] and res['pCH4Wtr'], so rebinding `res` broke a top-to-bottom run
# and also made this cell fail when executed a second time.
cols_to_calc = {('cCH4Wtr', 'mean'): ['mean', 'count']}
res_by_cell = res.groupby(['long', 'lat'], as_index=False).agg(cols_to_calc)
res_by_cell

Unnamed: 0_level_0,long,lat,cCH4Wtr,cCH4Wtr
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,mean
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,mean,count
0,103.75,51.695,120.548499,1
1,103.76,51.690,123.197871,1
2,103.76,51.695,114.449653,1
3,103.77,51.690,119.207194,1
4,103.77,51.695,107.322251,1
...,...,...,...,...
16155,109.86,55.695,567.232920,1
16156,109.86,55.700,9439.994097,2
16157,109.86,55.705,9448.638860,2
16158,109.87,55.695,,0


In [49]:
# df = df.loc[df['year'] < 2022]
# Number of aggregated rows in `res` for each year 2013-2023. This needs the
# version of `res` that still has a 'year' column.
for yr in range(2013, 2024):
    in_year = res['year'] == yr
    print(f'{yr}: {in_year.sum()}')

2013: 3683
2014: 0
2015: 0
2016: 3608
2017: 3990
2018: 4209
2019: 0
2020: 0
2021: 4072
2022: 4614
2023: 0


In [None]:
# Distance (km) from every row of `res` to the nearest point of `obris`,
# stored in res['toobris'].
#
# dist() broadcasts over NumPy arrays, so each row needs a single call that
# covers all outline points instead of one Python-level call per pair.
# Distances are capped at 100 km, the starting value of the original search;
# rows with no usable outline distance therefore get 100.
max_dist_km = 100.0
obris_xy = (obris['long'].to_numpy(dtype=float),
            obris['lat'].to_numpy(dtype=float))
res_long = np.ravel(res['long'].to_numpy(dtype=float))
res_lat = np.ravel(res['lat'].to_numpy(dtype=float))

toobris = np.full(len(res), max_dist_km)
for i, point in enumerate(zip(res_long, res_lat)):
    dists = np.atleast_1d(dist(point, obris_xy))
    # NaN distances never won the original "<" comparison; drop them here too.
    dists = dists[~np.isnan(dists)]
    if dists.size and dists.min() < max_dist_km:
        toobris[i] = dists.min()

# Assign once as a float column rather than writing floats element by element
# into a column that was initialised with the integer 0.
res['toobris'] = toobris

In [None]:
# Write `res` to destData as tab-separated text (the row index is written too).
res.to_csv(destData, sep='\t')

In [None]:
%matplotlib inline
valMax = res['pCH4Wtr'].max

fig = plt.figure()

# wether this or those
plt.scatter(res['long'], res['lat'], c='r', s=10, marker='s')
plt.plot(obris['long'], obris['lat'], 'b-')

fig.savefig('gis.png')

print(df)
print(res)