In [56]:
import pandas as pd
import numpy as mp
import glob

In [57]:
from math import radians, cos, sin, asin, sqrt

def dist(a, b):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    """
    x1, y1, x2, y2 = map(radians, [a[0], a[1], b[0], b[1]])
    dx = x2 - x1
    dy = y2 - y1
    a = sin(dy/2)**2 + cos(y1) * cos(y2) * sin(dx/2)**2
    c = 2 * asin(sqrt(a))
    # Earth`s radius in kilometers
    km = 6372.795 * c
    return km
def f(a):
    return a+2

In [58]:
meteo_path = "G:/1_Data1/90_BaikalJun2022Sea/Meteo/"
origin_path = "G:/1_Data1/90_BaikalJun2022Sea/jun2022sea.dat"

In [59]:
origin_df = pd.read_csv(origin_path,
                        header=0,
                        skiprows=1,
                        sep="\t",
                        na_values='--',
                        decimal=',',
                        parse_dates=['DateTime'],
                        dayfirst=True,
                        )
#origin_df.info()
origin_df.sort_values(by='DateTime', inplace=True)

In [60]:
print(origin_df.columns)

Index(['DateTime', 'deltaTime', 'Longitude', 'Latitude', 'TempAir', 'PressAir',
       'vCO2', 'vCO2max', 'vCO2min', 'vCH4', 'vCH4max', 'vCH4min', 'TempEqu1',
       'TempEqu2', 'TempEqu3', 'AirFlow', 'channel', 'WaterFlowEqu1',
       'WaterFlowEqu2', 'WaterFlowEqu3', 'dateSec', 'time', 'dT', 'pCO2',
       'pCH4', 'vCO2Air', 'vCH4Air', 'pCO2Air', 'pCH4Air', 'WaterFlowEquCur',
       'TempEquCur', 'vGas', 'cCO2Wtr', 'pCO2Wtr', 'cCH4Wtr', 'pCH4Wtr',
       'dpCO2', 'dpCH4', 'long', 'lat'],
      dtype='object')


In [61]:
cols = ['Longitude', 'Latitude', 'TempAir', 'PressAir',
       'TempEqu1', 'dateSec', 'vCO2Air', 'vCH4Air', 'pCO2Air', 'pCH4Air', 'cCO2Wtr', 'pCO2Wtr', 'cCH4Wtr', 'pCH4Wtr']
cols_dict = {}
for item in cols:
    cols_dict[item] = 'mean'

In [62]:
origin_df_minutly = origin_df.resample('1T', on='DateTime').agg(cols_dict).ffill()
origin_df_minutly

Unnamed: 0_level_0,Longitude,Latitude,TempAir,PressAir,TempEqu1,dateSec,vCO2Air,vCH4Air,pCO2Air,pCH4Air,cCO2Wtr,pCO2Wtr,cCH4Wtr,pCH4Wtr
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-05-30 21:12:00,105.063879,51.899681,20.915000,95583.55,3.94,1.653945e+09,,,,,,,,
2022-05-30 21:13:00,105.063879,51.899681,20.915000,95583.55,3.94,1.653945e+09,,,,,,,,
2022-05-30 21:14:00,105.063879,51.899681,20.915000,95583.55,3.94,1.653945e+09,,,,,,,,
2022-05-30 21:15:00,105.063879,51.899681,20.915000,95583.55,3.94,1.653945e+09,,,,,,,,
2022-05-30 21:16:00,105.063879,51.899681,20.915000,95583.55,3.94,1.653945e+09,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-06-14 03:40:00,105.517401,51.876775,10.716667,95632.10,3.75,1.655178e+09,429.678674,2.09796,405.537369,1.980087,0.001633,559.370264,6.257581e-08,1.703969
2022-06-14 03:41:00,105.514048,51.876514,10.942000,95629.12,3.75,1.655178e+09,429.678674,2.09796,405.524732,1.980025,0.001633,559.370264,6.257581e-08,1.703969
2022-06-14 03:42:00,105.514048,51.876514,10.942000,95629.12,3.75,1.655178e+09,429.678674,2.09796,405.524732,1.980025,0.001633,559.370264,6.257581e-08,1.703969
2022-06-14 03:43:00,105.514048,51.876514,10.942000,95629.12,3.75,1.655178e+09,429.678674,2.09796,405.524732,1.980025,0.001633,559.370264,6.257581e-08,1.703969


In [63]:
origin_df['lat_shifted'] = origin_df['Latitude'].shift(1)
origin_df['long_shifted'] = origin_df['Longitude'].shift(1)
origin_df['distance'] = 0
origin_df['vessel_speed'] = 0

In [64]:
meteo_files_list = glob.glob(meteo_path+"*.txt")
dfl = []
for f in meteo_files_list:
    df = pd.read_csv(f,
                     encoding_errors="ignore",
                     header=None,
                     skiprows=1,
                     sep=" ",
                     names=['date', 'time', 't_air', 'w_hor', 'w_dir', 'wx', 'wy', 'wz', 'press', 'rh']
                    )
    dfl.append(df)

meteo_df = pd.concat(dfl)
meteo_df['DateTime'] = pd.to_datetime(meteo_df['date']+' '+meteo_df['time'], dayfirst=True, cache=True)
meteo_df.sort_values(by='DateTime', inplace=True)
meteo_df


Unnamed: 0,date,time,t_air,w_hor,w_dir,wx,wy,wz,press,rh,DateTime
0,28.05.2022,20:03:19,5.112,0.306,282.2,-0.023,-0.105,0.070,720.2,100.0,2022-05-28 20:03:19
1,28.05.2022,20:05:19,5.060,0.454,337.9,-0.300,-0.122,0.084,720.3,100.0,2022-05-28 20:05:19
2,28.05.2022,20:07:19,5.110,0.314,348.8,-0.090,-0.018,0.131,720.3,100.0,2022-05-28 20:07:19
3,28.05.2022,20:09:19,5.063,0.274,153.2,0.029,0.015,0.098,720.3,100.0,2022-05-28 20:09:19
4,28.05.2022,20:11:19,5.258,0.356,323.4,-0.143,-0.106,0.112,720.3,100.0,2022-05-28 20:11:19
...,...,...,...,...,...,...,...,...,...,...,...
651,12.06.2022,21:09:48,4.029,0.872,337.8,-0.794,-0.324,0.068,717.8,100.0,2022-06-12 21:09:48
652,12.06.2022,21:10:49,4.017,0.964,323.3,-0.764,-0.568,0.185,717.8,100.0,2022-06-12 21:10:49
653,12.06.2022,21:11:49,3.994,1.220,310.8,-0.792,-0.917,0.291,717.8,100.0,2022-06-12 21:11:49
654,12.06.2022,21:12:49,4.023,1.499,302.7,-0.804,-1.250,0.457,717.8,100.0,2022-06-12 21:12:49


In [69]:
print(df.columns)
cols = ['t_air', 'w_hor', 'w_dir', 'press', 'rh']
cols_dict = {}
for item in cols:
    cols_dict[item] = 'mean'

Index(['Longitude', 'Latitude', 'TempAir', 'PressAir', 'TempEqu1', 'dateSec',
       'vCO2Air', 'vCH4Air', 'pCO2Air', 'pCH4Air', 'cCO2Wtr', 'pCO2Wtr',
       'cCH4Wtr', 'pCH4Wtr', 't_air', 'w_hor', 'w_dir', 'press'],
      dtype='object')


In [70]:
meteo_df_minutly = meteo_df.resample("1T", on="DateTime", ).agg(cols_dict).ffill()
meteo_df_minutly

Unnamed: 0_level_0,t_air,w_hor,w_dir,press,rh
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-05-28 20:03:00,5.112,0.306,282.2,720.2,100.0
2022-05-28 20:04:00,5.112,0.306,282.2,720.2,100.0
2022-05-28 20:05:00,5.060,0.454,337.9,720.3,100.0
2022-05-28 20:06:00,5.060,0.454,337.9,720.3,100.0
2022-05-28 20:07:00,5.110,0.314,348.8,720.3,100.0
...,...,...,...,...,...
2022-06-12 21:09:00,4.029,0.872,337.8,717.8,100.0
2022-06-12 21:10:00,4.017,0.964,323.3,717.8,100.0
2022-06-12 21:11:00,3.994,1.220,310.8,717.8,100.0
2022-06-12 21:12:00,4.023,1.499,302.7,717.8,100.0


In [80]:
df = pd.merge(origin_df_minutly, meteo_df_minutly, how='outer', on='DateTime')
df.sort_values(by='DateTime')

Unnamed: 0_level_0,Longitude,Latitude,TempAir,PressAir,TempEqu1,dateSec,vCO2Air,vCH4Air,pCO2Air,pCH4Air,cCO2Wtr,pCO2Wtr,cCH4Wtr,pCH4Wtr,t_air,w_hor,w_dir,press,rh
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-05-28 20:03:00,,,,,,,,,,,,,,,5.112,0.306,282.2,720.2,100.0
2022-05-28 20:04:00,,,,,,,,,,,,,,,5.112,0.306,282.2,720.2,100.0
2022-05-28 20:05:00,,,,,,,,,,,,,,,5.060,0.454,337.9,720.3,100.0
2022-05-28 20:06:00,,,,,,,,,,,,,,,5.060,0.454,337.9,720.3,100.0
2022-05-28 20:07:00,,,,,,,,,,,,,,,5.110,0.314,348.8,720.3,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-06-14 03:40:00,105.517401,51.876775,10.716667,95632.10,3.75,1.655178e+09,429.678674,2.09796,405.537369,1.980087,0.001633,559.370264,6.257581e-08,1.703969,,,,,
2022-06-14 03:41:00,105.514048,51.876514,10.942000,95629.12,3.75,1.655178e+09,429.678674,2.09796,405.524732,1.980025,0.001633,559.370264,6.257581e-08,1.703969,,,,,
2022-06-14 03:42:00,105.514048,51.876514,10.942000,95629.12,3.75,1.655178e+09,429.678674,2.09796,405.524732,1.980025,0.001633,559.370264,6.257581e-08,1.703969,,,,,
2022-06-14 03:43:00,105.514048,51.876514,10.942000,95629.12,3.75,1.655178e+09,429.678674,2.09796,405.524732,1.980025,0.001633,559.370264,6.257581e-08,1.703969,,,,,


In [52]:
df.to_csv('G:/1_Data1/90_BaikalJun2022Sea/jun2022sea_pandas.out', sep='\t')