In [None]:
import pandas as pd
from pathlib import Path

# Enable pandas Copy-on-Write mode for the whole notebook session.
pd.set_option("mode.copy_on_write", True)

Объединение наземных данных за 2021-2023

In [2]:
# Collect every *.log file below the working directory. The order in which rglob()
# yields paths is not guaranteed, so the list is sorted to make the row order of the
# merged CSV reproducible between runs and machines.
logs = sorted(Path(".").rglob("*.log"))

# Columns present in the logs that are dropped before saving.
unused_columns = ["TF", "SurfTermo", "SurfTermoEqu", "ConductSBE", "ScSBE", "ScSBEequ", "WaterIR", "AirIR", "Speed", "Cours"]

# The logs are '|'-separated; read them all and stack them into a single frame.
df_lst_21_23 = pd.concat([pd.read_csv(log, sep="|") for log in logs]).drop(columns=unused_columns)
df_lst_21_23 = df_lst_21_23.rename(columns={"DateTime" : "Time", "TermoSBE": "T"})

# The logs use a decimal comma; rewrite the numeric columns with a decimal point.
# The values stay strings here and are written to the CSV as text.
for column in ("T", "Lat", "Lon"):
    df_lst_21_23[column] = df_lst_21_23[column].astype(str).str.replace(',', '.', regex=False)

# Drop spurious measurements whose latitude is exactly 0.
df_lst_21_23 = df_lst_21_23[df_lst_21_23["Lat"].astype(float) != 0]
df_lst_21_23.to_csv("Наземеные_измерения/ready/Temp21-23.csv", index=False)

KeyboardInterrupt: 

Объединение наземных данных за 2024

In [9]:
# Load the three 2024 campaign files and stack them; the "Speed" column is dropped.
df_june = pd.read_csv('Наземеные_измерения/ready/TempJune.csv')
df_july = pd.read_csv('Наземеные_измерения/ready/TempJuly.csv')
df_sept = pd.read_csv('Наземеные_измерения/ready/TempSept.csv')
df_lst_24 = pd.concat([df_june, df_july, df_sept]).drop(columns="Speed")

# The 2024 timestamps have no seconds; re-emit them with seconds as "%d.%m.%Y %H:%M:%S".
old_format = "%d.%m.%Y %H:%M"
new_format = "%d.%m.%Y %H:%M:%S"
# Parse the whole column in one vectorised call instead of one pd.to_datetime call per row.
df_lst_24["Time"] = pd.to_datetime(df_lst_24["Time"], format=old_format).dt.strftime(new_format)

# Replace decimal commas with decimal points; the values stay strings and are
# written to the CSV as text.
for column in ("T", "Lat", "Lon"):
    df_lst_24[column] = df_lst_24[column].astype(str).str.replace(',', '.', regex=False)

df_lst_24.to_csv('Наземеные_измерения/ready/Temp24.csv', index=False)

Объединение данных за 2021-2024

In [None]:
# Stack the 2021-2023 and the 2024 ground measurements (both frames are built by the
# preceding cells and must still be in the kernel) and save the merged table.
df_lst = pd.concat(objs=[df_lst_21_23, df_lst_24])
df_lst.to_csv(path_or_buf='Наземеные_измерения/ready/Temp21-24.csv', index=False)

Прореживание данных: измерения оставляются не чаще, чем раз в заданный интервал времени

In [3]:
def parse_dateTime(df: pd.DataFrame) -> pd.Series:
   """Parse the textual ``Time`` column of ``df`` into pandas datetimes.

   Args:
      df: frame with a ``Time`` column holding strings such as
         ``"25.05.2021  18:42:01"`` (day.month.year, then the time).

   Returns:
      A new Series of parsed timestamps with the same index as ``df``.
      ``df`` itself is not modified.

   Raises:
      KeyError: if ``df`` has no ``Time`` column.
      ValueError: if a value does not match the expected format.
   """
   # NOTE(review): the format contains two spaces between date and time, while the
   # 2024 data is written with a single space; this relies on strptime-style parsing
   # treating a whitespace run as "one or more whitespace characters" - confirm.
   time_format = "%d.%m.%Y  %H:%M:%S"
   # One vectorised call over the whole column instead of a Python-level call per row.
   return pd.to_datetime(df["Time"], format=time_format)

# Reload the merged 2021-2024 table, turn the Time strings into datetimes and
# order the rows chronologically.
df_lst = pd.read_csv(filepath_or_buffer='Наземеные_измерения/ready/Temp21-24.csv')
df_lst = df_lst.assign(Time=parse_dateTime(df_lst)).sort_values(by="Time")
df_lst

Unnamed: 0,Time,T,Lat,Lon
731173,2021-05-25 18:42:01,2.56830,51.896156,105.098007
731174,2021-05-25 18:42:06,2.58880,51.896130,105.098320
731175,2021-05-25 18:42:14,2.61300,51.896072,105.098824
731176,2021-05-25 18:42:19,2.60840,51.896034,105.099136
731177,2021-05-25 18:42:27,2.62660,51.895969,105.099632
...,...,...,...,...
1733140,2024-10-03 14:37:00,9.79160,51.847950,104.870730
1733141,2024-10-03 14:38:00,9.79175,51.847950,104.870730
1733142,2024-10-03 14:39:00,9.79290,51.847950,104.870730
1733143,2024-10-03 14:40:00,9.79430,51.847950,104.870730


In [5]:
def shorten_data(df: pd.DataFrame, limit_frequency_minutes: int) -> pd.DataFrame:
   """Thin out measurements so kept rows are at least a given interval apart.

   Rows are visited in their current order. A row is kept when its ``Time`` is
   either identical to an already kept time (exact duplicates are kept) or at
   least ``limit_frequency_minutes`` away from every kept time; otherwise it is
   dropped. This makes the same decisions as repeatedly filtering the whole frame
   around each surviving row, but in a single pass instead of one full-frame
   filter per row.

   Args:
      df: frame with a datetime ``Time`` column. It does not have to be sorted,
         but the result depends on the row order.
      limit_frequency_minutes: minimal spacing between kept rows, in minutes.

   Returns:
      A new frame with the kept rows in their original order and with their
      original index labels. Rows whose ``Time`` is missing are dropped.
   """
   from bisect import bisect_left  # local import keeps the cell self-contained

   margin = pd.Timedelta(limit_frequency_minutes, 'minutes')
   kept_times = []      # distinct kept timestamps, always sorted
   kept_positions = []  # positional indices of the rows to return

   for position, (time, is_missing) in enumerate(zip(df["Time"], df["Time"].isna())):
      if is_missing:
         # Comparisons with a missing time are always False, so such a row can never
         # be "far enough" from anything; drop it instead of letting it wipe the frame.
         continue
      slot = bisect_left(kept_times, time)
      if slot < len(kept_times) and kept_times[slot] == time:
         # Same timestamp as a kept row: duplicates are kept.
         kept_positions.append(position)
         continue
      # Only the nearest kept neighbours on each side can be closer than the margin.
      too_close_before = slot > 0 and time - kept_times[slot - 1] < margin
      too_close_after = slot < len(kept_times) and kept_times[slot] - time < margin
      if too_close_before or too_close_after:
         continue
      kept_times.insert(slot, time)
      kept_positions.append(position)

   return df.iloc[kept_positions]

# Build two thinned variants of the sorted table: rows at least 15 minutes apart
# and rows at least 1 minute apart.
df_lst_15, df_lst_1 = (shorten_data(df_lst, minutes) for minutes in (15, 1))

In [6]:
# Save both thinned tables next to the merged source table.
for frame, csv_path in (
    (df_lst_15, 'Наземеные_измерения/ready/Temp21-24_cut15.csv'),
    (df_lst_1, 'Наземеные_измерения/ready/Temp21-24_cut1.csv'),
):
    frame.to_csv(csv_path, index=False)

# Фильтрация точек около берега

In [None]:
import ee
import geemap

In [None]:
# Set up Earth Engine: geemap's own initialisation first, then explicit
# authentication and initialisation of the `ee` client for a Cloud project.
geemap.ee_initialize()
# NOTE(review): `oeel` is not referenced anywhere in the visible part of the notebook.
oeel = geemap.requireJS()
ee.Authenticate()
# NOTE(review): the project id here is 'ee-amazyar-test1' while the asset below lives
# under 'ee-amazyar-test' - confirm that the difference is intentional.
ee.Initialize(project='ee-amazyar-test1')
baikal_shape = ee.FeatureCollection('projects/ee-amazyar-test/assets/baikal')

# Take the geometry of the Baikal asset and read its coordinate lists.
baikal_geometry = baikal_shape.geometry()
baikal_polylines = baikal_geometry.coordinates()
baikal_combined_points = []
# getInfo() brings the nested coordinate lists to the client; every coordinate pair
# becomes a separate ee.Geometry.Point.
# NOTE(review): assumes exactly two nesting levels (list of lists of [x, y]) - confirm
# for the geometry type of this asset.
for point_list in baikal_polylines.getInfo():
   for point in point_list:
      baikal_combined_points.append(ee.Geometry.Point(point[0], point[1]))
# NOTE(review): bare expression in the middle of the cell - it is neither displayed nor used.
baikal_combined_points
# All collected points as one MultiPoint geometry; used below as the distance target.
baikal_points =  ee.Geometry.MultiPoint(baikal_combined_points)

## Modis

In [None]:
# Load the MODIS validation table and display it.
result_15_read = pd.read_csv(filepath_or_buffer='Данные_валидации/modis_validation_ext_15.csv')
result_15_read

Unnamed: 0,Time,T,Lat,Lon,Image Time,Image T,MODIS,Day/Night
0,2021-05-27 11:02:10+08:00,2.22800,52.465443,106.586891,2021-05-27 11:06:00+08:00,2.35,Modis11_Terra,Day
1,2021-05-28 11:37:54+08:00,1.42000,52.052254,105.978180,2021-05-28 11:48:00+08:00,1.69,Modis11_Terra,Day
2,2021-06-03 10:58:21+08:00,2.44340,51.606743,104.731491,2021-06-03 11:12:00+08:00,3.05,Modis11_Terra,Day
3,2021-06-04 11:49:31+08:00,2.57930,51.626816,105.359306,2021-06-04 11:54:00+08:00,4.11,Modis11_Terra,Day
4,2021-06-05 10:50:54+08:00,10.08810,52.292828,106.244370,2021-06-05 11:00:00+08:00,10.57,Modis11_Terra,Day
...,...,...,...,...,...,...,...,...
807,2024-09-21 03:25:00+08:00,10.60505,51.876760,105.205520,2024-09-20 19:24:00+00:00,7.93,Modis21_Aqua,Night
808,2024-09-22 03:58:00+08:00,11.99560,52.100490,106.120810,2024-09-21 20:12:00+00:00,11.61,Modis21_Aqua,Night
809,2024-09-23 05:01:00+08:00,11.86775,52.575820,106.606930,2024-09-22 20:48:00+00:00,10.59,Modis21_Aqua,Night
810,2024-09-25 03:36:00+08:00,12.23610,53.521020,108.145930,2024-09-24 19:48:00+00:00,9.59,Modis21_Aqua,Night


In [None]:
# For every validation row build an Earth Engine point from (Lon, Lat) and request its
# distance to `baikal_points`. getInfo() is called once per row, so this is presumably
# one blocking round-trip to the EE servers per row.
# NOTE(review): the column name below implies metres - confirm the unit of distance().
point_distance = result_15_read[['Lon','Lat']].apply(lambda x: (ee.Geometry.Point(*x)).distance(baikal_points).getInfo(), axis=1)
# .values discards the index, so the distances are attached to the rows positionally.
result_15_with_distance = result_15_read.assign(distance_to_shore_m=pd.Series(point_distance).values)

In [None]:
# Save the MODIS validation table together with the distance-to-shore column.
result_15_with_distance.to_csv(
    path_or_buf='Данные_валидации/modis_validation_ext_15_distance.csv',
    index=False,
)

Добавление колонки с разностью температур (Image T − T)

In [4]:
# Reload the MODIS table that already contains distance_to_shore_m and display it.
result_15_read = pd.read_csv(filepath_or_buffer='Данные_валидации/modis_validation_ext_15_distance.csv')
result_15_read

Unnamed: 0,Time,T,Lat,Lon,Image Time,Image T,MODIS,Day/Night,distance_to_shore_m
0,2021-05-27 11:02:10+08:00,2.22800,52.465443,106.586891,2021-05-27 11:06:00+08:00,2.35,Modis11_Terra,Day,10263.662946
1,2021-05-28 11:37:54+08:00,1.42000,52.052254,105.978180,2021-05-28 11:48:00+08:00,1.69,Modis11_Terra,Day,13368.793886
2,2021-06-03 10:58:21+08:00,2.44340,51.606743,104.731491,2021-06-03 11:12:00+08:00,3.05,Modis11_Terra,Day,14584.820130
3,2021-06-04 11:49:31+08:00,2.57930,51.626816,105.359306,2021-06-04 11:54:00+08:00,4.11,Modis11_Terra,Day,3211.487032
4,2021-06-05 10:50:54+08:00,10.08810,52.292828,106.244370,2021-06-05 11:00:00+08:00,10.57,Modis11_Terra,Day,1620.903932
...,...,...,...,...,...,...,...,...,...
807,2024-09-21 03:25:00+08:00,10.60505,51.876760,105.205520,2024-09-20 19:24:00+00:00,7.93,Modis21_Aqua,Night,3760.460237
808,2024-09-22 03:58:00+08:00,11.99560,52.100490,106.120810,2024-09-21 20:12:00+00:00,11.61,Modis21_Aqua,Night,6311.388764
809,2024-09-23 05:01:00+08:00,11.86775,52.575820,106.606930,2024-09-22 20:48:00+00:00,10.59,Modis21_Aqua,Night,13472.346742
810,2024-09-25 03:36:00+08:00,12.23610,53.521020,108.145930,2024-09-24 19:48:00+00:00,9.59,Modis21_Aqua,Night,24556.604522


In [5]:
# "T diff" = image temperature minus ground temperature; the table is written back
# to the same CSV it was read from.
result_15_read["T diff"] = result_15_read["Image T"].sub(result_15_read["T"])
result_15_read.to_csv(
    path_or_buf='Данные_валидации/modis_validation_ext_15_distance.csv',
    index=False,
)

Применение фильтра Хампеля

In [20]:
import numpy as np
from sktime.transformations.series.outlier_detection import HampelFilter

In [51]:
# Reload the MODIS table (now with the "T diff" column) and display it.
result_15_read = pd.read_csv(filepath_or_buffer='Данные_валидации/modis_validation_ext_15_distance.csv')
result_15_read

Unnamed: 0,Time,T,Lat,Lon,Image Time,Image T,MODIS,Day/Night,distance_to_shore_m,T diff
0,2021-05-27 11:02:10+08:00,2.22800,52.465443,106.586891,2021-05-27 11:06:00+08:00,2.35,Modis11_Terra,Day,10263.662946,0.12200
1,2021-05-28 11:37:54+08:00,1.42000,52.052254,105.978180,2021-05-28 11:48:00+08:00,1.69,Modis11_Terra,Day,13368.793886,0.27000
2,2021-06-03 10:58:21+08:00,2.44340,51.606743,104.731491,2021-06-03 11:12:00+08:00,3.05,Modis11_Terra,Day,14584.820130,0.60660
3,2021-06-04 11:49:31+08:00,2.57930,51.626816,105.359306,2021-06-04 11:54:00+08:00,4.11,Modis11_Terra,Day,3211.487032,1.53070
4,2021-06-05 10:50:54+08:00,10.08810,52.292828,106.244370,2021-06-05 11:00:00+08:00,10.57,Modis11_Terra,Day,1620.903932,0.48190
...,...,...,...,...,...,...,...,...,...,...
807,2024-09-21 03:25:00+08:00,10.60505,51.876760,105.205520,2024-09-20 19:24:00+00:00,7.93,Modis21_Aqua,Night,3760.460237,-2.67505
808,2024-09-22 03:58:00+08:00,11.99560,52.100490,106.120810,2024-09-21 20:12:00+00:00,11.61,Modis21_Aqua,Night,6311.388764,-0.38560
809,2024-09-23 05:01:00+08:00,11.86775,52.575820,106.606930,2024-09-22 20:48:00+00:00,10.59,Modis21_Aqua,Night,13472.346742,-1.27775
810,2024-09-25 03:36:00+08:00,12.23610,53.521020,108.145930,2024-09-24 19:48:00+00:00,9.59,Modis21_Aqua,Night,24556.604522,-2.64610


In [52]:
# Run a Hampel filter with a 7-sample window over the temperature differences.
# In the displayed result the values flagged by the filter appear as NaN.
transformer = HampelFilter(window_length=7)
result_filter_outliers = transformer.fit_transform(result_15_read.loc[:, "T diff"])
result_filter_outliers

0      0.12200
1      0.27000
2      0.60660
3          NaN
4      0.48190
        ...   
807   -2.67505
808   -0.38560
809   -1.27775
810   -2.64610
811        NaN
Name: T diff, Length: 812, dtype: float64

In [54]:
# Replace "T diff" with the filtered series (aligned on the index) and keep only the
# rows whose difference was not turned into NaN by the filter.
result_15_read = (
    result_15_read
    .assign(**{"T diff": result_filter_outliers})
    .dropna(subset=["T diff"])
)
result_15_read

Unnamed: 0,Time,T,Lat,Lon,Image Time,Image T,MODIS,Day/Night,distance_to_shore_m,T diff
0,2021-05-27 11:02:10+08:00,2.22800,52.465443,106.586891,2021-05-27 11:06:00+08:00,2.35,Modis11_Terra,Day,10263.662946,0.12200
1,2021-05-28 11:37:54+08:00,1.42000,52.052254,105.978180,2021-05-28 11:48:00+08:00,1.69,Modis11_Terra,Day,13368.793886,0.27000
2,2021-06-03 10:58:21+08:00,2.44340,51.606743,104.731491,2021-06-03 11:12:00+08:00,3.05,Modis11_Terra,Day,14584.820130,0.60660
4,2021-06-05 10:50:54+08:00,10.08810,52.292828,106.244370,2021-06-05 11:00:00+08:00,10.57,Modis11_Terra,Day,1620.903932,0.48190
5,2021-06-12 11:04:02+08:00,2.01110,53.814648,109.120308,2021-06-12 11:06:00+08:00,2.29,Modis11_Terra,Day,3250.812287,0.27890
...,...,...,...,...,...,...,...,...,...,...
806,2024-09-20 04:11:00+08:00,13.05815,51.608270,105.100390,2024-09-19 20:12:00+00:00,11.31,Modis21_Aqua,Night,5086.948500,-1.74815
807,2024-09-21 03:25:00+08:00,10.60505,51.876760,105.205520,2024-09-20 19:24:00+00:00,7.93,Modis21_Aqua,Night,3760.460237,-2.67505
808,2024-09-22 03:58:00+08:00,11.99560,52.100490,106.120810,2024-09-21 20:12:00+00:00,11.61,Modis21_Aqua,Night,6311.388764,-0.38560
809,2024-09-23 05:01:00+08:00,11.86775,52.575820,106.606930,2024-09-22 20:48:00+00:00,10.59,Modis21_Aqua,Night,13472.346742,-1.27775


In [56]:
# Save the MODIS validation rows that passed the Hampel filter.
result_15_read.to_csv(
    path_or_buf='Данные_валидации/modis_validation_ext_15_distance_filtered.csv',
    index=False,
)

## Landsat

In [None]:
# Load the Landsat validation table and display it.
result_1_read = pd.read_csv(filepath_or_buffer='Данные_валидации/landsat_ext_validation1.csv')
result_1_read

Unnamed: 0,Time,T,Lat,Lon,Image Time,Image T,MODIS,Day/Night
0,2021-05-27 11:02:10+08:00,2.22800,52.465443,106.586891,2021-05-27 11:06:00+08:00,2.35,Modis11_Terra,Day
1,2021-05-28 11:37:54+08:00,1.42000,52.052254,105.978180,2021-05-28 11:48:00+08:00,1.69,Modis11_Terra,Day
2,2021-06-03 10:58:21+08:00,2.44340,51.606743,104.731491,2021-06-03 11:12:00+08:00,3.05,Modis11_Terra,Day
3,2021-06-04 11:49:31+08:00,2.57930,51.626816,105.359306,2021-06-04 11:54:00+08:00,4.11,Modis11_Terra,Day
4,2021-06-05 10:50:54+08:00,10.08810,52.292828,106.244370,2021-06-05 11:00:00+08:00,10.57,Modis11_Terra,Day
...,...,...,...,...,...,...,...,...
807,2024-09-21 03:25:00+08:00,10.60505,51.876760,105.205520,2024-09-20 19:24:00+00:00,7.93,Modis21_Aqua,Night
808,2024-09-22 03:58:00+08:00,11.99560,52.100490,106.120810,2024-09-21 20:12:00+00:00,11.61,Modis21_Aqua,Night
809,2024-09-23 05:01:00+08:00,11.86775,52.575820,106.606930,2024-09-22 20:48:00+00:00,10.59,Modis21_Aqua,Night
810,2024-09-25 03:36:00+08:00,12.23610,53.521020,108.145930,2024-09-24 19:48:00+00:00,9.59,Modis21_Aqua,Night


In [None]:
# For every Landsat validation row build an Earth Engine point from (Lon, Lat) and
# request its distance to `baikal_points`. getInfo() is called once per row, so this is
# presumably one blocking round-trip to the EE servers per row.
# NOTE(review): the column name below implies metres - confirm the unit of distance().
point_distance = result_1_read[['Lon','Lat']].apply(lambda x: (ee.Geometry.Point(*x)).distance(baikal_points).getInfo(), axis=1)
# .values discards the index, so the distances are attached to the rows positionally.
result_1_with_distance = result_1_read.assign(distance_to_shore_m=pd.Series(point_distance).values)

In [None]:
# Save the Landsat validation table together with the distance-to-shore column.
result_1_with_distance.to_csv(
    path_or_buf='Данные_валидации/landsat_ext_validation1_distance.csv',
    index=False,
)

Добавление колонки с разностью температур (Image T − T)

In [None]:
# Reload the Landsat table that already contains distance_to_shore_m and display it.
result_1_read = pd.read_csv(filepath_or_buffer='Данные_валидации/landsat_ext_validation1_distance.csv')
result_1_read

Unnamed: 0,Time,T,Lat,Lon,Image Time,Image T,MODIS,Day/Night,distance_to_shore_m
0,2021-05-27 11:02:10+08:00,2.22800,52.465443,106.586891,2021-05-27 11:06:00+08:00,2.35,Modis11_Terra,Day,10263.662946
1,2021-05-28 11:37:54+08:00,1.42000,52.052254,105.978180,2021-05-28 11:48:00+08:00,1.69,Modis11_Terra,Day,13368.793886
2,2021-06-03 10:58:21+08:00,2.44340,51.606743,104.731491,2021-06-03 11:12:00+08:00,3.05,Modis11_Terra,Day,14584.820130
3,2021-06-04 11:49:31+08:00,2.57930,51.626816,105.359306,2021-06-04 11:54:00+08:00,4.11,Modis11_Terra,Day,3211.487032
4,2021-06-05 10:50:54+08:00,10.08810,52.292828,106.244370,2021-06-05 11:00:00+08:00,10.57,Modis11_Terra,Day,1620.903932
...,...,...,...,...,...,...,...,...,...
807,2024-09-21 03:25:00+08:00,10.60505,51.876760,105.205520,2024-09-20 19:24:00+00:00,7.93,Modis21_Aqua,Night,3760.460237
808,2024-09-22 03:58:00+08:00,11.99560,52.100490,106.120810,2024-09-21 20:12:00+00:00,11.61,Modis21_Aqua,Night,6311.388764
809,2024-09-23 05:01:00+08:00,11.86775,52.575820,106.606930,2024-09-22 20:48:00+00:00,10.59,Modis21_Aqua,Night,13472.346742
810,2024-09-25 03:36:00+08:00,12.23610,53.521020,108.145930,2024-09-24 19:48:00+00:00,9.59,Modis21_Aqua,Night,24556.604522


In [None]:
# "T diff" = image temperature minus ground temperature; the table is written back
# to the same CSV it was read from.
result_1_read["T diff"] = result_1_read["Image T"].sub(result_1_read["T"])
result_1_read.to_csv(
    path_or_buf='Данные_валидации/landsat_ext_validation1_distance.csv',
    index=False,
)

Применение фильтра Хампеля

In [None]:
import numpy as np
from sktime.transformations.series.outlier_detection import HampelFilter

In [None]:
# Hampel filtering of the Landsat validation set.
#
# This step used to be a verbatim copy of the MODIS cells: it re-read
# modis_validation_ext_15_distance.csv into `result_15_read` and wrote the result over
# modis_validation_ext_15_distance_filtered.csv, so a filtered Landsat file was never
# produced. It now reads and writes the Landsat files and leaves the MODIS frame alone.
result_1_read = pd.read_csv('Данные_валидации/landsat_ext_validation1_distance.csv')

# Same filter settings as in the MODIS section (7-sample window). Values flagged by the
# filter come back as NaN.
transformer = HampelFilter(window_length=7)
result_filter_outliers = transformer.fit_transform(result_1_read["T diff"])

# Replace "T diff" with the filtered series and keep only the rows that were not flagged.
result_1_filtered = (
    result_1_read
    .assign(**{"T diff": result_filter_outliers})
    .dropna(subset=["T diff"])
)

# NOTE(review): the output name mirrors the MODIS "<input>_filtered.csv" convention -
# adjust it if downstream code expects a different file name.
result_1_filtered.to_csv('Данные_валидации/landsat_ext_validation1_distance_filtered.csv', index=False)
result_1_filtered