In [None]:
import pandas as pd
from sklearn.metrics import mean_squared_error, root_mean_squared_error, r2_score

from scipy.stats import theilslopes, kendalltau, tstd

In [None]:
def get_metrics_validation(result_in_situ, result_image):
   """
   Compute agreement metrics between in situ values and image-derived values.

   Parameters
   ----------
   result_in_situ
      In situ measurements. The code reads ``.shape[0]`` and subtracts
      element-wise, so a pandas Series or NumPy array is expected.
   result_image
      Values taken from the image, paired with ``result_in_situ``.

   Returns
   -------
   list
      ``[n, bias, rmse, mse, slope, r2, kendal, sko]``:
      number of samples, mean of (image - in situ), RMSE, MSE, Theil-Sen
      slope, R2 with in situ as the reference, Kendall's tau, and the sample
      standard deviation of the absolute differences.
   """
   # Image minus in situ: a positive bias means the image reads higher.
   difference = result_image - result_in_situ

   n = result_in_situ.shape[0]
   bias = difference.mean()

   rmse = root_mean_squared_error(result_in_situ, result_image)
   mse = mean_squared_error(result_in_situ, result_image)
   r2 = r2_score(result_in_situ, result_image)

   # theilslopes takes (y, x): in situ is the dependent variable here.
   # NOTE(review): confirm this regression direction is the intended one.
   slope = theilslopes(result_in_situ, result_image)[0]
   kendal = kendalltau(result_in_situ, result_image).statistic

   # NOTE(review): the spread is taken over |difference|, not the signed
   # difference - confirm this is the intended definition of "SKO".
   sko = tstd(abs(difference))

   return [n, bias, rmse, mse, slope, r2, kendal, sko]

Функции для MODIS

In [None]:
def split_modis(df):
   """
   Split a validation frame into a dict of
   "category": (in situ temperature, image temperature) pairs.

   The frame must have the columns 'MODIS', 'Day/Night', 'T' and 'Image T'.
   Categories are the product subsets M11 (Modis11_Terra / Modis11_Aqua),
   M21 (Modis21_Terra / Modis21_Aqua) and the unfiltered frame, each taken
   in full, for 'Day' rows only and for 'Night' rows only.

   Returns a dict with the keys, in this order: "M11_full", "M11_day",
   "M11_night", "M21_full", "M21_day", "M21_night", "full", "day", "night".
   Every value is a tuple of two Series: (df['T'] subset, df['Image T'] subset).
   """
   def _temperature_pairs(frame):
      # (in situ, image) pairs for the whole frame and its day / night rows.
      is_day = frame['Day/Night'] == 'Day'
      is_night = frame['Day/Night'] == 'Night'
      return {
         "full": (frame['T'], frame['Image T']),
         "day": (frame.loc[is_day, 'T'], frame.loc[is_day, 'Image T']),
         "night": (frame.loc[is_night, 'T'], frame.loc[is_night, 'Image T']),
      }

   # Key prefix -> rows belonging to that product family ("" = all rows).
   subsets = (
      ("M11_", df[df['MODIS'].isin(['Modis11_Terra', 'Modis11_Aqua'])]),
      ("M21_", df[df['MODIS'].isin(['Modis21_Terra', 'Modis21_Aqua'])]),
      ("", df),
   )

   categories = {}
   for prefix, frame in subsets:
      for period, pair in _temperature_pairs(frame).items():
         categories[prefix + period] = pair
   return categories

## Считать сохранённые данные для валидации 

In [None]:
def _load_validation_csv(path):
   """Read one validation CSV; return (frame, its 'T' column, its 'Image T' column)."""
   frame = pd.read_csv(path)
   return frame, frame['T'], frame['Image T']


# Landsat: full and filtered validation sets.
landsat_df, landsat_in_situ, landsat_image = _load_validation_csv(
   'Данные_валидации/landsat_ext_validation1_distance.csv'
)
landsat_filtered_df, landsat_filtered_in_situ, landsat_filtered_image = _load_validation_csv(
   'Данные_валидации/landsat_ext_validation1_distance_filtered.csv'
)

# MODIS: full and filtered validation sets.
modis_df, modis_in_situ, modis_image = _load_validation_csv(
   'Данные_валидации/modis_ext_validation15_distance.csv'
)
modis_filtered_df, modis_filtered_in_situ, modis_filtered_image = _load_validation_csv(
   'Данные_валидации/modis_ext_validation15_distance_filtered.csv'
)

Самая общая таблица

In [None]:
# Summary table: one row of validation metrics per MODIS category.
modis_dfs = split_modis(modis_df)

# The individual (in situ, image) pairs stay available as top-level names
# in case other cells refer to them.
m11_full, m11_day, m11_night = modis_dfs["M11_full"], modis_dfs["M11_day"], modis_dfs["M11_night"]
m21_full, m21_day, m21_night = modis_dfs["M21_full"], modis_dfs["M21_day"], modis_dfs["M21_night"]
full, day, night = modis_dfs["full"], modis_dfs["day"], modis_dfs["night"]

# Explicit row order, so the table layout does not depend on dict ordering.
product_order = (
   "M11_full", "M11_day", "M11_night",
   "M21_full", "M21_day", "M21_night",
   "full", "day", "night",
)

# Build the table in one step instead of appending nine rows one by one.
statistics_df = pd.DataFrame(
   [[product] + get_metrics_validation(*modis_dfs[product]) for product in product_order],
   columns=["Product", "N", "Bias", "RMSE", "MSE", "Sen's slope", "R2", "Kendal", "SKO"],
)

statistics_df

# Исследование гипотез

Поиск оптимального расстояния до берега для MODIS: такого, при котором снимков остаётся не слишком мало (как при большом расстоянии), а выбросов — не слишком много (как при малом расстоянии)

Результат бессмысленный 

In [None]:
def get_combination_statistics_filtered(df, combination):
   """
   Tabulate the statistics of one product combination for a series of
   minimum shore distances.

   For every limit from 100 m to 2000 m (step 100 m) the rows of ``df`` with
   ``distance_to_shore_m`` >= limit are passed to ``get_statistics`` and the
   row whose "Product" equals ``combination`` is stored.

   NOTE(review): ``get_statistics`` is not defined in the cells visible here;
   presumably it returns one row per product with at least the columns
   "Product", "MSE", "MBE" and "R2" - confirm.

   Returns a DataFrame with the columns "distance_m", "MSE", "MBE", "R2" and
   a fresh 0-based index, one row per distance limit.
   """
   table = pd.DataFrame(columns=["distance_m", "MSE", "MBE", "R2"])
   for row_number, distance in enumerate(range(100, 2100, 100)):
      far_enough = df[df["distance_to_shore_m"] >= distance]
      per_product = get_statistics(far_enough)
      selected = (
         per_product[per_product["Product"] == combination]
         .drop(columns=["Product"])
         .assign(distance_m=distance)
      )
      # Only the first matching row is kept; it is aligned to the table's columns.
      table.loc[row_number] = selected.iloc[0]
   return table.reset_index(drop=True)

In [None]:
# Statistics of the "M11_full" combination for each shore-distance limit.
# NOTE(review): `result_15_read` is not assigned in any cell visible in this
# part of the notebook; the callee filters on a "distance_to_shore_m" column,
# so it presumably is the MODIS validation frame with distances - confirm.
# NOTE(review): the stored output lists limits 100-1000 m only, while the
# function iterates up to 2000 m - the output looks stale; re-run to refresh.
get_combination_statistics_filtered(result_15_read, "M11_full")

Unnamed: 0,distance_m,MSE,MBE,R2
0,100.0,2.656809,-0.644845,0.894074
1,200.0,2.60734,-0.634175,0.897063
2,300.0,2.464665,-0.674054,0.901832
3,400.0,2.447135,-0.674781,0.903055
4,500.0,2.373106,-0.674526,0.905581
5,600.0,2.384136,-0.677766,0.905268
6,700.0,2.357139,-0.689823,0.9066
7,800.0,2.349125,-0.692053,0.907374
8,900.0,2.349125,-0.692053,0.907374
9,1000.0,2.350468,-0.690043,0.907394


In [None]:
def get_best_limit_distance_series(df, combination):
   """
   Find the shore-distance limits that give the best MBE, MSE and R2 for one
   product combination.

   "Best" means: MBE closest to zero (smallest absolute value), smallest MSE,
   largest R2, taken over the table produced by
   ``get_combination_statistics_filtered(df, combination)``.

   Returns a Series with the entries "combination", "Min MBE",
   "Min MBE distance", "Min MSE", "Min MSE distance", "Max R2" and
   "Max R2 distance". "Min MBE" holds the signed MBE value.
   """
   table = get_combination_statistics_filtered(df, combination)

   # idxmin / idxmax return index labels; they are used as positions, which
   # matches because the table carries a 0-based consecutive index.
   closest_to_zero_mbe = table.iloc[table["MBE"].abs().idxmin()]
   lowest_mse = table.iloc[table["MSE"].idxmin()]
   highest_r2 = table.iloc[table["R2"].idxmax()]

   return pd.Series({
      "combination": combination,
      "Min MBE": closest_to_zero_mbe["MBE"],
      "Min MBE distance": closest_to_zero_mbe["distance_m"],
      "Min MSE": lowest_mse["MSE"],
      "Min MSE distance": lowest_mse["distance_m"],
      "Max R2": highest_r2["R2"],
      "Max R2 distance": highest_r2["distance_m"],
   })

In [None]:
def get_best_distances_df(df):
   """
   Collect the best shore-distance limits for every product combination.

   Calls ``get_best_limit_distance_series(df, combination)`` for each of the
   nine combinations ("M11_full" ... "night") and stacks the results.

   Parameters
   ----------
   df
      Validation frame handed on to ``get_best_limit_distance_series``.
      The frame passed by the caller is used, not a notebook-level global.

   Returns
   -------
   pandas.DataFrame
      One row per combination with the columns "combination", "Min MBE",
      "Min MBE distance", "Min MSE", "Min MSE distance", "Max R2" and
      "Max R2 distance"; rows are indexed 0..8 in the order listed below.
   """
   columns = ["combination", "Min MBE", "Min MBE distance", "Min MSE", "Min MSE distance", "Max R2", "Max R2 distance"]
   combinations = (
      "M11_full", "M11_day", "M11_night",
      "M21_full", "M21_day", "M21_night",
      "full", "day", "night",
   )
   # Build all rows first and create the frame once.
   rows = [get_best_limit_distance_series(df, combination) for combination in combinations]
   return pd.DataFrame(rows, columns=columns)

In [None]:
# Best shore-distance limits for all nine product combinations.
# NOTE(review): `result_15_read` is not assigned in any cell visible in this
# part of the notebook - make sure it is loaded before this cell runs.
get_best_distances_df(result_15_read)

Unnamed: 0,combination,Min MBE,Min MBE distance,Min MSE,Min MSE distance,Max R2,Max R2 distance
0,M11_full,-0.634175,200.0,2.349125,800.0,0.907394,1000.0
1,M11_day,-0.513114,200.0,2.821907,400.0,0.889822,400.0
2,M11_night,-0.757942,200.0,1.79723,1000.0,0.92983,1000.0
3,M21_full,0.047193,100.0,5.179943,1900.0,0.790054,2000.0
4,M21_day,0.535986,1500.0,5.821006,1500.0,0.772992,2000.0
5,M21_night,-0.507882,1900.0,3.968729,1900.0,0.820607,1900.0
6,full,-0.274948,1100.0,3.761633,1900.0,0.850815,1900.0
7,day,0.01386,1500.0,4.413911,1500.0,0.826535,1700.0
8,night,-0.688726,400.0,2.782248,1900.0,0.885012,1900.0


In [None]:
# Write validation metrics for every satellite / product combination to text
# files, with one header block per shore-distance limit (100 m ... 2000 m).
# File handles: a* = Aqua, t* = Terra, at* = Aqua + Terra;
# suffix 1 = M11, 2 = M21, 12 = M11 + M21.
# All nine files are opened in append mode ("a"), so re-running this cell
# adds to whatever the files already contain.
#
# NOTE(review): every call below passes a file handle as a third argument,
# while the get_metrics_validation defined in this notebook accepts two
# arguments and returns a list. With that definition these calls raise a
# TypeError; presumably this cell targets an earlier, printing version of
# the helper - confirm before running.
# NOTE(review): distance_limit_m is used only in the "Distance from shore"
# header lines; none of the result_* series is filtered inside this loop, and
# they are not assigned in the cells visible in this part of the notebook.
with open("Данные_валидации/Статистики/modis_aqua_terra_m11_m21.txt", "a") as at12, open("Данные_валидации/Статистики/modis_aqua_terra_m11.txt", "a") as at1, open("Данные_валидации/Статистики/modis_aqua_terra_m21.txt", "a") as at2, open("Данные_валидации/Статистики/modis_aqua_m11_m21.txt", "a") as a12, open("Данные_валидации/Статистики/modis_aqua_m11.txt", "a") as a1, open("Данные_валидации/Статистики/modis_aqua_m21.txt", "a") as a2, open("Данные_валидации/Статистики/modis_terra_m11_m21.txt", "a") as t12, open("Данные_валидации/Статистики/modis_terra_m11.txt", "a") as t1, open("Данные_валидации/Статистики/modis_terra_m21.txt", "a") as t2:
   for distance_limit_m in range(100, 2100, 100):
      
      # Aqua
      ## M11
      print(f"Distance from shore: {distance_limit_m} m\n", file=a1)

      print("Day+Night\n", file=a1)
      get_metrics_validation(result_in_situ_full_aqua_M11, result_image_full_aqua_M11, a1)

      print("Day\n", file=a1)
      get_metrics_validation(result_in_situ_day_aqua_M11, result_image_day_aqua_M11, a1)

      print("Night\n", file=a1)
      get_metrics_validation(result_in_situ_night_aqua_M11, result_image_night_aqua_M11, a1)
      ## M21
      print(f"Distance from shore: {distance_limit_m} m\n", file=a2)

      print("Day+Night\n", file=a2)
      get_metrics_validation(result_in_situ_full_aqua_M21, result_image_full_aqua_M21, a2)

      print("Day\n", file=a2)
      get_metrics_validation(result_in_situ_day_aqua_M21, result_image_day_aqua_M21, a2)

      print("Night\n", file=a2)
      get_metrics_validation(result_in_situ_night_aqua_M21, result_image_night_aqua_M21, a2)
      ## M11+M21
      print(f"Distance from shore: {distance_limit_m} m\n", file=a12)
      print("Day+Night\n", file=a12)
      get_metrics_validation(result_in_situ_full_aqua, result_image_full_aqua, a12)

      print("Day\n", file=a12)
      get_metrics_validation(result_in_situ_day_aqua, result_image_day_aqua, a12)

      print("Night\n", file=a12)
      get_metrics_validation(result_in_situ_night_aqua, result_image_night_aqua, a12)
      
      # Terra
      ## M11
      print(f"Distance from shore: {distance_limit_m} m\n", file=t1)

      print("Day+Night\n", file=t1)
      get_metrics_validation(result_in_situ_full_terra_M11, result_image_full_terra_M11, t1)

      print("Day\n", file=t1)
      get_metrics_validation(result_in_situ_day_terra_M11, result_image_day_terra_M11, t1)

      print("Night\n", file=t1)
      get_metrics_validation(result_in_situ_night_terra_M11, result_image_night_terra_M11, t1)
      ## M21
      print(f"Distance from shore: {distance_limit_m} m\n", file=t2)

      print("Day+Night\n", file=t2)
      get_metrics_validation(result_in_situ_full_terra_M21, result_image_full_terra_M21, t2)

      print("Day\n", file=t2)
      get_metrics_validation(result_in_situ_day_terra_M21, result_image_day_terra_M21, t2)

      print("Night\n", file=t2)
      get_metrics_validation(result_in_situ_night_terra_M21, result_image_night_terra_M21, t2)
      ## M11+M21
      print(f"Distance from shore: {distance_limit_m} m\n", file=t12)
      print("Day+Night\n", file=t12)
      get_metrics_validation(result_in_situ_full_terra, result_image_full_terra, t12)

      print("Day\n", file=t12)
      get_metrics_validation(result_in_situ_day_terra, result_image_day_terra, t12)

      print("Night\n", file=t12)
      get_metrics_validation(result_in_situ_night_terra, result_image_night_terra, t12)
      
      # Aqua + Terra
      ## M11
      print(f"Distance from shore: {distance_limit_m} m\n", file=at1)

      print("Day+Night\n", file=at1)
      get_metrics_validation(result_in_situ_full_M11, result_image_full_M11, at1)

      print("Day\n", file=at1)
      get_metrics_validation(result_in_situ_day_M11, result_image_day_M11, at1)

      print("Night\n", file=at1)
      get_metrics_validation(result_in_situ_night_M11, result_image_night_M11, at1)
      ## M21
      print(f"Distance from shore: {distance_limit_m} m\n", file=at2)

      print("Day+Night\n", file=at2)
      get_metrics_validation(result_in_situ_full_M21, result_image_full_M21, at2)

      print("Day\n", file=at2)
      get_metrics_validation(result_in_situ_day_M21, result_image_day_M21, at2)

      print("Night\n", file=at2)
      get_metrics_validation(result_in_situ_night_M21, result_image_night_M21, at2)
      ## M11+M21
      print(f"Distance from shore: {distance_limit_m} m\n", file=at12)
      print("Day+Night\n", file=at12)
      get_metrics_validation(result_in_situ_full, result_image_full, at12)

      print("Day\n", file=at12)
      get_metrics_validation(result_in_situ_day, result_image_day, at12)

      print("Night\n", file=at12)
      get_metrics_validation(result_in_situ_night, result_image_night, at12)