In [None]:
import numpy as np
from eolearn.core import EOPatch
from sentinelhub import SHConfig
from fs_s3fs import S3FS
import pandas as pd
from matplotlib import pyplot as plt 
from collections import defaultdict
from datetime import datetime
import seaborn as sns
from tqdm.auto import tqdm
import os 

In [None]:
# AWS credentials are taken from the environment so that real keys never have
# to be typed into (and committed with) the notebook. When a variable is not
# set the value falls back to the empty string.
config = SHConfig()
config.aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID', '')
config.aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY', '')

# Per timestamp

In [None]:

# Handle on the S3 bucket, authenticated with the keys stored on `config`.
# The bucket name is left blank here and has to be filled in before running.
filesystem = S3FS(
    bucket_name='',
    aws_access_key_id=config.aws_access_key_id,
    aws_secret_access_key=config.aws_secret_access_key,
)

In [None]:
# Folder holding the sampled .npz archives; in this section it is resolved
# through `filesystem` (see the `filesystem.openbin` call in the loading loop).
# Left blank here - fill in before running.
NPZ_LOC = ''

In [None]:
# Load the two metadata tables from the bucket. The handles returned by
# `openbin` are closed explicitly via `with` instead of being left open for
# the lifetime of the kernel.
with filesystem.openbin('metadata/deimos_ms4_metadata.pq') as ms4_file:
    metadata_ms4 = pd.read_parquet(ms4_file)
with filesystem.openbin('metadata/deimos_pan_metadata.pq') as pan_file:
    metadata_pan = pd.read_parquet(pan_file)

In [None]:
# Label every acquisition with a country based on its projection WKT string:
# rows whose WKT mentions '34N' become 'Lithuania', all others 'Cyprus'.
metadata_ms4['country'] = metadata_ms4['Projection_OGCWKT'].apply(
    lambda wkt: 'Cyprus' if '34N' not in wkt else 'Lithuania'
)

In [None]:
# Draw the sample here so this cell works on a fresh kernel: `chosen_samples`
# is otherwise only defined in the "Per country" section further down, which
# made this loop fail with a NameError on Restart & Run All.
# The listing is sorted and the generator seeded so the 10% sample is repeatable.
sample_filenames = sorted(filesystem.listdir(NPZ_LOC))
chosen_samples = np.random.default_rng(42).choice(
    sample_filenames, int(len(sample_filenames) * 0.1), replace=False
)

# Group the 'features' array of every sampled archive by its timestamp.
timestamp_data_map = defaultdict(list)
for npz_file in tqdm(chosen_samples):
    # NOTE(review): allow_pickle=True can execute arbitrary code when loading
    # untrusted files - keep it only while the archives come from a trusted source.
    # Both the remote handle and the NpzFile are closed once the arrays are read.
    with filesystem.openbin(f'{NPZ_LOC}/{npz_file}') as npz_handle, \
            np.load(npz_handle, allow_pickle=True) as npz:
        # 'timetamps_deimos' (sic) is the key as stored in the archives.
        timestamp_data_map[npz['timetamps_deimos'].item()].append(npz['features'])

In [None]:
# For every timestamp, stack all of its feature arrays along the first axis
# and reduce over axes 0-2, leaving one mean / median / std value per entry
# of the remaining (last) axis.
df_dicts = []
for ts, ts_values in timestamp_data_map.items():
    stacked = np.concatenate(ts_values)
    df_dicts.append({
        'timestamp': ts,
        'mean': np.mean(stacked, axis=(0, 1, 2)),
        'median': np.median(stacked, axis=(0, 1, 2)),
        'std': np.std(stacked, axis=(0, 1, 2)),
    })
df_norm_s2 = pd.DataFrame(df_dicts)

In [None]:
# Unpack the per-row statistic vectors into scalar columns, one per index
# 0..3, added in the order MEAN_i, STD_i, MEDIAN_i for each index i.
for band in range(4):
    for source_col, prefix in (('mean', 'MEAN'), ('std', 'STD'), ('median', 'MEDIAN')):
        df_norm_s2[f'{prefix}_{band}'] = df_norm_s2[source_col].apply(lambda stats: stats[band])

In [None]:
# Attach the country to every timestamp row by joining the statistics
# (indexed by 'timestamp') with the metadata (indexed by 'START_TIME'),
# then turn the index back into a regular column.
df_norm_s2_per_timestamp = (
    df_norm_s2
    .set_index('timestamp')
    .join(metadata_ms4.loc[:, ['START_TIME', 'country']].set_index('START_TIME'))
    .reset_index()
)

In [None]:
# Scatter of the MEDIAN_2 statistic over time, coloured by country.
# `height` is the current name of the facet-size keyword; the former `size`
# keyword was deprecated in seaborn 0.9 and is rejected by newer releases.
fg = sns.FacetGrid(data=df_norm_s2_per_timestamp, hue='country', aspect=2.5, height=6)
# Trailing semicolon keeps the FacetGrid repr out of the cell output.
fg.map(plt.scatter, 'timestamp', 'MEDIAN_2').add_legend();


# Per country

In [None]:
# Lookup table START_TIME -> country, built from the (START_TIME, country)
# row pairs of the metadata; later rows win when a START_TIME repeats.
timestamp_country_map = dict(metadata_ms4[['START_TIME', 'country']].values)

In [None]:
# Collects the 'features' arrays of the sampled archives, keyed by country.
# It is filled by the loading loop further down; re-run this cell before
# re-running that loop, otherwise the same samples are appended a second time.
country_data_map = defaultdict(list)

In [None]:
# Re-assigns NPZ_LOC: from this point on it is used as a local directory
# (listed with os.listdir and read with np.load by path), not through
# `filesystem`. Left blank here - fill in before running.
NPZ_LOC = ''

In [None]:
# Pick 10% of the archives in NPZ_LOC, without replacement.
# os.listdir returns names in arbitrary order, so the listing is sorted and
# the draw uses a seeded generator - the same files are chosen on every run.
sample_filenames = sorted(os.listdir(NPZ_LOC))
chosen_samples = np.random.default_rng(42).choice(
    sample_filenames, int(len(sample_filenames) * 0.1), replace=False
)

In [None]:
# Group the 'features' array of every sampled archive by the country that
# its timestamp maps to.
for npz_file in tqdm(chosen_samples):
    # NOTE(review): allow_pickle=True can execute arbitrary code when loading
    # untrusted files - keep it only while the archives come from a trusted source.
    # `with` closes the NpzFile (and its underlying file) after each sample
    # instead of leaking one open handle per archive.
    with np.load(f'{NPZ_LOC}/{npz_file}', allow_pickle=True) as npz:
        # 'timetamps_deimos' (sic) is the key as stored in the archives.
        country = timestamp_country_map[npz['timetamps_deimos'].item()]
        country_data_map[country].append(npz['features'])

In [None]:
# For every country, stack all of its feature arrays along the first axis,
# reduce over axes 0-2 and store the first four entries of each statistic
# as flat columns (mean_0..3, median_0..3, std_0..3).
df_dicts = []
for country, country_values in country_data_map.items():
    stacked = np.concatenate(country_values)

    row = {'country': country}
    for stat_name, stat_fn in (('mean', np.mean), ('median', np.median), ('std', np.std)):
        per_band = stat_fn(stacked, axis=(0, 1, 2))
        for band in range(4):
            row[f'{stat_name}_{band}'] = per_band[band]
    df_dicts.append(row)

df_norm_s2_per_country = pd.DataFrame(df_dicts)

In [None]:
# Show the per-country statistics table as the cell output.
df_norm_s2_per_country

In [None]:
# Persist the per-country statistics to the bucket. The write handle must be
# closed for the data to be flushed/uploaded reliably, so it is managed with
# `with` rather than being passed inline and abandoned.
with filesystem.openbin('metadata/s2_norm_per_country.pq', 'wb') as parquet_file:
    df_norm_s2_per_country.to_parquet(parquet_file)