In [1]:
import pandas as pd
import numpy as np
import traceback

from esa_snappy import ProductIO
from esa_snappy import GeoPos
from esa_snappy import PixelPos

from glob import glob
from tqdm import tqdm
import os
import pickle

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
import pandas as pd
from tqdm import tqdm

# Suppress specific warnings
warnings.filterwarnings("ignore", category=FutureWarning, module='pandas', lineno=11)
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)



INFO: org.esa.snap.core.gpf.operators.tooladapter.ToolAdapterIO: Initializing external tool adapters
INFO: org.esa.snap.core.util.EngineVersionCheckActivator: Please check regularly for new updates for the best SNAP experience.


In [2]:
## Period ids ("YYYY_NN") and lagged band column labels
years = range(2021, 2024)
numbers = range(1, 31)

# Year-major order: every image number of one year, then the next year.
year_id_ = [f"{y}_{n:02d}" for y in years for n in numbers]

# Labels run from the oldest lag (30) down to the current image (0).
vh_list = ["VH_" + str(lag) for lag in reversed(range(31))]
print(vh_list)

vv_list = ["VV_" + str(lag) for lag in reversed(range(31))]
print(vv_list)


['VH_30', 'VH_29', 'VH_28', 'VH_27', 'VH_26', 'VH_25', 'VH_24', 'VH_23', 'VH_22', 'VH_21', 'VH_20', 'VH_19', 'VH_18', 'VH_17', 'VH_16', 'VH_15', 'VH_14', 'VH_13', 'VH_12', 'VH_11', 'VH_10', 'VH_9', 'VH_8', 'VH_7', 'VH_6', 'VH_5', 'VH_4', 'VH_3', 'VH_2', 'VH_1', 'VH_0']
['VV_30', 'VV_29', 'VV_28', 'VV_27', 'VV_26', 'VV_25', 'VV_24', 'VV_23', 'VV_22', 'VV_21', 'VV_20', 'VV_19', 'VV_18', 'VV_17', 'VV_16', 'VV_15', 'VV_14', 'VV_13', 'VV_12', 'VV_11', 'VV_10', 'VV_9', 'VV_8', 'VV_7', 'VV_6', 'VV_5', 'VV_4', 'VV_3', 'VV_2', 'VV_1', 'VV_0']


In [3]:
## Prep DF
df_all_wide = pd.DataFrame(columns=["idpoint"]+year_id_)
# df_all_wide

df_bridging_citra = pd.read_excel("/data/ksa/03_Sampling/bridging.xlsx", dtype='object', sheet_name="periode_to_date")
# df_bridging_citra.head(2)

df_bridging_ksa = pd.read_excel("/data/ksa/03_Sampling/bridging.xlsx", dtype='object')
# df_bridging_ksa.head(2)

In [4]:
def get_df_values(mgrs, data_dir='/data/ksa/03_Sampling/data/32'):
    """Load one tile's sampling pickle and shorten the backscatter column names.

    Parameters
    ----------
    mgrs : str
        Tile identifier; the file read is ``sampling_<mgrs>.pkl``.
    data_dir : str, optional
        Directory containing the pickles. Defaults to the location the
        notebook originally hard-coded.

    Returns
    -------
    pandas.DataFrame
        The unpickled frame with ``Sigma0_VH_db`` / ``Sigma0_VV_db`` removed
        and their values available as trailing ``VH`` / ``VV`` columns.

    Raises
    ------
    FileNotFoundError
        If no pickle exists for ``mgrs`` in ``data_dir``.
    """
    pkl_path = os.path.join(data_dir, 'sampling_' + mgrs + '.pkl')
    # NOTE: pickle.load can execute arbitrary code embedded in the file; only
    # point this at pickles produced by this project.
    with open(pkl_path, 'rb') as f:
        df_values = pickle.load(f)
    df_values["VH"] = df_values["Sigma0_VH_db"]
    df_values["VV"] = df_values["Sigma0_VV_db"]
    # `columns=` states the intent directly; the previous `axis=True` only
    # worked because True happens to compare equal to 1.
    return df_values.drop(columns=["Sigma0_VH_db", "Sigma0_VV_db"])

def reformat_to_wide(df_values, band, df_bridging_citra, df_all_wide):
    """Pivot long per-period samples of one band into one row per point.

    Parameters
    ----------
    df_values : pandas.DataFrame
        Long-format samples with at least ``idpoint``, ``periode`` and the
        ``band`` column. It is not modified.
    band : str
        Name of the value column to spread across period columns
        (the notebook passes ``"VH"`` or ``"VV"``).
    df_bridging_citra : pandas.DataFrame
        Lookup joined on ``periode_start`` / ``periode_end`` / ``is_kabisat``;
        must provide ``id_per_image``.
    df_all_wide : pandas.DataFrame
        Template whose columns fix the leading column order of the result.

    Returns
    -------
    pandas.DataFrame
        One row per ``idpoint``: the template columns (periods without data
        are NaN), then any period columns absent from the template, then
        ``idsubsegmen`` (``idpoint`` without its last three characters).
    """
    # assign() builds a new frame, so the caller's df_values no longer gains
    # helper columns as a side effect of calling this function.
    keyed = df_values.assign(
        periode_start=df_values.periode.str[4:8],
        periode_end=df_values.periode.str[-4:],
        # Fixed at 0, so only bridging rows with is_kabisat == 0 can match.
        # NOTE(review): presumably a leap-year flag -- confirm whether leap
        # years need separate handling.
        is_kabisat=0,
    )
    keyed = keyed.merge(df_bridging_citra, on=['periode_start', 'periode_end', 'is_kabisat'])
    keyed['year_id_per_image'] = (
        keyed.periode.str[:4] + "_" + keyed.id_per_image.astype("str").str.zfill(2)
    )

    wide = (
        keyed.sort_values('year_id_per_image')
        .pivot(index='idpoint', columns='year_id_per_image', values=band)
        .reset_index()
    )

    # Align to the template layout with reindex rather than concatenating with
    # an empty frame: concat's dtype handling of empty inputs is deprecated.
    template_cols = list(df_all_wide.columns)
    known = set(template_cols)
    extra_cols = [col for col in wide.columns if col not in known]
    wide = wide.reindex(columns=pd.Index(template_cols + extra_cols))

    wide['idsubsegmen'] = wide.idpoint.str[:-3]
    return wide

In [6]:
# Relabelled ground-truth observations.
df_label = pd.read_csv("/data/raw/processed/relabelled_data_ksa.csv")

# Bridging rows flagged is_kabisat == 0 (presumably the non-leap-year rows --
# confirm), joined so each label's `bulan` gets its id_per_image.
bridging_is_kabisat_0 = df_bridging_ksa.query("is_kabisat == 0")
df_label = df_label.merge(
    bridging_is_kabisat_0,
    how='left',
    left_on='bulan',
    right_on='obs_in_a_year',
)

# Period key "20YY_NN": two-digit `tahun` prefixed with "20" plus the
# zero-padded image index.
label_year = "20" + df_label.tahun.astype("str")
label_image = df_label.id_per_image.astype("str").str.zfill(2)
df_label['year_id_per_image'] = label_year + "_" + label_image
df_label.head()

Unnamed: 0,idsegmen,idsubsegmen,tahun,bulan,obs,nth,id_x,class,is_kabisat,obs_in_a_year,id_per_image,periode_start,periode_end,year_id_per_image
0,110101001,A1,22,1,8.0,0,110101001A1,NV,0,1,3,125,205,2022_03
1,110101001,A2,22,1,4.0,0,110101001A2,H,0,1,3,125,205,2022_03
2,110101001,A3,22,1,4.0,0,110101001A3,H,0,1,3,125,205,2022_03
3,110101001,B1,22,1,8.0,0,110101001B1,NV,0,1,3,125,205,2022_03
4,110101001,B2,22,1,4.0,0,110101001B2,H,0,1,3,125,205,2022_03


In [7]:
# Distinct period keys that carry at least one label, in order of first appearance.
year_id_per_image_ = df_label["year_id_per_image"].unique()
year_id_per_image_


array(['2022_03', '2022_05', '2022_08', '2022_10', '2022_13', '2022_15',
       '2022_18', '2022_20', '2022_23', '2022_25', '2022_28', '2022_30',
       '2023_03', '2023_05', '2023_08', '2023_10', '2023_13', '2023_15',
       '2023_18', '2023_20', '2023_23', '2023_25', '2023_28', '2023_30'],
      dtype=object)

In [9]:
# Tile identifiers to process; each one names a per-tile pickle on disk.
mgrs_ = [
    '48MXT', '48MYT', '48MXU', '48MXS', '48MYS',
    '48MZT', '48MZS', '49MAN', '49MAM', '49MBM',
    '49MBN', '49MAP', '49MBP', '48MZU', '48MYU',
]

In [10]:
# For every tile: attach labels to each point and, for every labelled period,
# append the window of band values ending at that period (oldest -> newest),
# relabelled with vh_list. One pickle is written per tile.
# To build the VV variant, swap df_VH_wide -> df_VV_wide and vh_list -> vv_list.
n_lags = len(vh_list)  # window width; replaces the former magic number 31

for mgrs in tqdm(mgrs_):
    df_values = get_df_values(mgrs)
    df_VH_wide = reformat_to_wide(df_values, "VH", df_bridging_citra, df_all_wide)
    # Not used by the VH build below; kept so the VV variant can be enabled
    # without further changes.
    df_VV_wide = reformat_to_wide(df_values, "VV", df_bridging_citra, df_all_wide)

    df_full = df_VH_wide[['idpoint', 'idsubsegmen']].merge(
        df_label[['id_x', 'tahun', 'bulan', 'obs', 'class', 'year_id_per_image']],
        how="left",
        left_on="idsubsegmen",
        right_on="id_x",
    )
    # Tag once per tile on a new frame instead of writing into a filtered
    # slice inside the loop (the source of the SettingWithCopyWarning).
    df_tagged = df_full.assign(MGRS=mgrs)

    wide_columns = df_VH_wide.columns.to_list()
    chunks = []
    for yi in year_id_per_image_:
        df_tmp = df_tagged.loc[df_tagged.year_id_per_image == yi]

        # Exclusive end position of the window whose last column is `yi`.
        ind = wide_columns.index(yi) + 1
        if ind - n_lags < 1:
            # Column 0 is idpoint; a shorter history cannot fill the window.
            raise ValueError(
                f"period {yi} has fewer than {n_lags} period columns of history in tile {mgrs}"
            )

        df_wide_tmp = pd.concat(
            [df_VH_wide.iloc[:, 0:1], df_VH_wide.iloc[:, ind - n_lags:ind]], axis=1
        )
        df_wide_res = df_tmp.merge(df_wide_tmp, how='left', on='idpoint')

        # Rename the trailing window columns by assigning a new column index;
        # mutating `columns.values` in place is not a supported way to rename.
        df_wide_res.columns = df_wide_res.columns[:-n_lags].to_list() + vh_list
        chunks.append(df_wide_res)

    # Concatenate once per tile instead of growing a frame inside the loop.
    df_wide_full = pd.concat(chunks, axis=0) if chunks else pd.DataFrame()

    with open('/data/ksa/03_Sampling/data-wide/32/wide_data_'+mgrs+'.pkl', 'wb') as f:
        pickle.dump(df_wide_full, f)

100%|██████████| 15/15 [10:24<00:00, 41.64s/it]


In [11]:
# Maps values of the label `class` column to the string codes stored in the
# exported `observation` column.
recode_dict = {
    'V1': '1.0',
    'V2': '2.0',
    'G': '3.0',
    # NOTE(review): 'H' shares code '1.0' with 'V1' while no class maps to
    # '4.0' -- confirm whether this merge is intended or should be '4.0'.
    'H': '1.0',
    'PL': '5.0',
    'P': '99.0',  # rows recoded to '99.0' are dropped by the export loop
    'NP': '6.0',
    'NV': '7.0',
    'BL': '0.0'
}

# Re-declared here so the recode/export step can run without re-running the
# earlier tile loop. The former trailing `mgrs` expression was removed: it
# relied on a variable leaked from that loop and raised NameError otherwise.
mgrs_ = ['48MXT', '48MYT', '48MXU', '48MXS', '48MYS', '48MZT', '48MZS',
       '49MAN', '49MAM', '49MBM', '49MBN', '49MAP', '49MBP', '48MZU',
       '48MYU']


'48MYU'

In [12]:
# Band to export. The wide_data_<tile>.pkl files must have been built with
# the same band prefix; set to "VV" to write the sampling_VV_* files.
export_band = "VH"
band_cols = [f"{export_band}_{lag}" for lag in range(30, -1, -1)]
keep_cols = ['idpoint', 'idsubsegment', 'idsegment', 'nth', 'periode',
             'observation', 'MGRS', *band_cols]

for mgrs in tqdm(mgrs_):
    # NOTE: pickle.load can execute code embedded in the file; these pickles
    # are expected to be the ones written by the wide-format step.
    with open('/data/ksa/03_Sampling/data-wide/32/wide_data_'+mgrs+'.pkl', 'rb') as f:
        df_wide = pickle.load(f)

    # One chained transform producing a fresh frame: no column is assigned on
    # a filtered slice, and the three renames happen in a single call.
    df_sampling = (
        df_wide
        .assign(observation=lambda d: d['class'].replace(recode_dict))
        .loc[lambda d: d.observation != '99.0']  # drop rows recoded to '99.0'
        .assign(idsegment=lambda d: d['idsubsegmen'].str[:-2])
        .rename(columns={
            'idsubsegmen': 'idsubsegment',
            'bulan': 'nth',
            'year_id_per_image': 'periode',
        })
        .loc[:, keep_cols]
    )

    out_path = '/data/ksa/03_Sampling/data-wide/32/sampling_' + export_band + '_' + mgrs + '.pkl'
    with open(out_path, 'wb') as f:
        pickle.dump(df_sampling, f)


100%|██████████| 15/15 [00:40<00:00,  2.73s/it]


In [15]:
# Spot-check one point: df_sampling still holds the last tile written by the
# export loop; show that point's rows ordered by period.
point_filter = "idpoint == '321318003A1#01'"
df_sampling.sort_values("periode").query(point_filter)

Unnamed: 0,idpoint,idsubsegment,idsegment,nth,periode,observation,MGRS,VH_30,VH_29,VH_28,...,VH_9,VH_8,VH_7,VH_6,VH_5,VH_4,VH_3,VH_2,VH_1,VH_0
0,321318003A1#01,321318003A1,321318003,1,2022_03,1.0,48MYU,,-24.251793,,...,-17.441307,-16.251757,,0.0,,-18.333231,-23.893051,-25.192253,-26.484011,
0,321318003A1#01,321318003A1,321318003,2,2022_05,2.0,48MYU,,-19.041586,-18.839508,...,,0.0,,-18.333231,-23.893051,-25.192253,-26.484011,,-21.956528,
0,321318003A1#01,321318003A1,321318003,3,2022_08,3.0,48MYU,,-17.318237,-16.91514,...,-18.333231,-23.893051,-25.192253,-26.484011,,-21.956528,,-19.817095,-16.310555,
0,321318003A1#01,321318003A1,321318003,4,2022_10,1.0,48MYU,-16.91514,-18.11145,-14.180796,...,-25.192253,-26.484011,,-21.956528,,-19.817095,-16.310555,,-15.92127,-15.612541
0,321318003A1#01,321318003A1,321318003,5,2022_13,0.0,48MYU,,-15.092618,-15.71151,...,-21.956528,,-19.817095,-16.310555,,-15.92127,-15.612541,-15.025402,-15.612293,
0,321318003A1#01,321318003A1,321318003,6,2022_15,0.0,48MYU,-15.71151,,-23.729786,...,-19.817095,-16.310555,,-15.92127,-15.612541,-15.025402,-15.612293,,-15.840221,-14.40493
0,321318003A1#01,321318003A1,321318003,7,2022_18,0.0,48MYU,,-25.770788,-19.588863,...,-15.92127,-15.612541,-15.025402,-15.612293,,-15.840221,-14.40493,,-20.884041,
0,321318003A1#01,321318003A1,321318003,8,2022_20,1.0,48MYU,-19.588863,,-18.511692,...,-15.025402,-15.612293,,-15.840221,-14.40493,,-20.884041,,-18.852215,-22.667566
0,321318003A1#01,321318003A1,321318003,9,2022_23,2.0,48MYU,,-17.441307,-16.251757,...,-15.840221,-14.40493,,-20.884041,,-18.852215,-22.667566,,-21.931585,
0,321318003A1#01,321318003A1,321318003,10,2022_25,3.0,48MYU,-16.251757,,0.0,...,,-20.884041,,-18.852215,-22.667566,,-21.931585,,-18.62479,-20.095194
