In [67]:
import sys
# Print the path of the Python interpreter this kernel is running on.
print(sys.executable)

/opt/miniconda/envs/ksa/bin/python


In [68]:
import pandas as pd
import numpy as np
import traceback

from esa_snappy import ProductIO
from esa_snappy import GeoPos
from esa_snappy import PixelPos

from glob import glob
from tqdm import tqdm
import os
import pickle

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
import pandas as pd
from tqdm import tqdm

# Suppress specific warnings
warnings.filterwarnings("ignore", category=FutureWarning, module='pandas', lineno=11)
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)

In [69]:
# Run configuration read by the cells below.
# `data` selects the branch taken when loading/saving: 'imputed' or 'not imputed'.
data = 'imputed'
# `idprov` is the folder segment inserted into the input paths by get_df_values.
idprov = '32'

In [70]:
## Build the period ids ("<year>_<nn>") and the lag column names
years = range(2021, 2024)
numbers = range(1, 31)
# One id per (year, image number): years outermost, numbers zero-padded to 2 digits.
year_id_ = [
    f"{year}_{str(number).zfill(2)}"
    for year in years
    for number in numbers
]

# Lag labels run from 30 down to 0 (31 names per band).
vh_list = [f"VH_{i}" for i in reversed(range(31))]
print(vh_list)

vv_list = [f"VV_{i}" for i in reversed(range(31))]
print(vv_list)


['VH_30', 'VH_29', 'VH_28', 'VH_27', 'VH_26', 'VH_25', 'VH_24', 'VH_23', 'VH_22', 'VH_21', 'VH_20', 'VH_19', 'VH_18', 'VH_17', 'VH_16', 'VH_15', 'VH_14', 'VH_13', 'VH_12', 'VH_11', 'VH_10', 'VH_9', 'VH_8', 'VH_7', 'VH_6', 'VH_5', 'VH_4', 'VH_3', 'VH_2', 'VH_1', 'VH_0']
['VV_30', 'VV_29', 'VV_28', 'VV_27', 'VV_26', 'VV_25', 'VV_24', 'VV_23', 'VV_22', 'VV_21', 'VV_20', 'VV_19', 'VV_18', 'VV_17', 'VV_16', 'VV_15', 'VV_14', 'VV_13', 'VV_12', 'VV_11', 'VV_10', 'VV_9', 'VV_8', 'VV_7', 'VV_6', 'VV_5', 'VV_4', 'VV_3', 'VV_2', 'VV_1', 'VV_0']


In [71]:
## Prepare the empty wide template and load the two bridging tables
# Empty frame that only fixes the column layout: "idpoint" followed by every period id.
df_all_wide = pd.DataFrame(columns=["idpoint", *year_id_])

# Both bridging tables come from the same workbook; every cell is read as object.
bridging_xlsx = "/data/ksa/03_Sampling/bridging.xlsx"

# Sheet "periode_to_date" (merged on periode_start / periode_end / is_kabisat later on).
df_bridging_citra = pd.read_excel(bridging_xlsx, sheet_name="periode_to_date", dtype='object')

# No sheet_name given, so pandas reads its default sheet.
df_bridging_ksa = pd.read_excel(bridging_xlsx, dtype='object')

In [72]:
def get_df_values(data, idprov, mgrs):
    """Load one tile's pickled sample table and expose its bands as ``VH`` / ``VV``.

    Parameters
    ----------
    data : str
        ``'not imputed'`` reads the sampling pickle (columns ``Sigma0_VH_db`` /
        ``Sigma0_VV_db``); ``'imputed'`` reads the imputation pickle (columns
        ``Sigma0_VH_db_imputation`` / ``Sigma0_VV_db_imputation``).
    idprov : str
        Folder name segment inserted into the input path.
    mgrs : str
        Tile code inserted into the input file name.

    Returns
    -------
    pandas.DataFrame
        The loaded table with ``VH`` and ``VV`` appended as the last columns
        and the two source band columns removed.

    Raises
    ------
    ValueError
        If ``data`` is neither ``'not imputed'`` nor ``'imputed'``.
    """
    if data == 'not imputed':
        path = '/data/ksa/03_Sampling/data/'+idprov+'/sampling_'+mgrs+'.pkl'
        band_sources = {"VH": "Sigma0_VH_db", "VV": "Sigma0_VV_db"}
    elif data == 'imputed':
        path = '/data/ksa/04_Data_Preprocessing/'+idprov+'/01_imputation/'+mgrs+'_imputed_data.pkl'
        band_sources = {"VH": "Sigma0_VH_db_imputation", "VV": "Sigma0_VV_db_imputation"}
    else:
        # Previously an unknown mode fell through both branches and surfaced
        # as an UnboundLocalError on the return statement.
        raise ValueError(
            "data must be 'not imputed' or 'imputed', got " + repr(data)
        )

    # NOTE: pickle.load can execute arbitrary code; only point this at trusted files.
    with open(path, 'rb') as f:
        df_values = pickle.load(f)

    # Append the short names first, then drop the sources, so VH/VV end up last.
    for short_name, source_name in band_sources.items():
        df_values[short_name] = df_values[source_name]
    return df_values.drop(columns=list(band_sources.values()))

def reformat_to_wide(df_values, band, df_bridging_citra, df_all_wide):
    """Pivot the long per-period values of one band into one row per point.

    Parameters
    ----------
    df_values : pandas.DataFrame
        Long table with at least ``periode``, ``idpoint`` and the ``band``
        column. ``periode`` is sliced as text: characters 4-7 and the last
        four characters become the bridging keys, the first four the year
        (e.g. ``'20221215_20221226'`` -> ``'1215'``, ``'1226'``, ``'2022'``).
        The frame is not modified.
    band : str
        Name of the value column to spread across the period columns.
    df_bridging_citra : pandas.DataFrame
        Lookup with ``periode_start``, ``periode_end``, ``is_kabisat`` and
        ``id_per_image``.
    df_all_wide : pandas.DataFrame
        Empty template whose columns fix the output column order.

    Returns
    -------
    pandas.DataFrame
        ``df_all_wide`` stacked on top of the pivoted table, plus an
        ``idsubsegmen`` column (``idpoint`` without its last three characters).
    """
    join_keys = ['periode_start', 'periode_end', 'is_kabisat']

    # .assign builds a new frame, so the caller's df_values keeps its original
    # columns (the helper columns used to be written into it in place).
    # NOTE(review): is_kabisat is fixed to 0, so every period is presumably
    # matched against the non-leap-year rows of the bridging table - confirm
    # before feeding leap-year data.
    keyed = df_values.assign(
        periode_start=df_values.periode.str[4:8],
        periode_end=df_values.periode.str[-4:],
        is_kabisat=0,
    )

    merged = keyed.merge(df_bridging_citra, left_on=join_keys, right_on=join_keys)
    merged['year_id_per_image'] = (
        merged.periode.str[:4] + "_" + merged.id_per_image.astype("str").str.zfill(2)
    )

    df_band_wide = (
        merged.sort_values('year_id_per_image')
        .pivot(index='idpoint', columns='year_id_per_image', values=band)
        .reset_index()
    )
    # Stacking on the empty template guarantees every template column exists,
    # in template order, even for periods absent from this tile.
    df_band_wide = pd.concat([df_all_wide, df_band_wide], axis=0)
    df_band_wide['idsubsegmen'] = df_band_wide.idpoint.str[:-3]
    return df_band_wide

In [73]:
# Load the relabelled observations and attach the bridging rows flagged
# is_kabisat == 0, matching `bulan` against `obs_in_a_year`.
bridging_rows = df_bridging_ksa.query("is_kabisat == 0")
df_label = pd.read_csv("/data/raw/processed/relabelled_data_ksa.csv").merge(
    bridging_rows,
    how='left',
    left_on='bulan',
    right_on='obs_in_a_year',
)

# Period key: "20" + tahun + "_" + id_per_image zero-padded to 2 digits (e.g. 2022_03).
label_year = "20"+df_label.tahun.astype("str")
label_image = df_label.id_per_image.astype("str").str.zfill(2)
df_label['year_id_per_image'] = label_year+"_"+label_image
df_label.head()

Unnamed: 0,idsegmen,idsubsegmen,tahun,bulan,obs,nth,id_x,class,is_kabisat,obs_in_a_year,id_per_image,periode_start,periode_end,year_id_per_image
0,110101001,A1,22,1,8.0,0,110101001A1,NV,0,1,3,125,205,2022_03
1,110101001,A2,22,1,4.0,0,110101001A2,H,0,1,3,125,205,2022_03
2,110101001,A3,22,1,4.0,0,110101001A3,H,0,1,3,125,205,2022_03
3,110101001,B1,22,1,8.0,0,110101001B1,NV,0,1,3,125,205,2022_03
4,110101001,B2,22,1,4.0,0,110101001B2,H,0,1,3,125,205,2022_03


In [74]:
# Distinct period keys that carry at least one label; the wide-table loop
# below builds one block of rows per entry.
year_id_per_image_ = df_label.year_id_per_image.unique()
year_id_per_image_


array(['2022_03', '2022_05', '2022_08', '2022_10', '2022_13', '2022_15',
       '2022_18', '2022_20', '2022_23', '2022_25', '2022_28', '2022_30',
       '2023_03', '2023_05', '2023_08', '2023_10', '2023_13', '2023_15',
       '2023_18', '2023_20', '2023_23', '2023_25', '2023_28', '2023_30'],
      dtype=object)

In [75]:
# Tile codes to process, in processing order.
mgrs_ = [
    '48MXT', '48MYT', '48MXU', '48MXS', '48MYS',
    '48MZT', '48MZS', '49MAN', '49MAM', '49MBM',
    '49MBN', '49MAP', '49MBP', '48MZU', '48MYU',
]

In [76]:
# For every tile: build one row per (point, labelled period) holding the VV
# value of that period plus the preceding ones (VV_30 = oldest ... VV_0 = the
# labelled period itself), then pickle the table.
n_lags = len(vv_list)

for mgrs in tqdm(mgrs_):
    df_values = get_df_values(data, idprov, mgrs)
    df_VV_wide = reformat_to_wide(df_values, "VV", df_bridging_citra, df_all_wide)

    # Attach every label of a sub-segment to each of its points.
    df_full = df_VV_wide[['idpoint','idsubsegmen']].merge(
        df_label[['id_x','tahun', 'bulan', 'obs', 'class', 'year_id_per_image']],
        how="left", left_on="idsubsegmen", right_on="id_x",
    )

    wide_columns = df_VV_wide.columns.to_list()
    wide_parts = []
    for yi in year_id_per_image_:
        # .assign returns a new frame, so nothing is written onto a slice of df_full.
        df_tmp = df_full.loc[df_full.year_id_per_image == yi].assign(MGRS=mgrs)

        # Window = the n_lags period columns ending at (and including) yi.
        ind = wide_columns.index(yi)+1
        start = ind-n_lags
        if start < 1:
            # Column 0 is idpoint; a smaller start would pull it (or, when
            # negative, unrelated trailing columns) into the window.
            raise ValueError(
                "period " + str(yi) + " has fewer than " + str(n_lags)
                + " period columns available in the wide table"
            )
        df_wide_tmp = pd.concat([df_VV_wide.iloc[:,0:1], df_VV_wide.iloc[:,start:ind]], axis=1)
        df_wide_res = df_tmp.merge(df_wide_tmp, how='left', left_on='idpoint', right_on='idpoint')

        # Relabel the window columns by assigning a fresh column index rather
        # than mutating columns.values in place (an Index is meant to be immutable).
        df_wide_res.columns = [*df_wide_res.columns[:-n_lags], *vv_list]
        wide_parts.append(df_wide_res)

    # One concat per tile instead of re-copying the accumulated frame each iteration.
    df_wide_full = pd.concat(wide_parts, axis=0) if wide_parts else pd.DataFrame()

    # The output folder follows idprov, matching the input path built in get_df_values.
    if data == 'not imputed':
        out_path = '/data/ksa/03_Sampling/data-wide/'+idprov+'/wide_data_'+mgrs+'.pkl'
    elif data == 'imputed':
        out_path = '/data/ksa/04_Data_Preprocessing/'+idprov+'/01_imputation/wide_data/wide_data_'+mgrs+'.pkl'
    else:
        raise ValueError("data must be 'not imputed' or 'imputed', got " + repr(data))
    with open(out_path, 'wb') as f:
        pickle.dump(df_wide_full, f)

100%|██████████| 15/15 [09:57<00:00, 39.83s/it]


In [77]:
# Maps each class label to the observation code (stored as text) used in the
# sampling tables; rows recoded to '99.0' (class 'P') are dropped by the next cell.
recode_dict = {
    'V1': '1.0',
    'V2': '2.0',
    'G': '3.0',
    'H': '4.0',
    'PL': '5.0',
    'P': '99.0',
    'NP': '6.0',
    'NV': '7.0',
    'BL': '0.0'
}

# NOTE(review): this repeats the tile list already defined in an earlier cell;
# keep the two in sync or define it once.
mgrs_ = ['48MXT', '48MYT', '48MXU', '48MXS', '48MYS', '48MZT', '48MZS',
       '49MAN', '49MAM', '49MBM', '49MBN', '49MAP', '49MBP', '48MZU',
       '48MYU']
# Displays whatever `mgrs` currently holds in the kernel (the last tile of the
# previous loop); this line fails if no earlier cell has assigned it.
mgrs


'48MYU'

In [78]:
# For every tile: load the wide table, recode the class labels, drop the rows
# recoded to '99.0', normalise the column names and save the VV sampling table.
sampling_id_columns = ['idpoint','idsubsegment','idsegment','nth','periode',
                       'observation', 'class','MGRS']

for mgrs in tqdm(mgrs_):
    # The folder follows idprov (the same segment get_df_values uses for its input).
    if data == 'not imputed':
        wide_dir = '/data/ksa/03_Sampling/data-wide/'+idprov+'/'
    elif data == 'imputed':
        wide_dir = '/data/ksa/04_Data_Preprocessing/'+idprov+'/01_imputation/wide_data/'
    else:
        # Without this branch an unknown mode would silently reuse a stale
        # df_sampling left in the kernel (or fail with a NameError).
        raise ValueError("data must be 'not imputed' or 'imputed', got " + repr(data))

    # NOTE: pickle.load can execute arbitrary code; only load files this pipeline wrote.
    with open(wide_dir+'wide_data_'+mgrs+'.pkl', 'rb') as f:
        df_wide = pickle.load(f)

    # Each step returns a new frame, so no column is ever assigned onto a filtered slice.
    df_recoded = df_wide.assign(observation=df_wide['class'].replace(recode_dict))
    df_kept = df_recoded.loc[df_recoded.observation != '99.0']
    df_sampling = (
        df_kept
        # idsegment = idsubsegmen without its last two characters.
        .assign(idsegment=df_kept['idsubsegmen'].str[:-2])
        .rename(columns={
            'idsubsegmen': 'idsubsegment',
            'bulan': 'nth',
            'year_id_per_image': 'periode',
        })
    )
    # Identifier columns first, then VV_30 ... VV_0 in the order given by vv_list.
    df_sampling = df_sampling[sampling_id_columns + vv_list]

    with open(wide_dir+'sampling_VV_'+mgrs+'.pkl', 'wb') as f:
        pickle.dump(df_sampling, f)


100%|██████████| 15/15 [00:44<00:00,  2.95s/it]


In [79]:
# Row counts per (observation, class) pair for the last tile processed above;
# a class appearing under more than one observation code would indicate a recoding problem.
df_sampling.groupby(['observation','class']).size()

observation  class
0.0          BL       170500
1.0          V1       170750
2.0          V2       127450
3.0          G        171500
4.0          H        152200
5.0          PL       132925
6.0          NP        12700
7.0          NV        28825
dtype: int64

In [81]:
# Spot check: pull a single record straight from one tile's imputed pickle.
idprov = '32'
mgrs = '48MYU'
imputed_path = '/data/ksa/04_Data_Preprocessing/'+idprov+'/01_imputation/'+mgrs+'_imputed_data.pkl'
with open(imputed_path, 'rb') as f:
    df_values = pickle.load(f)

# Select exactly one point and one period.
is_point = df_values.idpoint == '321318003A1#01'
is_period = df_values.periode == '20221215_20221226'
df_values.loc[is_point & is_period, :]

Unnamed: 0,periode,idpoint,MGRS,weight,Sigma0_VH_db_imputation,Sigma0_VV_db_imputation
714479,20221215_20221226,321318003A1#01,48MYU,0,-19.935861,-12.366875


In [None]:
# Show every row of the current df_sampling for one point, ordered by period,
# for comparison with the raw record looked up above.
df_sampling.sort_values("periode").query("idpoint == '321318003A1#01'")

In [22]:
# df_sampling.sort_values("periode").query("idpoint == '321318003A1#01'")
# df_values.periode.unique()

In [None]:
# with open('/data/ksa/03_Sampling/data-wide/32/sampling_VH_'+mgrs+'.pkl', 'rb') as f:
#     df_VH = pickle.load(f)
# df_VH.head(2)

In [None]:
# with open('/data/ksa/03_Sampling/data-wide/32/sampling_VV_'+mgrs+'.pkl', 'rb') as f:
#     df_VV = pickle.load(f)
# df_VV.head(2)

In [None]:
# df_label.loc[df_label.id_x == '327503106A2']

In [None]:
# df_VH.loc[df_VH.observation.isna()]