In [14]:
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np
import pandas as pd
import seaborn as sns
import datetime as dt
from pathlib import Path
import warnings
import os
import random
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler
from collections import deque

warnings.simplefilter('ignore')

In [15]:
def fix_all_seeds(seed):
    np.random.seed(seed)
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    
fix_all_seeds(0)

In [16]:
SAVE_DF = True
SAVE_DF_DIR = Path("/content/drive/MyDrive/Kaggle/BlueCarbon/proc/FE_20230417")

# データ読み込み

In [17]:
train = pd.read_pickle("/content/drive/MyDrive/Kaggle/BlueCarbon/proc/train.pkl")
test = pd.read_pickle("/content/drive/MyDrive/Kaggle/BlueCarbon/proc/test.pkl")

# 特徴抽出

## minmaxをとる

In [18]:
# Sentinelの列名抽出
cols_sentinel = train.columns.to_list()[88:313]
indicators_sentinel = list(set([col[4:] for col in cols_sentinel]))
print(len(indicators_sentinel))
indicators_sentinel[:5]

76


['DVI', 'MCARI_MTVI2', 'Chlred_edge', 'CLD', 'SR672_708']

In [19]:
df_train_sentinel = pd.DataFrame()
df_test_sentinel = pd.DataFrame()
for indicator in indicators_sentinel:
    if indicator in ["CI", "CVI"]:
        continue
    df_train_sentinel[f"MINMAXDiff_{indicator}"] = train[f"MAX_{indicator}"] - train[f"MIN_{indicator}"]
    df_test_sentinel[f"MINMAXDiff_{indicator}"] = test[f"MAX_{indicator}"] - test[f"MIN_{indicator}"]

df_train_sentinel

Unnamed: 0,MINMAXDiff_DVI,MINMAXDiff_MCARI_MTVI2,MINMAXDiff_Chlred_edge,MINMAXDiff_CLD,MINMAXDiff_SR672_708,MINMAXDiff_B02,MINMAXDiff_AFRI1600,MINMAXDiff_MVI,MINMAXDiff_BRI,MINMAXDiff_IPVI,...,MINMAXDiff_TCI,MINMAXDiff_B06,MINMAXDiff_Datt4,MINMAXDiff_CARI,MINMAXDiff_GDVI,MINMAXDiff_BGI,MINMAXDiff_B03,MINMAXDiff_Bcc,MINMAXDiff_D678_500,MINMAXDiff_ARI
0,0.995833,0.208005,0.436992,0.000000,0.380258,533.236877,0.461905,6.660156,1.088456e+06,0.174237,...,27.275394,244.267426,11.912070,0.054051,0.017210,0.756266,304.929688,0.105740,0.032287,283.902863
1,2.924281,14.366846,1.202944,25.433641,0.411245,1016.696594,0.391054,2.606226,5.220135e+01,0.174424,...,89.906555,2225.026367,2.477750,0.394545,0.247894,0.350933,984.856567,0.088807,0.039344,5.884838
2,0.572618,2.244556,0.377514,26.501945,0.276887,868.334534,0.176764,3.446855,1.865566e+01,0.061699,...,71.606461,361.118286,0.968976,0.052253,0.079640,0.184313,827.519531,0.034656,0.027067,2.384067
3,0.436445,0.275386,0.665889,6.377331,0.216168,528.349915,0.089749,0.634893,4.663439e+02,0.108289,...,69.952789,849.447388,4.312903,0.169855,0.049780,0.360510,565.498718,0.080655,0.038762,13.167384
4,0.291842,0.100062,0.206414,2.605357,0.095370,775.733032,0.130834,1.162069,2.424203e+03,0.113621,...,63.780663,753.723572,4.944896,0.149159,0.038268,0.164914,650.589172,0.111413,0.044168,34.427551
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14135,0.640972,0.182385,0.702199,0.000000,0.187708,296.149811,0.232173,2.489504,1.968195e+03,0.092319,...,26.248377,445.368469,18.069717,0.063466,0.020889,0.337220,272.639343,0.117505,0.024062,21.982435
14136,0.585659,0.235923,1.171963,16.990030,0.235718,861.009277,0.176663,2.887172,9.054682e+02,0.131170,...,94.408478,1063.351196,3.507872,0.204909,0.066681,0.459513,771.872192,0.069012,0.058365,14.142537
14137,0.832658,0.202077,52.571980,0.000000,0.305635,568.970032,0.614038,4.024347,1.795459e+06,0.131678,...,41.373905,347.541565,8.966651,0.107973,0.014246,0.392797,427.520081,0.097816,0.033676,175.207016
14138,0.828902,1.633218,2.496309,39.620594,0.346685,748.606934,0.376230,3.518513,3.267029e+02,0.171678,...,57.476471,1610.120361,1.143905,0.255572,0.200968,0.146080,536.903076,0.053124,0.099156,4.521282


In [20]:
if SAVE_DF:
    df_train_sentinel.to_pickle(SAVE_DF_DIR / "20230428_train_sentinel_minmaxDiff.pkl")
    df_test_sentinel.to_pickle(SAVE_DF_DIR / "20230428_test_sentinel_minmaxDiff.pkl")