## Area of Interest - South Africa

In [None]:
# ensure you have the required python packages
import sys
! pip install -q -r requirements.txt

[K     |████████████████████████████████| 962 kB 5.2 MB/s 
[K     |████████████████████████████████| 18.3 MB 53.9 MB/s 
[K     |████████████████████████████████| 196 kB 82.5 MB/s 
[K     |████████████████████████████████| 40 kB 5.8 MB/s 
[K     |████████████████████████████████| 67.4 MB 158 kB/s 
[K     |████████████████████████████████| 10.5 MB 26.5 MB/s 
[K     |████████████████████████████████| 69 kB 7.9 MB/s 
[K     |████████████████████████████████| 6.3 MB 73.5 MB/s 
[K     |████████████████████████████████| 15.4 MB 64.4 MB/s 
[K     |████████████████████████████████| 131 kB 64.9 MB/s 
[K     |████████████████████████████████| 8.5 MB 63.0 MB/s 
[K     |████████████████████████████████| 128 kB 90.3 MB/s 
[K     |████████████████████████████████| 132 kB 73.7 MB/s 
[K     |████████████████████████████████| 63 kB 2.0 MB/s 
[K     |████████████████████████████████| 133 kB 74.1 MB/s 
[K     |████████████████████████████████| 53.9 MB 1.5 MB/s 
[K     |███████████████████

In [None]:
# Jupyter notebook related
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Built-in modules
import os
import time
import random
import glob
import json
from typing import Tuple, List
from datetime import datetime, timedelta
import pickle
import shutil
from pathlib import Path
from scipy.stats import gmean
import warnings
warnings.filterwarnings('ignore')

# Basics of Python data handling and visualization
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.colors import ListedColormap
from tqdm.auto import tqdm

# Utils
from utils import unzipper
from utils.utils_meoteq import S1Extractor, S2Extractor, PlanetExtractor
from utils.train_valid import train_val

# Machine learning
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedShuffleSplit,StratifiedKFold, KFold
from boostaroota import BoostARoota
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

#  Deep learning
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import breizhcrops as bzh
import copy
from torch.utils.data import Subset
from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau


In [None]:
def seed_setter(seed_value):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, and every CUDA device when CUDA is available), and switches cuDNN
    to deterministic behaviour on GPU.

    Args:
        seed_value: integer seed shared by all generators.
    """
    # Exported for completeness: the variable only influences hash randomisation
    # of Python processes that are started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        # The cuDNN auto-tuner can select different kernels from run to run,
        # so it is disabled as well to keep GPU results repeatable.
        torch.backends.cudnn.benchmark = False

seed_setter(2021)

## Paths

In [None]:
# Root folder for everything this notebook writes
output_path = Path('./SA')

# Downloaded data (Sentinel-2, Sentinel-1, Planet 5 days) is saved here
data_path = output_path / 'data'
# Data frames created further down are saved here
df_path = output_path / 'data_frames'

# Create both folders up front; existing folders are left untouched
for folder in (data_path, df_path):
    folder.mkdir(parents=True, exist_ok=True)

## Download the data
You do not need to run this section if you already downloaded the data - but make sure to modify the data path above to where you saved the data.

In [None]:
# Set the Radiant MLHub download credentials
from getpass import getpass

from radiant_mlhub import Dataset
from radiant_mlhub import Collection

# The API key must never be stored in the notebook. It is taken from the
# MLHUB_API_KEY environment variable when that is already set; otherwise it is
# asked for interactively (the input is not echoed and not saved in the notebook).
# NOTE(review): the key that used to be hard-coded in this cell was committed in
# clear text and should be revoked.
if not os.environ.get('MLHUB_API_KEY'):
    os.environ['MLHUB_API_KEY'] = getpass('Radiant MLHub API key: ')


In [None]:
ds = Dataset.fetch('ref_fusion_competition_south_africa')

# These two Planet collections are skipped; the *_planet_5day collections are still downloaded.
excluded_collections = (
    'ref_fusion_competition_south_africa_train_source_planet',
    'ref_fusion_competition_south_africa_test_source_planet',
)

for c in ds.collections:
    if c.id in excluded_collections:
        continue
    print(f'Downloading {c.id} ...')
    coll = Collection.fetch(c.id)
    coll.download(data_path)
    # Pause for a minute before requesting the next collection
    # (presumably to stay clear of API rate limits - TODO confirm).
    time.sleep(60)

Downloading ref_fusion_competition_south_africa_train_source_planet_5day ...
Downloading ref_fusion_competition_south_africa_train_source_sentinel_1 ...
Downloading ref_fusion_competition_south_africa_train_source_sentinel_2 ...
Downloading ref_fusion_competition_south_africa_test_source_planet_5day ...
Downloading ref_fusion_competition_south_africa_test_source_sentinel_1 ...
Downloading ref_fusion_competition_south_africa_test_source_sentinel_2 ...
Downloading ref_fusion_competition_south_africa_train_labels ...
Downloading ref_fusion_competition_south_africa_test_labels ...


In [None]:
# Unzip the downloaded archives (every *.gz file found directly in data_path)
zip_files = glob.glob(str(data_path) + '/*.gz')
unzipper(zip_files)

INFO: Unzipping drive/MyDrive/SA/data/ref_fusion_competition_south_africa_train_source_sentinel_1.tar.gz to drive/MyDrive/SA/data
INFO: Unzipping drive/MyDrive/SA/data/ref_fusion_competition_south_africa_train_source_sentinel_2.tar.gz to drive/MyDrive/SA/data
INFO: Unzipping drive/MyDrive/SA/data/ref_fusion_competition_south_africa_test_source_sentinel_1.tar.gz to drive/MyDrive/SA/data
INFO: Unzipping drive/MyDrive/SA/data/ref_fusion_competition_south_africa_test_source_sentinel_2.tar.gz to drive/MyDrive/SA/data
INFO: Unzipping drive/MyDrive/SA/data/ref_fusion_competition_south_africa_train_labels.tar.gz to drive/MyDrive/SA/data
INFO: Unzipping drive/MyDrive/SA/data/ref_fusion_competition_south_africa_test_labels.tar.gz to drive/MyDrive/SA/data
INFO: Unzipping drive/MyDrive/SA/data/ref_fusion_competition_south_africa_train_source_planet_5day.tar.gz to drive/MyDrive/SA/data
INFO: Unzipping drive/MyDrive/SA/data/ref_fusion_competition_south_africa_test_source_planet_5day.tar.gz to drive/

## Extract the fields 
Extract the fields from each data source (Sentinel-1, Sentinel-2, Planet 5 days) as NumPy arrays and store them as `.npz` files in per-source folders.

#### Labels

In [None]:
# Train labels - folder and tile names
tr_labels_dir = 'ref_fusion_competition_south_africa_train_labels'
tr_labels_tile1 = 'ref_fusion_competition_south_africa_train_labels_34S_19E_258N'
tr_labels_tile2 = 'ref_fusion_competition_south_africa_train_labels_34S_19E_259N'

# Read the labels of both train tiles and stack them into one frame
tr_labels1, tr_labels2 = (
    gpd.read_file(f'{data_path}/{tr_labels_dir}/{tile}/labels.geojson')
    for tile in (tr_labels_tile1, tr_labels_tile2)
)
tr_labels = pd.concat([tr_labels1, tr_labels2], ignore_index=True)

# Test labels - folder and tile name
te_labels_dir = 'ref_fusion_competition_south_africa_test_labels'
te_labels_tile = 'ref_fusion_competition_south_africa_test_labels_34S_20E_259N'

# Read the labels of the single test tile
te_labels = gpd.read_file(f'{data_path}/{te_labels_dir}/{te_labels_tile}/labels.geojson')

In [None]:
# Check the number of fields (one row per field in each label frame)
n_train_fields, n_test_fields = len(tr_labels), len(te_labels)
print('Number of training fields: ', n_train_fields)
print('Number of testing fields: ', n_test_fields)

Number of training fields:  4151
Number of testing fields:  2417


In [None]:
# Distinct crop_id values present in the training labels
tr_labels['crop_id'].unique()

array([4, 2, 3, 1, 5])

#### Sentinel-1

In [None]:
# Folders that receive the fields extracted from the Sentinel-1 data
npy_dir = [f'{output_path}/train/s1', f'{output_path}/test/s1']
for directory in npy_dir:
    # exist_ok keeps the cell re-runnable and also repairs a partially created tree
    os.makedirs(directory, exist_ok=True)
    if directory == f'{output_path}/train/s1':
        # The train folder gets five numbered sub-folders (0-4)
        for c in range(5):
            os.makedirs(f"{directory}/{c}", exist_ok=True)

# File names of the SENTINEL-1 TRAINING DATA:
train_s1_folder = 'ref_fusion_competition_south_africa_train_source_sentinel_1'
train_s1_tile1 = 'ref_fusion_competition_south_africa_train_source_sentinel_1_34S_19E_258N_asc_34S_19E_258N_2017'
train_s1_tile2 = 'ref_fusion_competition_south_africa_train_source_sentinel_1_34S_19E_259N_asc_34S_19E_259N_2017'

# File names of the SENTINEL-1 TESTING DATA:
test_s1_folder = 'ref_fusion_competition_south_africa_test_source_sentinel_1'
test_s1_tile1 = 'ref_fusion_competition_south_africa_test_source_sentinel_1_asc_34S_20E_259N_2017'

In [None]:
# Extract Train data - one extractor run per train tile, each paired with its own label file
for labels_tile, s1_tile in (
    (tr_labels_tile1, train_s1_tile1),
    (tr_labels_tile2, train_s1_tile2),
):
    S1Extractor(
        rootpath=f'{data_path}/{train_s1_folder}/{s1_tile}/',                  # Path of train sentinel-1
        label_dir=f'{data_path}/{tr_labels_dir}/{labels_tile}/labels.geojson',  # Path of train labels
        npyfolder=output_path,                                                  # Where to save the extracted fields
        data_type='train',
    )

# Extract Test data
S1Extractor(
    rootpath=f'{data_path}/{test_s1_folder}/{test_s1_tile1}/',                 # Path of test sentinel-1
    label_dir=f'{data_path}/{te_labels_dir}/{te_labels_tile}/labels.geojson',  # Path of test labels
    npyfolder=output_path,                                                     # Where to save the extracted fields
    data_type='test',
)

INFO: Extracting Sentinel-1 time series: 100%|██████████| 1715/1715 [00:32<00:00, 53.56it/s]
INFO: Extracting Sentinel-1 time series: 100%|██████████| 2436/2436 [00:40<00:00, 60.31it/s]
INFO: Extracting Sentinel-1 time series: 100%|██████████| 2417/2417 [00:36<00:00, 66.90it/s] 


In [None]:
# Check the number of extracted fields (train files sit one folder level deeper than test files)
s1_train = glob.glob(f'{output_path}/train/s1/*/*.npz')
s1_test = glob.glob(f'{output_path}/test/s1/*.npz')

for title, found in (('Sentinel-1 train fields: ', s1_train),
                     ('Sentinel-1 test fields: ', s1_test)):
    print(title, len(found))

Sentinel-1 train fields:  4150
Sentinel-1 test fields:  2417


#### Sentinel-2

In [None]:
# Folders that receive the fields extracted from the Sentinel-2 data
npy_dir = [f'{output_path}/train/s2', f'{output_path}/test/s2']
for directory in npy_dir:
    # exist_ok keeps the cell re-runnable and also repairs a partially created tree
    os.makedirs(directory, exist_ok=True)
    if directory == f'{output_path}/train/s2':
        # The train folder gets five numbered sub-folders (0-4)
        for c in range(5):
            os.makedirs(f"{directory}/{c}", exist_ok=True)


# File names of the SENTINEL-2 TRAINING DATA:
s2_train_folder = 'ref_fusion_competition_south_africa_train_source_sentinel_2'
s2_train_tile1 = 'ref_fusion_competition_south_africa_train_source_sentinel_2_34S_19E_258N_34S_19E_258N_2017'
s2_train_tile2 = 'ref_fusion_competition_south_africa_train_source_sentinel_2_34S_19E_259N_34S_19E_259N_2017'


# File names of the SENTINEL-2 TESTING DATA:
s2_test_folder = 'ref_fusion_competition_south_africa_test_source_sentinel_2'
s2_test_tile = 'ref_fusion_competition_south_africa_test_source_sentinel_2_34S_20E_259N_2017'

In [None]:
# Extract train Sentinel-2 data - one extractor run per train tile, each paired with its own label file
for labels_tile, s2_tile in (
    (tr_labels_tile1, s2_train_tile1),
    (tr_labels_tile2, s2_train_tile2),
):
    S2Extractor(
        rootpath=f"{data_path}/{s2_train_folder}/{s2_tile}/",                   # Path of train sentinel-2
        label_dir=f'{data_path}/{tr_labels_dir}/{labels_tile}/labels.geojson',  # Path of train labels
        npyfolder=output_path,                                                  # Where to save the extracted fields
        data_type='train',
    )

# Extract Test Sentinel-2 data
S2Extractor(
    rootpath=f"{data_path}/{s2_test_folder}/{s2_test_tile}/",                  # Path of test sentinel-2
    label_dir=f'{data_path}/{te_labels_dir}/{te_labels_tile}/labels.geojson',  # Path of test labels
    npyfolder=output_path,                                                     # Where to save the extracted fields
    data_type='test',
)

INFO: Extracting Sentinel-1 time series: 100%|██████████| 1715/1715 [03:29<00:00,  8.20it/s]
INFO: Extracting Sentinel-1 time series: 100%|██████████| 2436/2436 [04:06<00:00,  9.89it/s]
INFO: Extracting Sentinel-1 time series: 100%|██████████| 2417/2417 [03:33<00:00, 11.34it/s]


In [None]:
# Check the number of extracted fields (train files sit one folder level deeper than test files)
s2_train = glob.glob(f'{output_path}/train/s2/*/*.npz')
s2_test = glob.glob(f'{output_path}/test/s2/*.npz')

for title, found in (('Sentinel-2 train fields: ', s2_train),
                     ('Sentinel-2 test fields: ', s2_test)):
    print(title, len(found))

Sentinel-2 train fields:  4150
Sentinel-2 test fields:  2417


#### Planet 5 days

In [None]:
# Folders that receive the fields extracted from the Planet 5-day data
npy_dir = [f'{output_path}/train/planet_5day', f'{output_path}/test/planet_5day']
for directory in npy_dir:
    # exist_ok keeps the cell re-runnable and also repairs a partially created tree
    os.makedirs(directory, exist_ok=True)
    if directory == f'{output_path}/train/planet_5day':
        # The train folder gets five numbered sub-folders (0-4)
        for c in range(5):
            os.makedirs(f"{directory}/{c}", exist_ok=True)


# File names of the Planet-5days TRAINING DATA:
planet_5days_train_folder = 'ref_fusion_competition_south_africa_train_source_planet_5day'
planet_5day_train_tile1 = '34S_19E_258N'
planet_5day_train_tile2 = '34S_19E_259N'

# File names of the Planet-5days TESTING DATA:
planet_5days_test_folder = 'ref_fusion_competition_south_africa_test_source_planet_5day'
planet_5day_test_tile = '34S_20E_259N'

In [None]:
# Extract train planet_5days - one extractor run per train tile, each paired with its own label file
for labels_tile, planet_tile in (
    (tr_labels_tile1, planet_5day_train_tile1),
    (tr_labels_tile2, planet_5day_train_tile2),
):
    PlanetExtractor(
        rootpath=f'{data_path}/{planet_5days_train_folder}',                    # Path of train planet 5days
        label_dir=f'{data_path}/{tr_labels_dir}/{labels_tile}/labels.geojson',  # Path of train labels
        npyfolder=output_path,                                                  # Where to save the extracted fields
        tile=planet_tile,                                                       # Train tile
        aoi='sa',                                                               # Area of interest
        data_type='train',                                                      # Data type (train/test)
        planet='planet_5day',                                                   # Planet data (Planet for daily, planet_5day for 5days interval)
    )


# Extract test planet_5days
PlanetExtractor(
    rootpath=f'{data_path}/{planet_5days_test_folder}/',                       # Path of test planet 5days
    label_dir=f'{data_path}/{te_labels_dir}/{te_labels_tile}/labels.geojson',  # Path of test labels
    npyfolder=output_path,                                                     # Where to save the extracted fields
    tile=planet_5day_test_tile,                                                # Test tile
    aoi='sa',                                                                  # Area of interest
    data_type='test',                                                          # Data type (train/test)
    planet='planet_5day',                                                      # Planet data (Planet for daily, planet_5day for 5days interval)
)

INFO: Extracting Planet time series: 100%|██████████| 1715/1715 [44:37<00:00,  1.56s/it]
INFO: Extracting Planet time series: 100%|██████████| 2436/2436 [47:22<00:00,  1.17s/it]
INFO: Extracting Planet time series: 100%|██████████| 2417/2417 [53:56<00:00,  1.34s/it]


In [None]:
# Check the number of extracted fields (train files sit one folder level deeper than test files)
planet5days_train = glob.glob(f'{output_path}/train/planet_5day/*/*.npz')
planet5days_test = glob.glob(f'{output_path}/test/planet_5day/*.npz')

for title, found in (('planet5days train fields: ', planet5days_train),
                     ('planet5days test fields: ', planet5days_test)):
    print(title, len(found))

planet5days train fields:  4150
planet5days test fields:  2417


## Prepare the data for Tree models


In [None]:
# Set "Preprocess" to True when running the code for the first time: the cells below then
# build the per-field data frames from the extracted .npz files and save them as CSV files.
# With False the cells below only read the CSV files that were saved earlier.
Preprocess = True

### Planet 5 days

In [None]:
# Prepare Planet 5 days train data
if Preprocess:
    # Sorted so the rows of the saved data frame come out in a reproducible order
    planet5days_train = sorted(glob.glob(f'{output_path}/train/planet_5day/*/*.npz'))
    if not planet5days_train:
        raise FileNotFoundError(f'No extracted Planet fields found in {output_path}/train/planet_5day')

    field_rows = []  # one aggregated row per field; concatenated once after the loop
    for field_path in tqdm(planet5days_train):
        field_file = Path(field_path)
        # Files are stored as <numbered folder>/<field_id>.npz; the label is the folder number + 1
        label = int(field_file.parent.name) + 1
        fid = int(field_file.stem)

        # Read both arrays inside a context manager so the .npz archive is closed again
        with np.load(field_path) as field_npz:
            features = field_npz['image_stack'].transpose(0, 2, 3, 1)
            mask = field_npz['mask']
        t, h, w, f = features.shape

        # One row per pixel; the columns hold the f values of time step 0, then time step 1, ...
        features = features.transpose(1, 2, 0, 3).reshape(h * w, t * f)

        # Remove pixels with no field_ids (0)
        features = features[mask.reshape(w * h) != 0]

        train_data = pd.DataFrame(features)
        train_data['field_id'] = fid
        train_data['label'] = label

        # group field's features based on mean value of pixels
        field_rows.append(train_data.groupby('field_id').mean().reset_index())

    traininig_data = pd.concat(field_rows, ignore_index=True)
    traininig_data.to_csv(f"{df_path}/planet_5days_train_df.csv", index=False)

planet_5days_train_df = pd.read_csv(f"{df_path}/planet_5days_train_df.csv")
planet_5days_train_df.shape

  0%|          | 0/4150 [00:00<?, ?it/s]

(4150, 194)

In [None]:
# Prepare Planet 5 days test data
if Preprocess:
    # Sorted (like the train data) so the row order does not depend on the file system listing
    planet5days_test = sorted(glob.glob(f'{output_path}/test/planet_5day/*.npz'))
    if not planet5days_test:
        raise FileNotFoundError(f'No extracted Planet fields found in {output_path}/test/planet_5day')

    field_rows = []  # one aggregated row per field; concatenated once after the loop
    for field_path in tqdm(planet5days_test):
        # Files are stored as <field_id>.npz
        fid = int(Path(field_path).stem)

        # Read both arrays inside a context manager so the .npz archive is closed again
        with np.load(field_path) as field_npz:
            features = field_npz['image_stack'].transpose(0, 2, 3, 1)
            mask = field_npz['mask']
        t, h, w, f = features.shape

        # One row per pixel; the columns hold the f values of time step 0, then time step 1, ...
        features = features.transpose(1, 2, 0, 3).reshape(h * w, t * f)

        # Remove pixels with no field_ids (0)
        features = features[mask.reshape(w * h) != 0]

        test_data = pd.DataFrame(features)
        test_data['field_id'] = fid

        # group field's features based on mean value of pixels
        field_rows.append(test_data.groupby('field_id').mean().reset_index())

    testing_data = pd.concat(field_rows, ignore_index=True)
    testing_data.to_csv(f"{df_path}/planet_5days_test_df.csv", index=False)

planet_5days_test_df = pd.read_csv(f"{df_path}/planet_5days_test_df.csv")
planet_5days_test_df.shape

  0%|          | 0/2417 [00:00<?, ?it/s]

(2417, 193)

### Sentinel-2

In [None]:
# Prepare Sentinel-2 train data
if Preprocess:
    # Sorted so the rows of the saved data frame come out in a reproducible order
    s2_train = sorted(glob.glob(f'{output_path}/train/s2/*/*.npz'))
    if not s2_train:
        raise FileNotFoundError(f'No extracted Sentinel-2 fields found in {output_path}/train/s2')

    field_rows = []  # one aggregated row per field; concatenated once after the loop
    for field_path in tqdm(s2_train):
        field_file = Path(field_path)
        # Files are stored as <numbered folder>/<field_id>.npz; the label is the folder number + 1
        label = int(field_file.parent.name) + 1
        fid = int(field_file.stem)

        # Read both arrays inside a context manager so the .npz archive is closed again
        with np.load(field_path) as field_npz:
            features = field_npz['image_stack'].transpose(0, 2, 3, 1)
            mask = field_npz['mask']
        t, h, w, f = features.shape

        # One row per pixel; the columns hold the f values of time step 0, then time step 1, ...
        features = features.transpose(1, 2, 0, 3).reshape(h * w, t * f)

        # Remove pixels with no field_ids (0)
        features = features[mask.reshape(w * h) != 0]

        train_data = pd.DataFrame(features)
        train_data['field_id'] = fid
        train_data['label'] = label

        # group field's features based on mean value of pixels
        field_rows.append(train_data.groupby('field_id').mean().reset_index())

    traininig_data = pd.concat(field_rows, ignore_index=True)
    traininig_data.to_csv(f"{df_path}/s2_train_df.csv", index=False)

s2_train_df = pd.read_csv(f"{df_path}/s2_train_df.csv")
s2_train_df.shape

  0%|          | 0/4150 [00:00<?, ?it/s]

(4150, 914)

In [None]:
# Prepare Sentinel-2 test data
if Preprocess:
    # Sorted (like the train data) so the row order does not depend on the file system listing
    s2_test = sorted(glob.glob(f'{output_path}/test/s2/*.npz'))
    if not s2_test:
        raise FileNotFoundError(f'No extracted Sentinel-2 fields found in {output_path}/test/s2')

    field_rows = []  # one aggregated row per field; concatenated once after the loop
    for field_path in tqdm(s2_test):
        # Files are stored as <field_id>.npz
        fid = int(Path(field_path).stem)

        # Read both arrays inside a context manager so the .npz archive is closed again
        with np.load(field_path) as field_npz:
            features = field_npz['image_stack'].transpose(0, 2, 3, 1)
            mask = field_npz['mask']
        t, h, w, f = features.shape

        # One row per pixel; the columns hold the f values of time step 0, then time step 1, ...
        features = features.transpose(1, 2, 0, 3).reshape(h * w, t * f)

        # Remove pixels with no field_ids (0)
        features = features[mask.reshape(w * h) != 0]

        test_data = pd.DataFrame(features)
        test_data['field_id'] = fid

        # group field's features based on mean value of pixels
        field_rows.append(test_data.groupby('field_id').mean().reset_index())

    testing_data = pd.concat(field_rows, ignore_index=True)
    testing_data.to_csv(f"{df_path}/s2_test_df.csv", index=False)

s2_test_df = pd.read_csv(f"{df_path}/s2_test_df.csv")
s2_test_df.shape

  0%|          | 0/2417 [00:00<?, ?it/s]

(2417, 913)

### Sentinel-1

In [None]:
# Prepare Sentinel-1 train data
if Preprocess:
    # Sorted so the rows of the saved data frame come out in a reproducible order
    s1_train = sorted(glob.glob(f'{output_path}/train/s1/*/*.npz'))
    if not s1_train:
        raise FileNotFoundError(f'No extracted Sentinel-1 fields found in {output_path}/train/s1')

    field_rows = []  # one aggregated row per field; concatenated once after the loop
    for field_path in tqdm(s1_train):
        field_file = Path(field_path)
        # Files are stored as <numbered folder>/<field_id>.npz; the label is the folder number + 1
        label = int(field_file.parent.name) + 1
        fid = int(field_file.stem)

        # Read both arrays inside a context manager so the .npz archive is closed again
        with np.load(field_path) as field_npz:
            features = field_npz['image_stack'].transpose(0, 2, 3, 1)
            mask = field_npz['mask']
        t, h, w, f = features.shape

        # One row per pixel; the columns hold the f values of time step 0, then time step 1, ...
        features = features.transpose(1, 2, 0, 3).reshape(h * w, t * f)

        # Remove pixels with no field_ids (0)
        features = features[mask.reshape(w * h) != 0]

        train_data = pd.DataFrame(features)
        train_data['field_id'] = fid
        train_data['label'] = label

        # group field's features based on mean value of pixels
        field_rows.append(train_data.groupby('field_id').mean().reset_index())

    traininig_data = pd.concat(field_rows, ignore_index=True)
    traininig_data.to_csv(f"{df_path}/s1_train_df.csv", index=False)

s1_train_df = pd.read_csv(f"{df_path}/s1_train_df.csv")
s1_train_df.shape

  0%|          | 0/4150 [00:00<?, ?it/s]

(4150, 84)

In [None]:
# Prepare Sentinel-1 test data
if Preprocess:
    # Sorted (like the train data) so the row order does not depend on the file system listing
    s1_test = sorted(glob.glob(f'{output_path}/test/s1/*.npz'))
    if not s1_test:
        raise FileNotFoundError(f'No extracted Sentinel-1 fields found in {output_path}/test/s1')

    field_rows = []  # one aggregated row per field; concatenated once after the loop
    for field_path in tqdm(s1_test):
        # Files are stored as <field_id>.npz
        fid = int(Path(field_path).stem)

        # Read both arrays inside a context manager so the .npz archive is closed again
        with np.load(field_path) as field_npz:
            features = field_npz['image_stack'].transpose(0, 2, 3, 1)
            mask = field_npz['mask']
        t, h, w, f = features.shape

        # One row per pixel; the columns hold the f values of time step 0, then time step 1, ...
        features = features.transpose(1, 2, 0, 3).reshape(h * w, t * f)

        # Remove pixels with no field_ids (0)
        features = features[mask.reshape(w * h) != 0]

        test_data = pd.DataFrame(features)
        test_data['field_id'] = fid

        # group field's features based on mean value of pixels
        field_rows.append(test_data.groupby('field_id').mean().reset_index())

    testing_data = pd.concat(field_rows, ignore_index=True)
    testing_data.to_csv(f"{df_path}/s1_test_df.csv", index=False)

s1_test_df = pd.read_csv(f"{df_path}/s1_test_df.csv")
s1_test_df.shape

  0%|          | 0/2417 [00:00<?, ?it/s]

(2417, 83)

## Preprocessing & Feature Extraction
Rename the columns using the band names and compute additional features (vegetation indices, red-edge indices, flowering phenology, and the growing rate of the crops).

In [None]:
# Rename the columns - interpolate the missing values
def rename_cols(df, source='s2'):
    """Give the positional feature columns band/time names and fill missing values.

    The feature columns of ``df`` are expected to be named ``'0'``, ``'1'``, ...
    Column ``t * n_bands + i`` is renamed to ``'<band i>_time_<t + 1>'``.
    For ``'s2'`` and ``'planet'`` every 0 in the frame is first replaced by NaN.
    The renamed columns are then linearly interpolated.

    Args:
        df: data frame with one row per field; it is not modified.
        source: ``'s2'``, ``'planet'`` or ``'s1'``.

    Returns:
        A new data frame with renamed and interpolated feature columns.

    Raises:
        ValueError: if ``source`` is not one of the supported names.
        KeyError: if an expected feature column is missing from ``df``.
    """
    # source -> (band names, number of time steps, whether 0 marks a missing value)
    layouts = {
        's2': (['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12'], 76, True),
        'planet': (['B01', 'B02', 'B03', 'NIR'], 48, True),
        's1': (['VV', 'VH'], 41, False),
    }
    if source not in layouts:
        raise ValueError(f"Unknown source {source!r}; expected one of {sorted(layouts)}")
    features, time_step, zero_is_missing = layouts[source]

    if zero_is_missing:
        df = df.replace(0, np.nan)

    # Build the complete mapping first so the frame is copied by a single rename
    # instead of once per column.
    n_bands = len(features)
    new_names = {
        f'{t * n_bands + i}': f'{band}_time_{t + 1}'
        for t in range(time_step)
        for i, band in enumerate(features)
    }
    df = df.rename(columns=new_names)

    cols = list(new_names.values())
    # NOTE(review): axis=0 interpolates down each column, i.e. between neighbouring
    # rows (fields), not along the time steps of one field - confirm this is intended.
    df[cols] = df[cols].interpolate(axis=0)
    return df

In [None]:
# Rename Planet5days dataframe
planet_5days_train_df, planet_5days_test_df = (
    rename_cols(frame, source='planet')
    for frame in (planet_5days_train_df, planet_5days_test_df)
)

# Rename Sentinel-2 dataframe
s2_train_df, s2_test_df = (
    rename_cols(frame, source='s2')
    for frame in (s2_train_df, s2_test_df)
)

# Rename Sentinel-1 dataframe
s1_train_df, s1_test_df = (
    rename_cols(frame, source='s1')
    for frame in (s1_train_df, s1_test_df)
)

### Compute more Features - Sentinel-2 Indices
#### Vegetation Indices

* ```NDSI: B03 /B11 ```
* ```NDMI: (B08 - B11) / (B08 + B11)```
* ```NDBI: (B11 - B08) / (B11 + B08)```
* ```NDCI: (B05 - B04) / (B05 + B04)```
* ```SAVI: (B01 - B02) / (B01 + B02 + 0.428) * (1 + 0.428)```
* ```BSI: (B11 - B04) / (B08 + B02)```
* ```NDVI_R: (B08 - B07) / (B08 + B07)```
* ```CHL: (B07 / B05) - 1```

In [None]:
def s2_veg_indices(df, times, data_type='train'):
    """Compute the Sentinel-2 band indices of this notebook for the given time steps.

    For every time step ``t`` in ``times`` the columns ``NDSI``, ``NDMI``, ``NDCI``,
    ``NDBI``, ``SAVI``, ``BSI``, ``NDVI_R`` and ``CHL`` (each suffixed with
    ``_time_<t>``) are derived from the ``B??_time_<t>`` columns of ``df``.

    Args:
        df: frame holding the band columns ``B01, B02, B03, B04, B05, B07, B08, B11``
            for each requested time step, plus ``field_id`` (and ``label`` for
            train data).
        times: iterable of time-step numbers as used in the column names.
        data_type: ``'train'`` also copies the ``label`` column; any other value
            leaves it out.

    Returns:
        A new data frame with the index columns followed by ``field_id`` (and ``label``).
    """
    # Collect all columns first and build the frame once; adding hundreds of
    # columns one at a time fragments the frame.
    columns = {}
    for step in times:  # named `step` so the imported `time` module is not shadowed
        b01 = df[f'B01_time_{step}']
        b02 = df[f'B02_time_{step}']
        b03 = df[f'B03_time_{step}']
        b04 = df[f'B04_time_{step}']
        b05 = df[f'B05_time_{step}']
        b07 = df[f'B07_time_{step}']
        b08 = df[f'B08_time_{step}']
        b11 = df[f'B11_time_{step}']

        columns[f'NDSI_time_{step}'] = b03 / b11
        columns[f'NDMI_time_{step}'] = (b08 - b11) / (b08 + b11)
        columns[f'NDCI_time_{step}'] = (b05 - b04) / (b05 + b04)
        columns[f'NDBI_time_{step}'] = (b11 - b08) / (b11 + b08)
        columns[f'SAVI_time_{step}'] = (b01 - b02) / (b01 + b02 + 0.428) * (1.0 + 0.428)
        columns[f'BSI_time_{step}'] = (b11 - b04) / (b08 + b02)
        columns[f'NDVI_R_time_{step}'] = (b08 - b07) / (b08 + b07)
        columns[f'CHL_time_{step}'] = (b07 / b05) - 1

    veg_df = pd.DataFrame(columns)
    veg_df['field_id'] = list(df['field_id'])
    if data_type == 'train':
        veg_df['label'] = list(df['label'])
    return veg_df

In [None]:
# Time steps 1..76, matching the *_time_<t> suffixes of the Sentinel-2 columns
s2_time_steps = list(range(1, 77))
train_veg_indices = s2_veg_indices(s2_train_df, s2_time_steps)
test_veg_indices = s2_veg_indices(s2_test_df, s2_time_steps, data_type='test')

train_veg_indices.head()

Unnamed: 0,NDSI_time_1,NDMI_time_1,NDCI_time_1,NDBI_time_1,SAVI_time_1,BSI_time_1,NDVI_R_time_1,CHL_time_1,NDSI_time_2,NDMI_time_2,NDCI_time_2,NDBI_time_2,SAVI_time_2,BSI_time_2,NDVI_R_time_2,CHL_time_2,NDSI_time_3,NDMI_time_3,NDCI_time_3,NDBI_time_3,SAVI_time_3,BSI_time_3,NDVI_R_time_3,CHL_time_3,NDSI_time_4,NDMI_time_4,NDCI_time_4,NDBI_time_4,SAVI_time_4,BSI_time_4,NDVI_R_time_4,CHL_time_4,NDSI_time_5,NDMI_time_5,NDCI_time_5,NDBI_time_5,SAVI_time_5,BSI_time_5,NDVI_R_time_5,CHL_time_5,...,NDCI_time_72,NDBI_time_72,SAVI_time_72,BSI_time_72,NDVI_R_time_72,CHL_time_72,NDSI_time_73,NDMI_time_73,NDCI_time_73,NDBI_time_73,SAVI_time_73,BSI_time_73,NDVI_R_time_73,CHL_time_73,NDSI_time_74,NDMI_time_74,NDCI_time_74,NDBI_time_74,SAVI_time_74,BSI_time_74,NDVI_R_time_74,CHL_time_74,NDSI_time_75,NDMI_time_75,NDCI_time_75,NDBI_time_75,SAVI_time_75,BSI_time_75,NDVI_R_time_75,CHL_time_75,NDSI_time_76,NDMI_time_76,NDCI_time_76,NDBI_time_76,SAVI_time_76,BSI_time_76,NDVI_R_time_76,CHL_time_76,field_id,label
0,0.471601,-0.169487,0.051702,0.169487,0.04192,0.384127,0.014228,0.077877,0.292888,-0.313185,0.072897,0.313185,-0.075105,0.806204,-0.000167,0.099004,0.288606,-0.312683,0.066052,0.312683,-0.103579,0.819444,0.012001,0.093128,0.282255,-0.314035,0.065015,0.314035,-0.132646,0.843427,0.018179,0.096841,0.417817,-0.239441,0.041842,0.239441,-0.02532,0.508832,0.021929,0.060154,...,0.058366,0.174939,-0.320972,0.456829,0.02436,0.08262,0.429968,-0.155111,0.04546,0.155111,-0.224025,0.394721,0.039875,0.087252,0.413703,-0.171074,0.056586,0.171074,-0.150809,0.432614,0.02477,0.079001,0.357852,-0.205915,0.051663,0.205915,-0.214498,0.543641,0.031828,0.094591,0.351615,-0.219897,0.057185,0.219897,-0.296493,0.569098,0.03191,0.089859,185161,1
1,0.441627,-0.166995,0.047602,0.166995,-0.096066,0.398501,0.022313,0.072718,0.302835,-0.285373,0.072432,0.285373,-0.093025,0.731904,-0.003949,0.088816,0.308996,-0.274062,0.063557,0.274062,-0.126684,0.704461,0.006284,0.079127,0.301852,-0.274647,0.062103,0.274647,-0.169996,0.727714,0.015162,0.081222,0.390542,-0.22993,0.048051,0.22993,-0.037204,0.526393,0.01872,0.069198,...,0.062889,0.223158,-0.308271,0.601162,0.025155,0.098919,0.445532,-0.144772,0.043923,0.144772,-0.169455,0.377075,0.050384,0.077234,0.423888,-0.170778,0.055544,0.170778,-0.021297,0.417099,0.014799,0.084987,0.315802,-0.22126,0.056162,0.22126,-0.188987,0.617608,0.031452,0.109696,0.315977,-0.232695,0.060493,0.232695,-0.282801,0.63084,0.031598,0.099254,185168,1
2,0.476811,-0.14589,0.047057,0.14589,-0.108505,0.369378,0.028222,0.102735,0.330848,-0.245089,0.075959,0.245089,-0.13052,0.651299,-0.001158,0.148639,0.333743,-0.25071,0.06553,0.25071,-0.155796,0.658619,0.010309,0.132123,0.323105,-0.250776,0.068435,0.250776,-0.168561,0.686026,0.017623,0.143369,0.349326,-0.230064,0.057534,0.230064,-0.100325,0.609525,0.023952,0.141233,...,0.06369,0.19453,-0.29001,0.536373,0.020967,0.144867,0.372908,-0.171392,0.063686,0.171392,-0.165121,0.505973,0.029502,0.173166,0.577638,-0.095368,0.041816,0.095368,0.011734,0.25522,0.029784,0.093108,0.355297,-0.191222,0.059496,0.191222,-0.175702,0.555011,0.028479,0.162744,0.348135,-0.202576,0.065186,0.202576,-0.261846,0.579708,0.028084,0.169987,185171,1
3,0.430095,-0.14705,0.051939,0.14705,-0.185088,0.434984,0.047529,0.157906,0.349927,-0.202479,0.07727,0.202479,-0.122075,0.585219,0.003884,0.199918,0.33233,-0.243536,0.061703,0.243536,-0.186722,0.663934,0.014202,0.167518,0.326887,-0.240069,0.060361,0.240069,-0.237348,0.677521,0.022752,0.177419,0.34323,-0.233077,0.055653,0.233077,-0.106349,0.634615,0.021686,0.182941,...,0.063375,0.176173,-0.332586,0.524676,0.026913,0.184522,0.440137,-0.16889,0.059999,0.16889,0.3032,0.399157,-0.052513,0.204792,0.932377,0.041579,0.020097,-0.041579,-0.089359,0.033401,0.073082,-0.031809,0.353667,-0.189556,0.049724,0.189556,-0.231263,0.561441,0.036019,0.187297,0.346355,-0.199205,0.063756,0.199205,-0.295444,0.583747,0.033317,0.185925,185172,1
4,0.43765,-0.176025,0.050283,0.176025,-0.093886,0.433662,0.033482,0.087607,0.342052,-0.248718,0.073834,0.248718,-0.076085,0.634889,-0.000627,0.123636,0.325439,-0.276288,0.063314,0.276288,-0.105683,0.705131,0.01105,0.106873,0.323852,-0.276459,0.060959,0.276459,-0.135521,0.716084,0.017446,0.105206,0.336471,-0.266902,0.053246,0.266902,-0.057604,0.67524,0.022393,0.113117,...,0.056735,0.191269,-0.299562,0.493229,0.023911,0.095953,0.398782,-0.182763,0.048469,0.182763,-0.139611,0.461867,0.024765,0.108754,0.655672,-0.073498,0.038925,0.073498,-0.00802,0.176068,0.016602,0.040801,0.363578,-0.206022,0.049885,0.206022,-0.18614,0.543061,0.029948,0.105567,0.3591,-0.217942,0.054637,0.217942,-0.27579,0.56206,0.030855,0.09992,185177,1


#### RedEdge indices - add more indices from rededge bands (vegetation bands)

In [None]:
def s2_rededge_indices(df, times, data_type='train'):
  """Compute red-edge spectral indices for every requested time step.

  For each ``t`` in ``times`` the columns ``B03_time_{t}``, ``B04_time_{t}``,
  ``B05_time_{t}``, ``B06_time_{t}``, ``B07_time_{t}`` and ``B08_time_{t}``
  are read from ``df`` and 18 index columns are produced (three variants
  each, one per red-edge band B05/B06/B07):

  * ``NDVIre{k}``: (B08 - RE_k) / (B08 + RE_k)
  * ``NDRE{1,2,3}``: normalized differences of (B06,B05), (B07,B05), (B07,B06)
  * ``CIre{k}``:   B08 / RE_k - 1
  * ``MCARI{k}``:  ((RE_k - B04) - 0.2 * (RE_k - B03)) * (RE_k / B04)
  * ``TCARI{k}``:  3 * ((RE_k - B04) - 0.2 * (RE_k - B03) * (RE_k / B04))
  * ``MTCI{1,2,3}``: (B06-B05)/(B05-B04), (B07-B05)/(B05-B04), (B07-B06)/(B06-B04)

  Args:
    df: DataFrame holding the band columns above plus ``field_id`` (and
      ``label`` when ``data_type == 'train'``).
    times: iterable of time-step identifiers used in the column names.
    data_type: ``'train'`` copies the ``label`` column to the result; any
      other value omits it.

  Returns:
    A new DataFrame with the index columns (grouped per time step, in the
    order listed above), followed by ``field_id`` and, for training data,
    ``label``.

  Note:
    No guard is applied against zero denominators; such rows yield
    ``inf``/``NaN`` as produced by pandas division.
  """
  columns = {}
  for t in times:
    green = df[f'B03_time_{t}']
    red = df[f'B04_time_{t}']
    nir = df[f'B08_time_{t}']
    re1 = df[f'B05_time_{t}']
    re2 = df[f'B06_time_{t}']
    re3 = df[f'B07_time_{t}']
    red_edges = (re1, re2, re3)

    # Red-edge NDVI: NIR against each red-edge band.
    for k, edge in enumerate(red_edges, start=1):
      columns[f'NDVIre{k}_time_{t}'] = (nir - edge) / (nir + edge)

    # Normalized differences between pairs of red-edge bands.
    columns[f'NDRE1_time_{t}'] = (re2 - re1) / (re2 + re1)
    columns[f'NDRE2_time_{t}'] = (re3 - re1) / (re3 + re1)
    columns[f'NDRE3_time_{t}'] = (re3 - re2) / (re3 + re2)

    # Red-edge chlorophyll index.
    for k, edge in enumerate(red_edges, start=1):
      columns[f'CIre{k}_time_{t}'] = (nir / edge) - 1

    for k, edge in enumerate(red_edges, start=1):
      columns[f'MCARI{k}_time_{t}'] = ((edge - red) - 0.2 * (edge - green)) * (edge / red)

    # TCARI applies the (RE / Red) ratio only to the 0.2 * (RE - Green) term.
    # The previous implementation multiplied the whole bracket by the ratio,
    # which made every TCARI column exactly 3 * MCARI (a redundant feature).
    for k, edge in enumerate(red_edges, start=1):
      columns[f'TCARI{k}_time_{t}'] = 3 * ((edge - red) - 0.2 * (edge - green) * (edge / red))

    columns[f'MTCI1_time_{t}'] = (re2 - re1) / (re1 - red)
    columns[f'MTCI2_time_{t}'] = (re3 - re1) / (re1 - red)
    columns[f'MTCI3_time_{t}'] = (re3 - re2) / (re2 - red)

  # Build the frame in one shot: inserting >1000 columns one at a time
  # fragments the DataFrame and triggers pandas' PerformanceWarning.
  rededge_df = pd.DataFrame(columns)

  rededge_df['field_id'] = list(df['field_id'])
  if data_type == 'train':
    rededge_df['label'] = list(df['label'])
  return rededge_df

In [None]:
# Sentinel-2 time steps are numbered 1..76 in the column names.
train_rededge_indices = s2_rededge_indices(s2_train_df, list(range(1, 77)))
test_rededge_indices = s2_rededge_indices(s2_test_df, list(range(1, 77)), data_type='test')

train_rededge_indices.head()

Unnamed: 0,NDVIre1_time_1,NDVIre2_time_1,NDVIre3_time_1,NDRE1_time_1,NDRE2_time_1,NDRE3_time_1,CIre1_time_1,CIre2_time_1,CIre3_time_1,MCARI1_time_1,MCARI2_time_1,MCARI3_time_1,TCARI1_time_1,TCARI2_time_1,TCARI3_time_1,MTCI1_time_1,MTCI2_time_1,MTCI3_time_1,NDVIre1_time_2,NDVIre2_time_2,NDVIre3_time_2,NDRE1_time_2,NDRE2_time_2,NDRE3_time_2,CIre1_time_2,CIre2_time_2,CIre3_time_2,MCARI1_time_2,MCARI2_time_2,MCARI3_time_2,TCARI1_time_2,TCARI2_time_2,TCARI3_time_2,MTCI1_time_2,MTCI2_time_2,MTCI3_time_2,NDVIre1_time_3,NDVIre2_time_3,NDVIre3_time_3,NDRE1_time_3,...,MTCI2_time_74,MTCI3_time_74,NDVIre1_time_75,NDVIre2_time_75,NDVIre3_time_75,NDRE1_time_75,NDRE2_time_75,NDRE3_time_75,CIre1_time_75,CIre2_time_75,CIre3_time_75,MCARI1_time_75,MCARI2_time_75,MCARI3_time_75,TCARI1_time_75,TCARI2_time_75,TCARI3_time_75,MTCI1_time_75,MTCI2_time_75,MTCI3_time_75,NDVIre1_time_76,NDVIre2_time_76,NDVIre3_time_76,NDRE1_time_76,NDRE2_time_76,NDRE3_time_76,CIre1_time_76,CIre2_time_76,CIre3_time_76,MCARI1_time_76,MCARI2_time_76,MCARI3_time_76,TCARI1_time_76,TCARI2_time_76,TCARI3_time_76,MTCI1_time_76,MTCI2_time_76,MTCI3_time_76,field_id,label
0,0.051679,0.036309,0.014228,0.015398,0.037479,0.022093,0.10899,0.075355,0.028866,98.908572,157.958242,252.225824,296.725715,473.874725,756.677472,0.318123,0.792065,0.359558,0.047,0.029912,-0.000167,0.017113,0.047167,0.030079,0.098636,0.061668,-0.000335,127.872285,194.870849,329.394067,383.616855,584.612547,988.182202,0.256248,0.728571,0.375979,0.056463,0.041219,0.012001,0.01528,...,0.737564,0.349832,0.076877,0.05752,0.031828,0.019443,0.04516,0.025739,0.166559,0.122061,0.065748,62.837931,148.097292,276.630303,188.513792,444.291877,829.890908,0.403638,0.962759,0.398338,0.074805,0.057565,0.03191,0.017314,0.042998,0.025703,0.161706,0.122163,0.065923,100.555033,185.762224,328.82321,301.665099,557.286672,986.469629,0.325728,0.830615,0.380838,185161,1
1,0.057351,0.044095,0.022313,0.013289,0.035083,0.021804,0.121681,0.092259,0.045644,75.383773,133.632326,239.398009,226.15132,400.896977,718.194026,0.296401,0.800167,0.388588,0.038578,0.023518,-0.003949,0.015073,0.04252,0.027464,0.080251,0.048169,-0.007866,116.685053,176.066038,298.142075,350.055158,528.198115,894.426226,0.226589,0.657511,0.351317,0.044331,0.031644,0.006284,0.012705,...,0.807531,0.363557,0.083312,0.060312,0.031452,0.023116,0.051996,0.028915,0.181767,0.128365,0.064946,55.257272,151.484743,291.218619,165.771817,454.45423,873.655857,0.445007,1.031455,0.405845,0.078761,0.05928,0.031598,0.019573,0.047281,0.027734,0.170989,0.126031,0.065258,87.075316,178.279763,326.242692,261.225947,534.83929,978.728076,0.349975,0.870005,0.385215,185168,1
2,0.076973,0.052638,0.028222,0.024435,0.048858,0.024453,0.166785,0.111125,0.058082,97.18223,206.478631,332.07338,291.546691,619.435893,996.220139,0.55731,1.142979,0.376078,0.068026,0.03313,-0.001158,0.034975,0.069178,0.034286,0.145983,0.06853,-0.002313,131.556853,264.574203,422.302075,394.670559,793.72261,1266.906224,0.513381,1.052739,0.356393,0.07223,0.040456,0.010309,0.031867,...,1.159859,0.36238,0.103506,0.058026,0.028479,0.045755,0.075249,0.029596,0.230912,0.1232,0.058627,95.499712,299.833496,462.471913,286.499136,899.500488,1387.41574,0.853867,1.44907,0.32106,0.106186,0.059238,0.028084,0.047245,0.078335,0.031206,0.237602,0.125937,0.057792,132.836362,367.319654,559.17787,398.509087,1101.958961,1677.533609,0.810299,1.388853,0.31959,185171,1
3,0.120287,0.080961,0.047529,0.039712,0.073176,0.033561,0.273468,0.176187,0.099802,87.073547,229.696555,377.323282,261.220642,689.089665,1131.969847,0.837559,1.599052,0.414405,0.094726,0.043704,0.003884,0.051235,0.090875,0.039826,0.209276,0.091402,0.007799,135.613906,332.961468,528.968561,406.841717,998.884403,1586.905682,0.752865,1.39359,0.36553,0.091387,0.051455,0.014202,0.04012,...,-0.807289,-0.679673,0.121274,0.069618,0.036019,0.052096,0.08563,0.033684,0.276023,0.149655,0.074729,58.208306,273.345545,449.591541,174.624918,820.036636,1348.774623,1.160234,1.976999,0.378091,0.118038,0.065985,0.033317,0.052462,0.085056,0.03274,0.267672,0.141293,0.068931,122.865341,371.803827,567.593448,368.596024,1115.41148,1702.780343,0.923773,1.551056,0.326069,185172,1
4,0.075341,0.057763,0.033482,0.017655,0.041965,0.024328,0.162959,0.122608,0.069283,87.548672,154.247058,257.836642,262.646017,462.741175,773.509927,0.375393,0.914942,0.392287,0.057595,0.034198,-0.000627,0.023443,0.058219,0.034824,0.122229,0.070818,-0.001252,128.327526,213.814722,363.210523,384.982579,641.444165,1089.631568,0.349137,0.899079,0.407625,0.061741,0.041762,0.01105,0.020031,...,0.544503,0.279798,0.079964,0.056622,0.029948,0.023449,0.050137,0.026719,0.173829,0.12004,0.061744,60.557812,162.258183,296.039932,181.673437,486.77455,888.119795,0.505359,1.110883,0.402246,0.078323,0.057267,0.030855,0.02115,0.047583,0.026459,0.169957,0.121492,0.063675,93.889325,196.176982,342.742035,281.667975,588.530946,1028.226106,0.417077,0.964355,0.386202,185177,1


In [None]:
def s2_general_indices(df, times, data_type='train'):
  """Compute band ratios and general spectral indices per time step.

  For each ``t`` in ``times`` the following columns are produced from the
  ``<BAND>_time_{t}`` columns of ``df``:

  * ``R01``:   B01 / B03
  * ``R02``:   B01 / B05
  * ``R03``:   B11 / B12
  * ``R04``:   B05 / B04
  * ``MI``:    (B8A - B11) / (B8A + B11)
  * ``MRESR``: (B06 - B01) / (B05 - B01)
  * ``PSRI``:  (B04 - B02) / B06
  * ``TVI``:   (120 * (B06 - B03) - 200 * (B04 - B03)) / 2

  Args:
    df: DataFrame holding the band columns above plus ``field_id`` (and
      ``label`` when ``data_type == 'train'``).
    times: iterable of time-step identifiers used in the column names.
    data_type: ``'train'`` copies the ``label`` column to the result; any
      other value omits it.

  Returns:
    A new DataFrame with the index columns grouped per time step, followed
    by ``field_id`` and, for training data, ``label``.
  """
  columns = {}
  for t in times:
    b01 = df[f'B01_time_{t}']
    b02 = df[f'B02_time_{t}']
    b03 = df[f'B03_time_{t}']
    b04 = df[f'B04_time_{t}']
    b05 = df[f'B05_time_{t}']
    b06 = df[f'B06_time_{t}']
    b8a = df[f'B8A_time_{t}']
    b11 = df[f'B11_time_{t}']
    b12 = df[f'B12_time_{t}']

    # Simple band ratios.
    columns[f'R01_time_{t}'] = b01 / b03
    columns[f'R02_time_{t}'] = b01 / b05
    columns[f'R03_time_{t}'] = b11 / b12
    columns[f'R04_time_{t}'] = b05 / b04

    columns[f'MI_time_{t}'] = (b8a - b11) / (b8a + b11)
    columns[f'MRESR_time_{t}'] = (b06 - b01) / (b05 - b01)
    columns[f'PSRI_time_{t}'] = (b04 - b02) / b06

    columns[f'TVI_time_{t}'] = (120 * (b06 - b03) - 200 * (b04 - b03)) / 2

  # Single construction avoids the fragmentation caused by inserting
  # hundreds of columns one at a time.
  veg_df = pd.DataFrame(columns)

  veg_df['field_id'] = list(df['field_id'])
  if data_type == 'train':
    veg_df['label'] = list(df['label'])
  return veg_df

In [None]:
# Sentinel-2 time steps are numbered 1..76 in the column names.
train_gen_indices = s2_general_indices(s2_train_df, list(range(1, 77)))
test_gen_indices = s2_general_indices(s2_test_df, list(range(1, 77)), data_type='test')

train_gen_indices.head()

Unnamed: 0,R01_time_1,R02_time_1,R03_time_1,R04_time_1,MI_time_1,MRESR_time_1,PSRI_time_1,TVI_time_1,R01_time_2,R02_time_2,R03_time_2,R04_time_2,MI_time_2,MRESR_time_2,PSRI_time_2,TVI_time_2,R01_time_3,R02_time_3,R03_time_3,R04_time_3,MI_time_3,MRESR_time_3,PSRI_time_3,TVI_time_3,R01_time_4,R02_time_4,R03_time_4,R04_time_4,MI_time_4,MRESR_time_4,PSRI_time_4,TVI_time_4,R01_time_5,R02_time_5,R03_time_5,R04_time_5,MI_time_5,MRESR_time_5,PSRI_time_5,TVI_time_5,...,R03_time_72,R04_time_72,MI_time_72,MRESR_time_72,PSRI_time_72,TVI_time_72,R01_time_73,R02_time_73,R03_time_73,R04_time_73,MI_time_73,MRESR_time_73,PSRI_time_73,TVI_time_73,R01_time_74,R02_time_74,R03_time_74,R04_time_74,MI_time_74,MRESR_time_74,PSRI_time_74,TVI_time_74,R01_time_75,R02_time_75,R03_time_75,R04_time_75,MI_time_75,MRESR_time_75,PSRI_time_75,TVI_time_75,R01_time_76,R02_time_76,R03_time_76,R04_time_76,MI_time_76,MRESR_time_76,PSRI_time_76,TVI_time_76,field_id,label
0,0.877021,0.645894,1.188666,1.109042,-0.149134,1.08833,0.283756,2282.352,0.63646,0.391573,1.223961,1.157257,-0.269233,1.057231,0.414613,539.0,0.589618,0.363893,1.206703,1.141447,-0.282738,1.048786,0.441548,-1935.8,0.546645,0.336192,1.17207,1.139072,-0.29195,1.054844,0.456229,-1804.602,0.809392,0.61046,1.16713,1.087338,-0.229125,1.047792,0.281927,-1757.91,...,1.274797,1.123968,-0.165385,1.040728,0.418506,-4847.03,0.61897,0.428464,1.351711,1.09525,-0.164836,1.053797,0.315348,-1845.72,0.616297,0.408403,1.311144,1.119959,-0.15942,1.052007,0.376441,-2878.648,0.505067,0.320191,1.304012,1.108955,-0.197224,1.058336,0.450467,-5514.758,0.475918,0.303995,1.276392,1.121308,-0.214446,1.05063,0.413877,-3785.688,185161,1
1,0.680361,0.472156,1.193073,1.099963,-0.158775,1.051031,0.359169,-3549.386,0.58579,0.344686,1.19939,1.156175,-0.244895,1.046707,0.458159,-2140.766,0.536466,0.317919,1.170538,1.135742,-0.247301,1.037732,0.488081,-5617.094,0.483798,0.286012,1.141569,1.132431,-0.257288,1.044515,0.503706,-5324.202,0.72988,0.505346,1.167014,1.100952,-0.219007,1.054279,0.366088,-3573.45,...,1.173569,1.13422,-0.215256,1.05091,0.459829,-5304.448,0.675646,0.480115,1.245446,1.091883,-0.171149,1.069133,0.295787,-2061.576,0.755022,0.505003,1.232586,1.117622,-0.154245,1.069227,0.362054,-2124.002,0.487065,0.285068,1.201295,1.119008,-0.213056,1.066198,0.498017,-6569.972,0.458013,0.272254,1.178692,1.128776,-0.229167,1.054864,0.460756,-5659.714,185168,1
2,0.698574,0.521409,1.234765,1.098761,-0.140671,1.104668,0.288487,4177.324,0.582333,0.364152,1.258714,1.164406,-0.19961,1.113998,0.392888,5936.808,0.551078,0.354797,1.225084,1.140252,-0.221204,1.102033,0.40839,3593.406,0.527035,0.336702,1.203186,1.146926,-0.230371,1.115535,0.413357,5156.532,0.622388,0.415861,1.23268,1.122092,-0.215188,1.12736,0.383905,3972.76,...,1.229272,1.136045,-0.186516,1.110624,0.384797,5652.102,0.595514,0.390696,1.27245,1.136036,-0.169346,1.170174,0.350975,10440.05,0.927742,0.752857,1.323541,1.087282,-0.099285,1.190132,0.171101,7559.424,0.528789,0.340616,1.261629,1.126519,-0.184185,1.145435,0.411961,5668.014,0.497082,0.322983,1.249545,1.139463,-0.198583,1.146489,0.372584,9309.786,185171,1
3,0.618927,0.45588,1.287703,1.10957,-0.149621,1.152005,0.28591,7325.946,0.584533,0.372947,1.335997,1.167481,-0.150721,1.172238,0.373509,10374.69,0.517471,0.339566,1.278139,1.131522,-0.208742,1.126574,0.407902,4789.068,0.46516,0.305751,1.248291,1.128476,-0.218006,1.140695,0.417654,5923.432,0.612632,0.417662,1.285689,1.117865,-0.207353,1.167226,0.37331,6021.564,...,1.269481,1.135327,-0.167232,1.14657,0.369848,9742.188,1.181213,0.793029,1.146707,1.127657,-0.082018,1.562048,0.332856,12641.252,0.91572,0.880584,1.213718,1.041019,-0.030913,0.868546,-0.038155,8011.248,0.487225,0.322734,1.298374,1.104653,-0.183523,1.162297,0.412419,4965.306,0.476315,0.313181,1.278496,1.136196,-0.193491,1.161225,0.363274,10380.314,185172,1
4,0.699173,0.507901,1.2576,1.10589,-0.171546,1.073043,0.313576,1522.116,0.644132,0.41097,1.298777,1.159439,-0.201078,1.081509,0.386679,3659.266,0.596378,0.38732,1.247545,1.135186,-0.245592,1.066726,0.414708,604.344,0.555066,0.362939,1.211513,1.129833,-0.2549,1.066468,0.427893,336.862,0.659553,0.44645,1.247603,1.112482,-0.2485,1.087789,0.395671,180.192,...,1.281153,1.120295,-0.183691,1.055957,0.403067,-2013.788,0.636485,0.427981,1.300744,1.101876,-0.175449,1.086841,0.368487,-2286.812,0.907935,0.74214,1.255118,1.081003,-0.070714,1.060105,0.171879,4619.142,0.52571,0.340797,1.31589,1.105008,-0.19851,1.072851,0.440813,-3742.002,0.492908,0.322507,1.291926,1.11559,-0.214789,1.063786,0.402074,-2133.948,185177,1


In [None]:
# Drop the Sentinel-2 bands that also exist in the Planet data.
# NOTE: this cell rebinds s2_train_df / s2_test_df, so it must run after the
# index-feature cells (which read these bands) and cannot be re-run as-is.
bands = ['B02','B03','B04','B08']
for band in bands:
  cols = [f'{band}_time_{step}' for step in range(1, 77)]
  s2_train_df = s2_train_df.drop(columns=cols)
  s2_test_df = s2_test_df.drop(columns=cols)

print(s2_train_df.shape)
print(s2_test_df.shape)

(4150, 610)
(2417, 609)


In [None]:
# Attach the three derived feature tables to the Sentinel-2 train bands.
# Train frames are joined on both keys because each of them carries `label`.
s2_train_df = (
    s2_train_df
    .merge(train_veg_indices, on=['field_id','label'], how='inner')
    .merge(train_rededge_indices, on=['field_id','label'], how='inner')
    .merge(train_gen_indices, on=['field_id','label'], how='inner')
)

s2_train_df.to_csv(f"{df_path}/all_s2_train_df.csv", index=False)

# Same for the test bands; test frames have no `label`, so join on `field_id` only.
s2_test_df = (
    s2_test_df
    .merge(test_veg_indices, on=['field_id'], how='inner')
    .merge(test_rededge_indices, on=['field_id'], how='inner')
    .merge(test_gen_indices, on=['field_id'], how='inner')
)

s2_test_df.to_csv(f"{df_path}/all_s2_test_df.csv", index=False)

s2_train_df.head()

Unnamed: 0,field_id,B01_time_1,B05_time_1,B06_time_1,B07_time_1,B8A_time_1,B09_time_1,B11_time_1,B12_time_1,B01_time_2,B05_time_2,B06_time_2,B07_time_2,B8A_time_2,B09_time_2,B11_time_2,B12_time_2,B01_time_3,B05_time_3,B06_time_3,B07_time_3,B8A_time_3,B09_time_3,B11_time_3,B12_time_3,B01_time_4,B05_time_4,B06_time_4,B07_time_4,B8A_time_4,B09_time_4,B11_time_4,B12_time_4,B01_time_5,B05_time_5,B06_time_5,B07_time_5,B8A_time_5,B09_time_5,B11_time_5,...,R01_time_72,R02_time_72,R03_time_72,R04_time_72,MI_time_72,MRESR_time_72,PSRI_time_72,TVI_time_72,R01_time_73,R02_time_73,R03_time_73,R04_time_73,MI_time_73,MRESR_time_73,PSRI_time_73,TVI_time_73,R01_time_74,R02_time_74,R03_time_74,R04_time_74,MI_time_74,MRESR_time_74,PSRI_time_74,TVI_time_74,R01_time_75,R02_time_75,R03_time_75,R04_time_75,MI_time_75,MRESR_time_75,PSRI_time_75,TVI_time_75,R01_time_76,R02_time_76,R03_time_76,R04_time_76,MI_time_76,MRESR_time_76,PSRI_time_76,TVI_time_76
0,185161,1262.8505,1955.1984,2016.3535,2107.4626,2260.7769,2975.281,3053.284,2568.6648,734.1532,1874.8804,1940.1655,2060.5007,2267.524,2259.9521,3938.3477,3217.706,664.97235,1827.3821,1884.092,1997.5632,2185.0823,2197.0872,3907.7583,3238.3765,603.0439,1793.7506,1859.054,1967.4592,2142.0078,2158.2456,3908.4336,3334.6418,1573.7266,2577.9363,2625.9297,2733.0098,2918.5828,2905.3862,4653.546,...,0.456052,0.290567,1.274797,1.123968,-0.165385,1.040728,0.418506,-4847.03,0.61897,0.428464,1.351711,1.09525,-0.164836,1.053797,0.315348,-1845.72,0.616297,0.408403,1.311144,1.119959,-0.15942,1.052007,0.376441,-2878.648,0.505067,0.320191,1.304012,1.108955,-0.197224,1.058336,0.450467,-5514.758,0.475918,0.303995,1.276392,1.121308,-0.214446,1.05063,0.413877,-3785.688
1,185168,1090.4572,2309.529,2371.7395,2477.4731,2634.674,2870.3032,3629.2263,3041.9136,659.3229,1912.8221,1971.3687,2082.7112,2254.3645,2243.4612,3716.6306,3098.7676,637.941,2006.6144,2058.2578,2165.391,2322.3938,2329.2874,3848.448,3287.7598,553.469,1935.1233,1996.628,2092.2974,2238.8274,2244.5994,3789.9631,3319.9597,1184.341,2343.6228,2406.5469,2505.7957,2661.9438,2686.352,4154.8735,...,0.441363,0.260852,1.173569,1.13422,-0.215256,1.05091,0.459829,-5304.448,0.675646,0.480115,1.245446,1.091883,-0.171149,1.069133,0.295787,-2061.576,0.755022,0.505003,1.232586,1.117622,-0.154245,1.069227,0.362054,-2124.002,0.487065,0.285068,1.201295,1.119008,-0.213056,1.066198,0.498017,-6569.972,0.458013,0.272254,1.178692,1.128776,-0.229167,1.054864,0.460756,-5659.714
2,185171,1177.5992,2258.4932,2371.6282,2490.5203,2663.4045,2876.3218,3535.399,2863.2158,620.9345,1705.1542,1828.7534,1958.6069,2150.343,2157.9846,3222.894,2560.4663,636.5318,1794.0752,1912.183,2031.1136,2207.1426,2231.0771,3460.946,2825.0674,585.98456,1740.3661,1873.7379,1989.8805,2152.5278,2185.1252,3441.152,2860.0327,797.6262,1918.0096,2060.7014,2188.896,2369.3486,2397.1465,3668.653,...,0.476477,0.308655,1.229272,1.136045,-0.186516,1.110624,0.384797,5652.102,0.595514,0.390696,1.27245,1.136036,-0.169346,1.170174,0.350975,10440.05,0.927742,0.752857,1.323541,1.087282,-0.099285,1.190132,0.171101,7559.424,0.528789,0.340616,1.261629,1.126519,-0.184185,1.145435,0.411961,5668.014,0.497082,0.322983,1.249545,1.139463,-0.198583,1.146489,0.372584,9309.786
3,185172,776.65625,1703.6406,1844.5469,1972.6562,2158.1562,2320.1562,2917.5938,2265.7344,609.6406,1634.6562,1811.2031,1961.4531,2199.7188,2201.5,2980.4844,2230.9062,563.2344,1658.6875,1797.3438,1936.5469,2143.9688,2146.6875,3275.1719,2562.4531,511.9375,1674.3594,1837.9062,1971.4219,2161.5781,2180.4531,3366.7969,2697.125,687.7656,1646.7031,1807.0625,1947.9531,2147.3438,2200.4844,3270.8125,...,0.44853,0.294064,1.269481,1.135327,-0.167232,1.14657,0.369848,9742.188,1.181213,0.793029,1.146707,1.127657,-0.082018,1.562048,0.332856,12641.252,0.91572,0.880584,1.213718,1.041019,-0.030913,0.868546,-0.038155,8011.248,0.487225,0.322734,1.298374,1.104653,-0.183523,1.162297,0.412419,4965.306,0.476315,0.313181,1.278496,1.136196,-0.193491,1.161225,0.363274,10380.314
4,185177,979.8007,1929.1163,1998.4569,2098.1196,2264.3052,2455.697,3202.0332,2546.1467,698.52155,1699.6907,1781.2948,1909.8335,2108.8516,2090.335,3170.3914,2441.0598,688.8011,1778.3755,1851.0781,1968.4353,2149.4766,2156.2803,3548.9722,2844.766,655.0718,1804.9111,1881.3383,1994.7985,2163.7327,2175.0293,3644.164,3007.9453,784.7327,1757.7183,1843.1354,1956.5454,2128.4575,2157.0027,3536.0994,...,0.473381,0.309575,1.281153,1.120295,-0.183691,1.055957,0.403067,-2013.788,0.636485,0.427981,1.300744,1.101876,-0.175449,1.086841,0.368487,-2286.812,0.907935,0.74214,1.255118,1.081003,-0.070714,1.060105,0.171879,4619.142,0.52571,0.340797,1.31589,1.105008,-0.19851,1.072851,0.440813,-3742.002,0.492908,0.322507,1.291926,1.11559,-0.214789,1.063786,0.402074,-2133.948


### Compute more Features - Planet Indices
#### Vegetation Indices

* ```MNDVI: (NIR - B03)/(NIR + B03 - 2*B01)```
* ```NDVI: (NIR - B03)/(NIR + B03)```
* ```NDWI: (B02 - NIR) / (B02 + NIR)```
* ```GNDVI: (NIR - B02) / (NIR + B02)```
* ```EVI2:  2.4 * (NIR - B03) / (NIR + B03 + 1.0)```
* ```EVI:   2.5 * (NIR - B03) / ((NIR + 6.0 * B03 - 7.5 * B01) + 1.0)```
* ```CVI: (NIR / B02) * (B03 / B02)```
* ```BI: (B03**2 + B02**2 + B01*2) /3```
* ```SI: (B03 - B01) /(B03 + B01)```

In [None]:
def planet_veg_indices(df, times, data_type='train'):
  """Compute vegetation indices from the Planet bands for every time step.

  For each ``t`` in ``times`` the columns ``NIR_time_{t}``, ``B03_time_{t}``,
  ``B02_time_{t}`` and ``B01_time_{t}`` are read and these are produced:

  * ``MNDVI``: (NIR - B03) / (NIR + B03 - 2*B01)
  * ``NDVI``:  (NIR - B03) / (NIR + B03)
  * ``NDWI``:  (B02 - NIR) / (B02 + NIR)
  * ``GNDVI``: (NIR - B02) / (NIR + B02)
  * ``EVI2``:  2.4 * (NIR - B03) / (NIR + B03 + 1.0)
  * ``EVI``:   2.5 * (NIR - B03) / (NIR + 6*B03 - 7.5*B01 + 1.0)
  * ``CVI``:   (NIR / B02) * (B03 / B02)
  * ``BI``:    (B03**2 + B02**2 + B01*2) / 3
  * ``SI``:    (B03 - B01) / (B03 + B01)

  Args:
    df: DataFrame holding the band columns above plus ``field_id`` (and
      ``label`` when ``data_type == 'train'``).
    times: iterable of time-step identifiers used in the column names.
    data_type: ``'train'`` copies the ``label`` column to the result; any
      other value omits it.

  Returns:
    A new DataFrame with the index columns grouped per time step, followed
    by ``field_id`` and, for training data, ``label``.
  """
  columns = {}
  for t in times:
    nir = df[f'NIR_time_{t}']
    b03 = df[f'B03_time_{t}']
    b02 = df[f'B02_time_{t}']
    b01 = df[f'B01_time_{t}']

    columns[f'MNDVI_time_{t}'] = (nir - b03) / (nir + b03 - 2 * b01)
    columns[f'NDVI_time_{t}'] = (nir - b03) / (nir + b03)
    columns[f'NDWI_time_{t}'] = (b02 - nir) / (b02 + nir)
    columns[f'GNDVI_time_{t}'] = (nir - b02) / (nir + b02)
    columns[f'EVI2_time_{t}'] = 2.4 * ((nir - b03) / (nir + b03 + 1.0))
    columns[f'EVI_time_{t}'] = 2.5 * ((nir - b03) / (nir + 6 * b03 - 7.5 * b01 + 1.0))
    columns[f'CVI_time_{t}'] = (nir / b02) * (b03 / b02)
    # NOTE(review): the B01 term is multiplied by 2 rather than squared, so it
    # is negligible next to the squared terms. This looks like a typo for
    # `b01**2`; kept as-is to preserve existing feature values -- confirm.
    columns[f'BI_time_{t}'] = (b03 ** 2 + b02 ** 2 + b01 * 2) / 3
    columns[f'SI_time_{t}'] = (b03 - b01) / (b03 + b01)

  # Single construction avoids the fragmentation caused by inserting
  # hundreds of columns one at a time.
  veg_df = pd.DataFrame(columns)

  veg_df['field_id'] = list(df['field_id'])
  if data_type == 'train':
    veg_df['label'] = list(df['label'])
  return veg_df


In [None]:
# The 5-day Planet composites are numbered 1..48 in the column names.
planet_train_veg_indices = planet_veg_indices(planet_5days_train_df, list(range(1, 49)))
planet_test_veg_indices = planet_veg_indices(planet_5days_test_df, list(range(1, 49)), data_type='test')

planet_train_veg_indices.head()

Unnamed: 0,MNDVI_time_1,NDVI_time_1,NDWI_time_1,GNDVI_time_1,EVI2_time_1,EVI_time_1,CVI_time_1,BI_time_1,SI_time_1,MNDVI_time_2,NDVI_time_2,NDWI_time_2,GNDVI_time_2,EVI2_time_2,EVI_time_2,CVI_time_2,BI_time_2,SI_time_2,MNDVI_time_3,NDVI_time_3,NDWI_time_3,GNDVI_time_3,EVI2_time_3,EVI_time_3,CVI_time_3,BI_time_3,SI_time_3,MNDVI_time_4,NDVI_time_4,NDWI_time_4,GNDVI_time_4,EVI2_time_4,EVI_time_4,CVI_time_4,BI_time_4,SI_time_4,MNDVI_time_5,NDVI_time_5,NDWI_time_5,GNDVI_time_5,...,BI_time_44,SI_time_44,MNDVI_time_45,NDVI_time_45,NDWI_time_45,GNDVI_time_45,EVI2_time_45,EVI_time_45,CVI_time_45,BI_time_45,SI_time_45,MNDVI_time_46,NDVI_time_46,NDWI_time_46,GNDVI_time_46,EVI2_time_46,EVI_time_46,CVI_time_46,BI_time_46,SI_time_46,MNDVI_time_47,NDVI_time_47,NDWI_time_47,GNDVI_time_47,EVI2_time_47,EVI_time_47,CVI_time_47,BI_time_47,SI_time_47,MNDVI_time_48,NDVI_time_48,NDWI_time_48,GNDVI_time_48,EVI2_time_48,EVI_time_48,CVI_time_48,BI_time_48,SI_time_48,field_id,label
0,0.303092,0.168973,-0.326634,0.326634,0.405432,0.297814,2.759372,1319757.0,0.305078,0.302129,0.168944,-0.330389,0.330389,0.40536,0.296431,2.806398,1272018.0,0.306817,0.284799,0.15971,-0.326552,0.326552,0.383205,0.274621,2.811383,1325280.0,0.313457,0.292963,0.161576,-0.323368,0.323368,0.387681,0.285564,2.761041,1291099.0,0.303014,0.303686,0.169906,-0.340547,0.340547,...,1446726.0,0.380503,0.323756,0.208886,-0.40513,0.40513,0.501216,0.316976,3.651261,1531677.0,0.380752,0.282519,0.170683,-0.352508,0.352508,0.409559,0.268533,3.090952,2224132.0,0.353796,0.259477,0.151899,-0.327519,0.327519,0.364491,0.24254,2.869155,2565505.0,0.34332,0.250399,0.14687,-0.328025,0.328025,0.35242,0.232012,2.905411,2299565.0,0.347136,185161,1
1,0.258336,0.152258,-0.334801,0.334801,0.365331,0.240949,2.962393,1479450.0,0.347375,0.262205,0.154381,-0.337283,0.337283,0.370424,0.245482,2.982748,1419989.0,0.34563,0.238498,0.142755,-0.345746,0.345746,0.342528,0.217887,3.173838,1492979.0,0.362125,0.247348,0.145954,-0.3414,0.3414,0.350204,0.228318,3.091633,1459723.0,0.35137,0.276392,0.161872,-0.353694,0.353694,...,1559836.0,0.401216,0.283612,0.18234,-0.383421,0.383421,0.437531,0.267313,3.481454,2057903.0,0.392073,0.267624,0.169464,-0.367909,0.367909,0.406632,0.24905,3.326036,2066874.0,0.387325,0.261942,0.166182,-0.368001,0.368001,0.398751,0.242411,3.349998,1785577.0,0.390397,0.259938,0.165147,-0.370939,0.370939,0.396266,0.240061,3.403145,1719096.0,0.391977,185168,1
2,0.308928,0.172582,-0.326293,0.326293,0.4141,0.305194,2.734759,1557765.0,0.304284,0.3052,0.171542,-0.32878,0.32878,0.411602,0.300082,2.77134,1517966.0,0.308373,0.284047,0.159107,-0.327295,0.327295,0.381768,0.273756,2.824241,1599521.0,0.313126,0.286146,0.154788,-0.312082,0.312082,0.371404,0.278038,2.662634,1654506.0,0.296067,0.301065,0.163847,-0.329488,0.329488,...,1729025.0,0.357382,0.291324,0.175614,-0.350414,0.350414,0.421397,0.279337,3.030581,2456993.0,0.349714,0.284764,0.169036,-0.339027,0.339027,0.40561,0.272007,2.91719,2369147.0,0.343121,0.282125,0.167202,-0.338344,0.338344,0.401202,0.268874,2.919202,2044254.0,0.343065,0.275604,0.162984,-0.338535,0.338535,0.391079,0.261127,2.94717,1931301.0,0.343908,185171,1
3,0.385432,0.210799,-0.344868,0.344868,0.505768,0.413579,2.74674,909423.0,0.270565,0.388571,0.209261,-0.342033,0.342033,0.502073,0.419637,2.720395,880752.5,0.262961,0.362766,0.19933,-0.348909,0.348909,0.478245,0.379675,2.865473,857502.9,0.279846,0.36961,0.200633,-0.338765,0.338765,0.48137,0.390545,2.729188,860970.7,0.272326,0.374901,0.206405,-0.357746,0.357746,...,1454396.0,0.347828,0.3124,0.189785,-0.36453,0.36453,0.455388,0.305337,3.13984,1842411.0,0.347317,0.292704,0.173269,-0.343123,0.343123,0.415759,0.281851,2.94598,1880915.0,0.339084,0.293728,0.174048,-0.343658,0.343658,0.417618,0.283067,2.948404,1607128.0,0.339304,0.296947,0.175566,-0.34467,0.34467,0.421261,0.287162,2.952699,1530271.0,0.33707,185172,1
4,0.322939,0.174507,-0.31949,0.31949,0.418702,0.325476,2.642411,1173698.0,0.284691,0.322785,0.1729,-0.319979,0.319979,0.414844,0.325804,2.656972,1126109.0,0.280885,0.303621,0.161231,-0.320303,0.320303,0.386848,0.301131,2.725462,1176911.0,0.282773,0.301132,0.158161,-0.304288,0.304288,0.379483,0.298541,2.55475,1218000.0,0.27879,0.317463,0.170804,-0.328728,0.328728,...,1826063.0,0.363665,0.280555,0.170102,-0.352139,0.352139,0.408169,0.266021,3.08944,2366815.0,0.356495,0.266181,0.157282,-0.334204,0.334204,0.377405,0.250003,2.924191,2431607.0,0.346372,0.257776,0.150918,-0.328455,0.328455,0.362129,0.240562,2.887017,2132268.0,0.343888,0.255948,0.15019,-0.331553,0.331553,0.360379,0.238362,2.931814,1994658.0,0.345687,185177,1


#### FLOWERING PHENOLOGY
Flowering is an essential phenological period, so we added indices that capture the spectral response of flowers, since different crops have different flower colors.

* ```Normalized Difference Yellowness Index "NDYI" ```
* ```Yellowness ratio "DYI" ```
* ``` Normalized Difference Greenness Index "NDGI" ``` 
* ``` Normalized Difference Purpleness Index "NDPI" ```
* ```The Enhanced Bloom Index for yellow flowers 'YEBI'``` 
* ```The Enhanced Bloom Index for purple flowers 'PEBI'``` 

In [None]:
def planet_bloom_indices(df, times, data_type='train'):
  """Compute flower-colour ("bloom") indices from the Planet bands.

  For each ``t`` in ``times`` the columns ``NIR_time_{t}``, ``B03_time_{t}``,
  ``B02_time_{t}`` and ``B01_time_{t}`` are read and these are produced,
  in this column order:

  * ``NDGI``: (B03 - B02) / (B03 + B02)
  * ``DYI``:  B03 / B02
  * ``NDPI``: (0.5*(B03 + B01) - B02) / (0.5*(B03 + B01) + B02)
  * ``PEBI``: NDPI / ((NDGI + 1) * NIR)
  * ``NDYI``: (0.5*(B03 + B02) - B01) / (0.5*(B03 + B02) + B01)
  * ``YEBI``: NDYI / ((NDGI + 1) * NIR)

  Args:
    df: DataFrame holding the band columns above plus ``field_id`` (and
      ``label`` when ``data_type == 'train'``).
    times: iterable of time-step identifiers used in the column names.
    data_type: ``'train'`` copies the ``label`` column to the result; any
      other value omits it.

  Returns:
    A new DataFrame with the index columns grouped per time step, followed
    by ``field_id`` and, for training data, ``label``.
  """
  columns = {}
  for t in times:
    nir = df[f'NIR_time_{t}']
    b03 = df[f'B03_time_{t}']
    b02 = df[f'B02_time_{t}']
    b01 = df[f'B01_time_{t}']

    # Blooming indices (to detect the purple / yellow flower colours of different crops).
    ndgi = (b03 - b02) / (b03 + b02)
    ndpi = (0.5 * (b03 + b01) - b02) / (0.5 * (b03 + b01) + b02)
    ndyi = (0.5 * (b03 + b02) - b01) / (0.5 * (b03 + b02) + b01)
    # Both enhanced bloom indices share the same (NDGI + 1) * NIR denominator.
    bloom_denominator = (ndgi + 1) * nir

    columns[f'NDGI_time_{t}'] = ndgi
    columns[f'DYI_time_{t}'] = b03 / b02
    columns[f'NDPI_time_{t}'] = ndpi
    columns[f'PEBI_time_{t}'] = ndpi / bloom_denominator
    columns[f'NDYI_time_{t}'] = ndyi
    columns[f'YEBI_time_{t}'] = ndyi / bloom_denominator

  # Single construction avoids the fragmentation caused by inserting
  # hundreds of columns one at a time.
  bloom_df = pd.DataFrame(columns)

  bloom_df['field_id'] = list(df['field_id'])
  if data_type == 'train':
    bloom_df['label'] = list(df['label'])
  return bloom_df

In [None]:
# The 5-day Planet composites are numbered 1..48 in the column names.
planet_train_bloom_indices = planet_bloom_indices(planet_5days_train_df, list(range(1, 49)))
planet_test_bloom_indices = planet_bloom_indices(planet_5days_test_df, list(range(1, 49)), data_type='test')

planet_train_bloom_indices.head()

Unnamed: 0,NDGI_time_1,DYI_time_1,NDPI_time_1,PEBI_time_1,NDYI_time_1,YEBI_time_1,NDGI_time_2,DYI_time_2,NDPI_time_2,PEBI_time_2,NDYI_time_2,YEBI_time_2,NDGI_time_3,DYI_time_3,NDPI_time_3,PEBI_time_3,NDYI_time_3,YEBI_time_3,NDGI_time_4,DYI_time_4,NDPI_time_4,PEBI_time_4,NDYI_time_4,YEBI_time_4,NDGI_time_5,DYI_time_5,NDPI_time_5,PEBI_time_5,NDYI_time_5,YEBI_time_5,NDGI_time_6,DYI_time_6,NDPI_time_6,PEBI_time_6,NDYI_time_6,YEBI_time_6,NDGI_time_7,DYI_time_7,NDPI_time_7,PEBI_time_7,...,NDYI_time_42,YEBI_time_42,NDGI_time_43,DYI_time_43,NDPI_time_43,PEBI_time_43,NDYI_time_43,YEBI_time_43,NDGI_time_44,DYI_time_44,NDPI_time_44,PEBI_time_44,NDYI_time_44,YEBI_time_44,NDGI_time_45,DYI_time_45,NDPI_time_45,PEBI_time_45,NDYI_time_45,YEBI_time_45,NDGI_time_46,DYI_time_46,NDPI_time_46,PEBI_time_46,NDYI_time_46,YEBI_time_46,NDGI_time_47,DYI_time_47,NDPI_time_47,PEBI_time_47,NDYI_time_47,YEBI_time_47,NDGI_time_48,DYI_time_48,NDPI_time_48,PEBI_time_48,NDYI_time_48,YEBI_time_48,field_id,label
0,0.166871,1.400588,0.0353,1.3e-05,0.233556,8.8e-05,0.170989,1.412514,0.038869,1.5e-05,0.233703,8.9e-05,0.176022,1.42725,0.041519,1.6e-05,0.238616,9e-05,0.170712,1.411707,0.040038,1.5e-05,0.229848,8.8e-05,0.181121,1.442364,0.049386,1.9e-05,0.229402,8.7e-05,0.188603,1.464886,0.055358,2.1e-05,0.231234,8.8e-05,0.193961,1.48127,0.061093,2.3e-05,...,0.311207,8.8e-05,0.212899,1.540969,0.0566,1.6e-05,0.290159,8.2e-05,0.216089,1.551311,0.05826,1.7e-05,0.293898,8.6e-05,0.214387,1.545782,0.056391,1.7e-05,0.294803,8.8e-05,0.193466,1.479745,0.044449,1.2e-05,0.27415,7.6e-05,0.184814,1.453428,0.03937,1.1e-05,0.266468,7.2e-05,0.190324,1.470125,0.043656,1.3e-05,0.268337,7.7e-05,185161,1
1,0.192347,1.476312,0.045663,1.6e-05,0.267801,9.5e-05,0.192949,1.478158,0.046933,1.7e-05,0.265725,9.5e-05,0.21353,1.543009,0.062263,2.2e-05,0.275276,9.6e-05,0.205696,1.517927,0.058048,2.1e-05,0.266857,9.4e-05,0.203472,1.510896,0.060554,2.1e-05,0.253956,8.9e-05,0.211824,1.537505,0.065448,2.3e-05,0.261578,9.3e-05,0.21367,1.54346,0.06644,2.4e-05,...,0.325207,9.2e-05,0.233566,1.609486,0.066312,1.9e-05,0.31836,9.1e-05,0.226725,1.586401,0.061984,1.8e-05,0.312148,9e-05,0.216195,1.551656,0.054211,1.5e-05,0.306234,8.3e-05,0.21164,1.536914,0.051155,1.4e-05,0.302856,8.5e-05,0.214965,1.547657,0.053525,1.6e-05,0.304898,9.2e-05,0.219222,1.561546,0.057413,1.8e-05,0.305004,9.4e-05,185168,1
2,0.162883,1.389154,0.03151,1.1e-05,0.234346,8.1e-05,0.166637,1.399914,0.0338,1.2e-05,0.237086,8.3e-05,0.177427,1.431394,0.043092,1.5e-05,0.237707,8.2e-05,0.165278,1.396007,0.037124,1.3e-05,0.224816,7.8e-05,0.175094,1.424518,0.047846,1.6e-05,0.219122,7.5e-05,0.183319,1.448938,0.054036,1.8e-05,0.222011,7.6e-05,0.18424,1.451701,0.055255,1.9e-05,...,0.285861,8.3e-05,0.197795,1.493129,0.046054,1.4e-05,0.280796,8.4e-05,0.195715,1.486679,0.045462,1.3e-05,0.277074,8.1e-05,0.186262,1.457792,0.038496,1e-05,0.272641,7.2e-05,0.180325,1.43999,0.034806,1e-05,0.268021,7.4e-05,0.181404,1.443208,0.035942,1.1e-05,0.267538,7.9e-05,0.185803,1.456407,0.040174,1.2e-05,0.2667,8.2e-05,185171,1
3,0.14458,1.338032,0.025863,1.1e-05,0.206923,8.9e-05,0.143008,1.333744,0.027259,1.2e-05,0.199733,8.8e-05,0.160759,1.383106,0.038777,1.7e-05,0.209815,9.3e-05,0.148205,1.347984,0.028874,1.3e-05,0.207229,9.3e-05,0.163407,1.390648,0.042655,1.8e-05,0.205633,8.9e-05,0.176156,1.427644,0.05147,2.2e-05,0.211865,9.1e-05,0.174994,1.424226,0.050112,2.1e-05,...,0.280816,9e-05,0.191456,1.473581,0.042963,1.4e-05,0.273221,8.8e-05,0.190195,1.469731,0.043265,1.4e-05,0.269118,8.5e-05,0.187733,1.462244,0.040906,1.2e-05,0.269539,8e-05,0.18059,1.440781,0.036583,1.1e-05,0.263671,8.1e-05,0.180401,1.440218,0.036306,1.2e-05,0.263977,8.7e-05,0.179995,1.43901,0.036721,1.2e-05,0.261788,8.8e-05,185172,1
4,0.153543,1.362791,0.0295,1.2e-05,0.217814,8.8e-05,0.155693,1.368807,0.033182,1.4e-05,0.212982,8.8e-05,0.167734,1.403079,0.044792,1.8e-05,0.20999,8.5e-05,0.153515,1.362713,0.031771,1.3e-05,0.211712,8.7e-05,0.167319,1.401879,0.043728,1.8e-05,0.211864,8.6e-05,0.17584,1.426713,0.050501,2e-05,0.213725,8.6e-05,0.177984,1.433044,0.054203,2.2e-05,...,0.295781,9e-05,0.203687,1.511576,0.048955,1.5e-05,0.287915,9e-05,0.19973,1.499158,0.047328,1.4e-05,0.282185,8.3e-05,0.193635,1.480267,0.043632,1.2e-05,0.276939,7.5e-05,0.186738,1.459233,0.040227,1.1e-05,0.268931,7.4e-05,0.186797,1.45941,0.041209,1.2e-05,0.26629,7.9e-05,0.190868,1.471786,0.044756,1.4e-05,0.266596,8.2e-05,185177,1


In [None]:
def planet_general_indices(df ,times, data_type='train'):
  """Compute per-time-step spectral indices from the Planet band columns.

  For every ``t`` in ``times`` the columns ``B01_time_t``, ``B02_time_t``,
  ``B03_time_t`` and ``NIR_time_t`` of ``df`` are combined into:

  * ``ARVI_time_t`` = (NIR - 2*B03 + B01) / (NIR + 2*B03 + B01)
  * ``SIPI_time_t`` = (NIR - B01) / (NIR + B03)
  * ``EXG_time_t``  = 2*B02 - B03 - B01
  * ``ACI_time_t``  = NIR * (B03 + B02)

  NOTE(review): the ARVI and SIPI denominators as coded may differ from the
  forms usually quoted in the literature -- confirm the intended formulas and
  the band-to-colour mapping before relying on the index names.

  Args:
    df: DataFrame holding the band columns above plus ``field_id`` (and
      ``label`` when ``data_type == 'train'``).
    times: iterable of time-step numbers used in the column names.
    data_type: ``'train'`` copies the ``label`` column into the result; any
      other value leaves it out.

  Returns:
    A new DataFrame with four index columns per time step (in the order
    ARVI, SIPI, EXG, ACI), followed by ``field_id`` and, for training data,
    ``label``. ``df`` itself is not modified.
  """
  # Collect every column first and build the frame once: inserting ~200
  # columns one by one into an empty DataFrame fragments its internal blocks.
  index_columns = {}
  for time in times:
    b01 = df[f'B01_time_{time}']
    b02 = df[f'B02_time_{time}']
    b03 = df[f'B03_time_{time}']
    nir = df[f'NIR_time_{time}']

    index_columns[f'ARVI_time_{time}'] = (nir - 2*b03 + b01) / (nir + 2*b03 + b01)
    index_columns[f'SIPI_time_{time}'] = (nir - b01) / (nir + b03)
    index_columns[f'EXG_time_{time}'] = (2 * b02 - b03 - b01)
    index_columns[f'ACI_time_{time}'] = nir * (b03 + b02)

  veg_df = pd.DataFrame(index_columns)

  # Identifier / target columns are copied positionally (as plain lists).
  veg_df['field_id'] = list(df['field_id'])
  if data_type == 'train':
    veg_df['label'] = list(df['label'])
  return veg_df


In [None]:
# Planet time steps are numbered 1..48 in the column names
planet_time_steps = list(range(1, 49))

planet_train_gen_indices = planet_general_indices(planet_5days_train_df, planet_time_steps)
planet_test_gen_indices = planet_general_indices(planet_5days_test_df, planet_time_steps, data_type='test')

planet_train_gen_indices.head()

Unnamed: 0,ARVI_time_1,SIPI_time_1,EXG_time_1,ACI_time_1,ARVI_time_2,SIPI_time_2,EXG_time_2,ACI_time_2,ARVI_time_3,SIPI_time_3,EXG_time_3,ACI_time_3,ARVI_time_4,SIPI_time_4,EXG_time_4,ACI_time_4,ARVI_time_5,SIPI_time_5,EXG_time_5,ACI_time_5,ARVI_time_6,SIPI_time_6,EXG_time_6,ACI_time_6,ARVI_time_7,SIPI_time_7,EXG_time_7,ACI_time_7,ARVI_time_8,SIPI_time_8,EXG_time_8,ACI_time_8,ARVI_time_9,SIPI_time_9,EXG_time_9,ACI_time_9,ARVI_time_10,SIPI_time_10,EXG_time_10,ACI_time_10,...,EXG_time_39,ACI_time_39,ARVI_time_40,SIPI_time_40,EXG_time_40,ACI_time_40,ARVI_time_41,SIPI_time_41,EXG_time_41,ACI_time_41,ARVI_time_42,SIPI_time_42,EXG_time_42,ACI_time_42,ARVI_time_43,SIPI_time_43,EXG_time_43,ACI_time_43,ARVI_time_44,SIPI_time_44,EXG_time_44,ACI_time_44,ARVI_time_45,SIPI_time_45,EXG_time_45,ACI_time_45,ARVI_time_46,SIPI_time_46,EXG_time_46,ACI_time_46,ARVI_time_47,SIPI_time_47,EXG_time_47,ACI_time_47,ARVI_time_48,SIPI_time_48,EXG_time_48,ACI_time_48,field_id,label
0,-0.015451,0.363236,-169.19443,6319904.0,-0.015999,0.364061,-182.548593,6104119.0,-0.024897,0.360246,-198.208459,6256425.0,-0.020322,0.35655,-189.749159,6101642.0,-0.015215,0.364693,-227.300313,5939932.0,-0.020351,0.363102,-253.431214,5887422.0,-0.022757,0.360235,-278.05382,5810187.0,-0.018596,0.362576,-285.261951,5932224.0,-0.017067,0.373805,-275.06341,5682118.0,-0.016573,0.383607,-265.476724,5457673.0,...,102.930365,4052077.0,0.260783,0.631278,2.562234,5074053.0,0.169029,0.577434,-96.771288,6059426.0,0.087105,0.513379,-190.737483,7202400.0,0.034773,0.459717,-269.839196,8368476.0,0.012718,0.443384,-279.266299,8079219.0,-0.005893,0.427041,-278.280357,8149360.0,-0.028556,0.387413,-269.075592,10832250.0,-0.039755,0.368653,-257.752137,11972900.0,-0.044675,0.366708,-269.694088,10649900.0,185161,1
1,-0.040697,0.37082,-226.083173,6933742.0,-0.038577,0.371582,-227.760736,6685878.0,-0.052258,0.370656,-305.63596,6927055.0,-0.046634,0.368014,-283.73097,6794409.0,-0.03076,0.373766,-290.537715,6678198.0,-0.041813,0.370663,-313.137922,6612886.0,-0.050844,0.363681,-320.569098,6602250.0,-0.047749,0.36688,-325.517653,6754331.0,-0.04764,0.374327,-321.931246,6611227.0,-0.048735,0.379741,-317.046696,6442342.0,...,-9.293427,4976900.0,0.176736,0.580128,-90.466896,5864098.0,0.112793,0.540747,-176.641087,6675949.0,0.057248,0.499197,-235.096524,7388325.0,0.018707,0.46591,-301.698348,7849502.0,-0.006406,0.439649,-304.854749,8444641.0,-0.030208,0.412632,-308.556263,10369360.0,-0.03904,0.40134,-292.818341,10122280.0,-0.042469,0.400302,-284.089986,8697641.0,-0.043722,0.400239,-298.336407,8370449.0,185168,1
2,-0.012513,0.365615,-164.330877,7499496.0,-0.014523,0.366803,-173.536478,7306947.0,-0.025245,0.359626,-225.938467,7547397.0,-0.023175,0.347864,-200.044031,7689164.0,-0.016004,0.354037,-253.566875,7650545.0,-0.020934,0.353764,-282.942889,7508510.0,-0.022979,0.351052,-286.272476,7307942.0,-0.020552,0.34656,-278.75129,7329649.0,-0.017429,0.361546,-265.67115,6852723.0,-0.014949,0.376003,-255.416175,6419829.0,...,-112.137436,7575099.0,0.057668,0.473782,-166.891489,8276080.0,0.028573,0.447565,-212.830877,8837106.0,0.017489,0.438757,-223.708917,8592099.0,0.010535,0.429323,-228.519344,8203913.0,-0.005064,0.41214,-242.123066,8995442.0,-0.023582,0.389215,-245.917833,12047620.0,-0.026717,0.381319,-219.31577,11427010.0,-0.0281,0.379927,-210.299005,9827848.0,-0.031556,0.377177,-228.069455,9224403.0,185171,1
3,0.026364,0.378858,-104.983027,4690203.0,0.027441,0.3739,-109.270156,4523446.0,0.014923,0.374402,-151.596888,4357689.0,0.018141,0.371728,-113.845827,4356213.0,0.021171,0.378482,-168.298444,4506740.0,0.022268,0.38816,-195.579915,4328436.0,0.013107,0.379194,-196.223479,4479998.0,0.011073,0.374852,-205.048091,4962063.0,0.0097,0.382333,-203.114569,5064901.0,0.017724,0.400525,-173.8529,4356329.0,...,5.195191,4505237.0,0.150439,0.539721,-60.704385,5179248.0,0.096635,0.500464,-127.410184,5870127.0,0.053264,0.466116,-177.312588,6599208.0,0.033988,0.444293,-190.19802,6795204.0,0.004803,0.414749,-212.507779,7692281.0,-0.011912,0.398645,-226.373409,9309024.0,-0.022305,0.382615,-205.693069,9152456.0,-0.021774,0.383297,-188.685997,7831823.0,-0.019962,0.383403,-186.407355,7479095.0,185172,1
4,-0.005129,0.357438,-134.941704,5643202.0,-0.00515,0.354274,-148.813608,5402950.0,-0.01431,0.346128,-204.510015,5549123.0,-0.015298,0.341691,-148.400595,5662508.0,-0.007783,0.354417,-195.12187,5410361.0,-0.009064,0.357289,-222.224083,5338018.0,-0.008873,0.354317,-236.198843,5216505.0,-0.011078,0.352804,-233.186087,5517965.0,-0.010356,0.366349,-224.559717,5170774.0,-0.008359,0.380151,-212.899208,4766578.0,...,-52.070273,5767303.0,0.079409,0.496864,-135.899411,6638848.0,0.036888,0.462855,-209.294898,7367641.0,0.013267,0.443044,-233.198843,7678373.0,0.003936,0.429159,-232.568683,7538992.0,-0.019211,0.403314,-258.055014,9211620.0,-0.02978,0.388205,-272.168223,11514440.0,-0.036606,0.374082,-255.92976,11484480.0,-0.040663,0.36819,-245.742861,9940318.0,-0.041749,0.368493,-257.605393,9302888.0,185177,1


In [None]:
# Merge Planet train data: attach each index table on (field_id, label)
train_index_frames = (planet_train_veg_indices, planet_train_bloom_indices, planet_train_gen_indices)
for index_frame in train_index_frames:
  planet_5days_train_df = pd.merge(planet_5days_train_df, index_frame, on=['field_id','label'], how='inner')

planet_5days_train_df.to_csv(f"{df_path}/all_planet_5days_train_df.csv", index=False)

# Merge Planet test data: same tables, keyed on field_id only (no label)
test_index_frames = (planet_test_veg_indices, planet_test_bloom_indices, planet_test_gen_indices)
for index_frame in test_index_frames:
  planet_5days_test_df = pd.merge(planet_5days_test_df, index_frame, on=['field_id'], how='inner')

planet_5days_test_df.to_csv(f"{df_path}/all_planet_5days_test_df.csv", index=False)

planet_5days_train_df.head()

Unnamed: 0,field_id,B01_time_1,B02_time_1,B03_time_1,NIR_time_1,B01_time_2,B02_time_2,B03_time_2,NIR_time_2,B01_time_3,B02_time_3,B03_time_3,NIR_time_3,B01_time_4,B02_time_4,B03_time_4,NIR_time_4,B01_time_5,B02_time_5,B03_time_5,NIR_time_5,B01_time_6,B02_time_6,B03_time_6,NIR_time_6,B01_time_7,B02_time_7,B03_time_7,NIR_time_7,B01_time_8,B02_time_8,B03_time_8,NIR_time_8,B01_time_9,B02_time_9,B03_time_9,NIR_time_9,B01_time_10,B02_time_10,B03_time_10,...,ARVI_time_39,SIPI_time_39,EXG_time_39,ACI_time_39,ARVI_time_40,SIPI_time_40,EXG_time_40,ACI_time_40,ARVI_time_41,SIPI_time_41,EXG_time_41,ACI_time_41,ARVI_time_42,SIPI_time_42,EXG_time_42,ACI_time_42,ARVI_time_43,SIPI_time_43,EXG_time_43,ACI_time_43,ARVI_time_44,SIPI_time_44,EXG_time_44,ACI_time_44,ARVI_time_45,SIPI_time_45,EXG_time_45,ACI_time_45,ARVI_time_46,SIPI_time_46,EXG_time_46,ACI_time_46,ARVI_time_47,SIPI_time_47,EXG_time_47,ACI_time_47,ARVI_time_48,SIPI_time_48,EXG_time_48,ACI_time_48
0,185161,862.096679,1155.96971,1619.037171,2277.43723,845.521191,1128.491675,1594.010752,2242.098675,853.389397,1143.920506,1632.660074,2253.284051,858.848905,1137.3572,1605.614654,2224.463917,837.240417,1093.7958,1577.651497,2223.488369,831.974862,1081.159404,1583.77516,2209.218287,832.222844,1068.319375,1582.469724,2191.870886,841.508682,1073.935398,1591.624066,2225.508176,797.098228,1035.765212,1549.495607,2197.88965,757.173373,1002.283931,1512.871214,...,0.37794,0.695504,102.930365,4052077.0,0.260783,0.631278,2.562234,5074053.0,0.169029,0.577434,-96.771288,6059426.0,0.087105,0.513379,-190.737483,7202400.0,0.034773,0.459717,-269.839196,8368476.0,0.012718,0.443384,-279.266299,8079219.0,-0.005893,0.427041,-278.280357,8149360.0,-0.028556,0.387413,-269.075592,10832250.0,-0.039755,0.368653,-257.752137,11972900.0,-0.044675,0.366708,-269.694088,10649900.0
1,185168,844.699553,1181.26927,1743.922161,2370.354852,831.16139,1156.29033,1709.180007,2333.256676,831.531865,1150.779766,1775.663627,2367.055781,838.609876,1151.027736,1747.176566,2344.351136,841.695114,1126.871094,1702.58479,2360.241019,830.134756,1117.842877,1718.688919,2331.327839,834.677185,1126.096352,1738.084618,2305.109085,840.483586,1132.932416,1750.898899,2342.138094,811.287922,1108.714935,1728.073193,2330.532588,784.387405,1086.076772,1704.812836,...,0.250758,0.623194,-9.293427,4976900.0,0.176736,0.580128,-90.466896,5864098.0,0.112793,0.540747,-176.641087,6675949.0,0.057248,0.499197,-235.096524,7388325.0,0.018707,0.46591,-301.698348,7849502.0,-0.006406,0.439649,-304.854749,8444641.0,-0.030208,0.412632,-308.556263,10369360.0,-0.03904,0.40134,-292.818341,10122280.0,-0.042469,0.400302,-284.089986,8697641.0,-0.043722,0.400239,-298.336407,8370449.0
2,185171,935.663412,1262.72734,1754.122144,2485.870118,917.735999,1240.154937,1736.110354,2455.072402,939.13762,1254.294952,1795.390752,2474.811349,983.501842,1297.130066,1810.802321,2474.044768,979.549005,1261.52045,1797.058769,2501.339536,965.328666,1238.310243,1794.234709,2475.976419,957.201916,1223.655306,1776.381172,2435.950995,974.852985,1235.948231,1775.794768,2433.689941,906.042189,1171.772476,1703.173913,2383.600405,841.898305,1111.725129,1636.968128,...,0.096486,0.506498,-112.137436,7575099.0,0.057668,0.473782,-166.891489,8276080.0,0.028573,0.447565,-212.830877,8837106.0,0.017489,0.438757,-223.708917,8592099.0,0.010535,0.429323,-228.519344,8203913.0,-0.005064,0.41214,-242.123066,8995442.0,-0.023582,0.389215,-245.917833,12047620.0,-0.026717,0.381319,-219.31577,11427010.0,-0.0281,0.379927,-210.299005,9827848.0,-0.031556,0.377177,-228.069455,9224403.0
3,185172,759.366337,988.541726,1322.700141,2029.299859,758.755304,974.828854,1300.17256,1988.326733,731.154173,939.476662,1299.39604,1946.376238,737.998586,957.265912,1290.379066,1938.123055,743.719943,944.316832,1313.212164,1996.315417,711.318246,901.079208,1286.420085,1978.714286,731.613861,929.861386,1324.33239,1987.405941,777.30976,985.25884,1398.256011,2081.826025,765.207921,986.615276,1411.137199,2112.353607,678.369165,896.81471,1289.113154,...,0.216985,0.585733,5.195191,4505237.0,0.150439,0.539721,-60.704385,5179248.0,0.096635,0.500464,-127.410184,5870127.0,0.053264,0.466116,-177.312588,6599208.0,0.033988,0.444293,-190.19802,6795204.0,0.004803,0.414749,-212.507779,7692281.0,-0.011912,0.398645,-226.373409,9309024.0,-0.022305,0.382615,-205.693069,9152456.0,-0.021774,0.383297,-188.685997,7831823.0,-0.019962,0.383403,-186.407355,7479095.0
4,185177,842.148498,1109.850724,1512.494654,2151.967418,833.025579,1083.997429,1483.782887,2104.132325,855.33763,1090.307755,1529.787894,2117.908377,868.947456,1130.646603,1540.746346,2119.683618,833.176343,1066.764853,1495.475233,2111.574739,821.040905,1044.532718,1490.248613,2105.908851,820.375964,1030.374171,1476.571221,2080.821085,843.502571,1065.878468,1521.440452,2132.696068,784.96938,1013.008154,1465.606645,2086.154926,724.181994,954.732203,1398.181621,...,0.137842,0.541121,-52.070273,5767303.0,0.079409,0.496864,-135.899411,6638848.0,0.036888,0.462855,-209.294898,7367641.0,0.013267,0.443044,-233.198843,7678373.0,0.003936,0.429159,-232.568683,7538992.0,-0.019211,0.403314,-258.055014,9211620.0,-0.02978,0.388205,-272.168223,11514440.0,-0.036606,0.374082,-255.92976,11484480.0,-0.040663,0.36819,-245.742861,9940318.0,-0.041749,0.368493,-257.605393,9302888.0


### Compute more Features - Vegetation Indices
#### Sentinel-1 Indices
* ``` Radar Vegetation Index (RVI): (4*VH)/(VV+VH) ```
* ``` Radar Vegetation Index for Sentinel-1 (RVI4): (Sqrt(DOP))*((4*(VH))/(VV+VH)), where DOP = VV/(VV+VH) ```
* ``` Polarization Ratio (stored as PolarRation): VV/VH ```

In [None]:
def s1_feature_calc(df ,times):
  """Add Sentinel-1 radar indices to ``df`` in place.

  For every ``t`` in ``times`` the columns ``VV_time_t`` and ``VH_time_t``
  are read and three new columns are written back to ``df``:

  * ``PolarRation_time_t`` = VV / VH
  * ``RVI_time_t``         = 4*VH / (VV + VH)
  * ``RVI4_time_t``        = sqrt(VV / (VV + VH)) * RVI

  Nothing is returned; callers rely on the in-place mutation.
  """
  for step in times:
    vv = df[f'VV_time_{step}']
    vh = df[f'VH_time_{step}']
    backscatter_sum = vv + vh

    df[f'PolarRation_time_{step}'] = vv / vh

    rvi = (4*vh) / backscatter_sum
    # RVI and RVI4 are stored as plain lists, i.e. assigned by position.
    df[f'RVI_time_{step}'] = list(rvi)

    dop = vv / backscatter_sum
    df[f'RVI4_time_{step}'] = list(np.sqrt(dop) * rvi)

In [None]:
# Sentinel-1 indices for time steps 1..41; the new columns are added to both frames in place
s1_time_steps = list(range(1, 42))
for s1_frame in (s1_train_df, s1_test_df):
  s1_feature_calc(s1_frame, s1_time_steps)

s1_train_df.head()

Unnamed: 0,field_id,VV_time_1,VH_time_1,VV_time_2,VH_time_2,VV_time_3,VH_time_3,VV_time_4,VH_time_4,VV_time_5,VH_time_5,VV_time_6,VH_time_6,VV_time_7,VH_time_7,VV_time_8,VH_time_8,VV_time_9,VH_time_9,VV_time_10,VH_time_10,VV_time_11,VH_time_11,VV_time_12,VH_time_12,VV_time_13,VH_time_13,VV_time_14,VH_time_14,VV_time_15,VH_time_15,VV_time_16,VH_time_16,VV_time_17,VH_time_17,VV_time_18,VH_time_18,VV_time_19,VH_time_19,VV_time_20,...,RVI4_time_28,PolarRation_time_29,RVI_time_29,RVI4_time_29,PolarRation_time_30,RVI_time_30,RVI4_time_30,PolarRation_time_31,RVI_time_31,RVI4_time_31,PolarRation_time_32,RVI_time_32,RVI4_time_32,PolarRation_time_33,RVI_time_33,RVI4_time_33,PolarRation_time_34,RVI_time_34,RVI4_time_34,PolarRation_time_35,RVI_time_35,RVI4_time_35,PolarRation_time_36,RVI_time_36,RVI4_time_36,PolarRation_time_37,RVI_time_37,RVI4_time_37,PolarRation_time_38,RVI_time_38,RVI4_time_38,PolarRation_time_39,RVI_time_39,RVI4_time_39,PolarRation_time_40,RVI_time_40,RVI4_time_40,PolarRation_time_41,RVI_time_41,RVI4_time_41
0,185161,0.074842,0.010611,0.066684,0.010533,0.078444,0.015958,0.06969,0.01003,0.068285,0.009464,0.070652,0.01003,0.06647,0.009087,0.070467,0.009475,0.061471,0.007826,0.068549,0.009482,0.068437,0.009276,0.117246,0.017023,0.113445,0.017014,0.123093,0.019253,0.098813,0.013486,0.118962,0.015479,0.108578,0.017379,0.120173,0.015784,0.097825,0.014947,0.160762,...,0.996887,2.406031,1.174388,0.987047,2.62474,1.103527,0.939048,2.49388,1.144859,0.967243,2.367768,1.18773,0.995901,2.724254,1.074041,0.918597,2.784926,1.056824,0.906527,3.075501,0.981474,0.852602,3.339383,0.92179,0.808632,4.67217,0.705197,0.640023,5.57991,0.607911,0.559814,6.068759,0.56587,0.524319,9.031074,0.398761,0.378363,7.502975,0.470424,0.441896
1,185168,0.058208,0.00848,0.06382,0.008066,0.071563,0.013261,0.061932,0.008661,0.056667,0.007618,0.059185,0.008119,0.051368,0.00684,0.063714,0.007443,0.049175,0.006122,0.059544,0.008043,0.053123,0.006892,0.10903,0.016061,0.081576,0.011714,0.095628,0.016242,0.084365,0.012584,0.091942,0.012725,0.090008,0.014128,0.099085,0.013387,0.080894,0.011605,0.136022,...,0.878828,2.31218,1.207664,1.00902,2.955828,1.011166,0.874065,2.576896,1.118288,0.949181,3.000583,0.999854,0.86592,2.675286,1.088351,0.928557,3.173982,0.958317,0.835673,3.110696,0.973071,0.846478,4.013716,0.797811,0.713828,5.262766,0.638695,0.585488,6.453076,0.536691,0.49939,6.777645,0.514294,0.480095,8.621084,0.415754,0.393555,7.943156,0.447269,0.421522
2,185171,0.093294,0.020286,0.086999,0.014312,0.077604,0.01632,0.089057,0.018727,0.094317,0.017171,0.089668,0.01736,0.08782,0.014624,0.087956,0.015692,0.081294,0.021598,0.094044,0.02107,0.097018,0.024774,0.131431,0.026149,0.097233,0.018627,0.103102,0.020091,0.077948,0.018567,0.090149,0.017057,0.090617,0.022234,0.085422,0.019113,0.080039,0.018343,0.10689,...,0.908984,2.753595,1.065645,0.912723,3.130442,0.968419,0.843079,2.888548,1.028662,0.886581,3.527318,0.883525,0.779867,3.299825,0.93027,0.814947,4.407413,0.739725,0.667832,3.565663,0.876105,0.774238,4.504444,0.726686,0.657371,4.983692,0.668484,0.610073,4.425241,0.737294,0.665886,5.006632,0.665931,0.607976,4.980057,0.66889,0.610407,5.430848,0.622002,0.571599
3,185172,0.078183,0.016866,0.136299,0.017555,0.116276,0.01599,0.093893,0.018507,0.077303,0.012701,0.089238,0.014913,0.074145,0.01127,0.116105,0.013697,0.125195,0.016821,0.177362,0.01676,0.128056,0.014818,0.221447,0.024417,0.132113,0.014693,0.166949,0.016989,0.097418,0.01426,0.139521,0.009273,0.106745,0.014448,0.162007,0.012016,0.119224,0.016516,0.185825,...,0.607913,3.58691,0.872047,0.771152,6.233774,0.552962,0.51332,3.913672,0.814055,0.726512,4.278443,0.757799,0.682251,3.031775,0.992119,0.860328,4.103644,0.783754,0.702788,3.363612,0.916672,0.804811,5.712944,0.595864,0.549693,4.009215,0.798528,0.71439,5.733382,0.594055,0.54817,6.001708,0.571289,0.528922,9.269662,0.389497,0.370048,4.676644,0.704642,0.639572
4,185177,0.095041,0.013787,0.120572,0.014234,0.094661,0.021405,0.099454,0.012053,0.088412,0.011937,0.097854,0.01219,0.086597,0.010578,0.11575,0.01169,0.105531,0.015082,0.109985,0.015657,0.119812,0.01636,0.217655,0.024805,0.118994,0.014421,0.141692,0.016635,0.102573,0.013264,0.13719,0.015106,0.097531,0.016761,0.140451,0.018555,0.106409,0.019472,0.121154,...,0.89749,2.462955,1.155083,0.974132,3.195185,0.953474,0.832111,3.323261,0.925228,0.811195,3.591874,0.871104,0.770434,3.223956,0.94698,0.827324,4.217285,0.766682,0.689302,4.348623,0.747856,0.674331,6.275303,0.549805,0.510623,6.174175,0.557555,0.517239,6.39734,0.540735,0.502859,7.793398,0.454887,0.428241,9.773122,0.371294,0.353642,8.728779,0.411151,0.389448


In [None]:
# Save the Sentinel-1 data frames together with the newly computed features
s1_outputs = {
    f"{df_path}/all_s1_train_df.csv": s1_train_df,
    f"{df_path}/all_s1_test_df.csv": s1_test_df,
}
for csv_path, s1_frame in s1_outputs.items():
  s1_frame.to_csv(csv_path, index=False)

## Modeling

### Temporal models
TempCNN models trained using only the Planet 5-day time-series data.

In [None]:
# Reload the Planet 5-day feature tables (train first, then test) from disk
planet_5days_train_df, planet_5days_test_df = (
    pd.read_csv(csv_path)
    for csv_path in (f"{df_path}/all_planet_5days_train_df.csv",
                     f"{df_path}/all_planet_5days_test_df.csv")
)

#### Planet-5days

In [None]:
# Rearrange train dataset in proper format for TempCNN model:
# one (48 time steps x len(features)) matrix per field.
features = ['B01', 'B02', 'B03', 'NIR','NDVI', 'NDWI', 'MNDVI',  'GNDVI', 'EVI2',
           'EVI','CVI','BI','SI', 'NDGI', 'DYI', 'NDPI', 'PEBI', 'NDYI', 'YEBI', 'ARVI', 'SIPI', 'EXG', 'ACI']

# Columns are listed time-major / feature-minor, so a plain row-wise reshape
# yields (field, time step, feature). This replaces a per-cell `.loc` lookup
# loop and selects rows by position, so it does not depend on the frame index.
train_columns = [f'{feature}_time_{t+1}' for t in range(48) for feature in features]

# copy=True guarantees writable arrays that do not alias the DataFrame
# (the labels are later shifted in place).
train_inputs = (planet_5days_train_df[train_columns]
                .to_numpy(dtype=np.float64, copy=True)
                .reshape(len(planet_5days_train_df), 48, len(features)))
labels = planet_5days_train_df['label'].to_numpy(dtype=np.float64, copy=True)

In [None]:
# Rearrange test dataset in proper format for TempCNN model:
# one (48 time steps x len(features)) matrix per field, plus the field ids.
# Columns are listed time-major / feature-minor, so a plain row-wise reshape
# yields (field, time step, feature). This replaces a per-cell `.loc` lookup
# loop and selects rows by position, so it does not depend on the frame index.
test_columns = [f'{feature}_time_{t+1}' for t in range(48) for feature in features]

# copy=True guarantees writable arrays that do not alias the DataFrame.
test_inputs = (planet_5days_test_df[test_columns]
               .to_numpy(dtype=np.float64, copy=True)
               .reshape(len(planet_5days_test_df), 48, len(features)))
# Field ids are kept as float64, matching the array the loop version filled.
field_ids = planet_5days_test_df['field_id'].to_numpy(dtype=np.float64, copy=True)

In [None]:
# Reshape train/test dataset and create tensor dataset for training
sample_shape = (-1, 48, len(features))

train_inputs = train_inputs.reshape(sample_shape)
labels = labels.reshape(-1, 1)
# Shift the labels down by one, in place (presumably to make class ids zero-based).
# NOTE: re-running this cell shifts them again.
labels -= 1
train_data = TensorDataset(*(torch.from_numpy(array) for array in (train_inputs, labels)))

test_inputs = test_inputs.reshape(sample_shape)
field_ids = field_ids.reshape(-1, 1)

In [None]:
# Train one TempCNN per stratified fold and keep every fitted model
seed_setter(2021)

path2models = "./temporal_models"
os.makedirs(path2models, exist_ok=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 10 stratified folds over the sample positions
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=2022)
indices = range(len(labels))

# Settings shared by the train and validation loaders
loader_kwargs = {"batch_size": 16, "num_workers": 16}

print(f'********** TempCNN Model ***********')
models = []
for i, (train_index, val_index) in enumerate(skf.split(indices, labels)):
  print(f'########### Fold {i+1} / {skf.n_splits} ')

  # Fold-specific views of the full tensor dataset
  train_ds = Subset(train_data, train_index)
  val_ds = Subset(train_data, val_index)

  # Only the training loader shuffles its samples
  dataloaders = {
      'train': torch.utils.data.DataLoader(train_ds, shuffle=True, **loader_kwargs),
      'val': torch.utils.data.DataLoader(val_ds, shuffle=False, **loader_kwargs),
  }

  # Model dimensions are read from the first training sample: (time steps, features)
  sequencelength, input_dim = train_ds[0][0].shape
  num_classes = len(np.unique(labels))

  # The optimizer is created before the model is moved to the device, as before
  model = bzh.models.TempCNN(input_dim=input_dim, num_classes=num_classes, sequencelength=sequencelength)
  opt = optim.SGD(model.parameters(), 3e-4, momentum=0.9)
  model = model.to(device)

  criterion = nn.NLLLoss(reduction='sum')
  lr_scheduler = ReduceLROnPlateau(opt, mode='min',factor=0.5, patience=5,verbose=1)

  # Everything train_val needs for this fold
  params_train = dict(
      num_epochs=25, optimizer=opt, loss_func=criterion,
      train_dl=dataloaders['train'], val_dl=dataloaders['val'],
      sanity_check=False, fold=i+1, lr_scheduler=lr_scheduler, device=device,
      path2weights=path2models,
  )

  model, best_loss = train_val(model, params_train)
  models.append(model)


********** TempCNN Model ***********
########### Fold 1 / 10 
Epoch 0/24, current lr=0.0003
train loss: 0.991316
val loss: 0.903678
----------
Epoch 1/24, current lr=0.0003
train loss: 0.884700
val loss: 0.935378
----------
Epoch 2/24, current lr=0.0003
train loss: 0.837497
val loss: 0.919766
----------
Epoch 3/24, current lr=0.0003
train loss: 0.797355
val loss: 0.929512
----------
Epoch 4/24, current lr=0.0003
train loss: 0.789818
val loss: 0.953890
----------
Epoch 5/24, current lr=0.0003
train loss: 0.761819
val loss: 0.890999
----------
Epoch 6/24, current lr=0.0003
train loss: 0.716971
val loss: 0.945541
----------
Epoch 7/24, current lr=0.0003
train loss: 0.700057
val loss: 0.944131
----------
Epoch 8/24, current lr=0.0003
train loss: 0.677742
val loss: 0.832796
----------
Epoch 9/24, current lr=0.0003
train loss: 0.667058
val loss: 0.840607
----------
Epoch 10/24, current lr=0.0003
train loss: 0.649836
val loss: 0.893126
----------
Epoch 11/24, current lr=0.0003
train loss: 0.6

In [None]:
# Create tensor test dataset
test_data = TensorDataset(torch.from_numpy(test_inputs))
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False)

# Predictions: one list of per-field class-probability vectors per fold model
m_preds = []
# Inference only: no autograd graph is needed for any of the forward passes
with torch.no_grad():
  for model in models:
    # Switch layers such as dropout / batch-norm to inference behaviour;
    # without this the predictions depend on whatever mode training left behind.
    model.eval()
    preds = []
    for x in test_loader:
      x = x[0]
      x = x.float()
      output = model(x.to(device))
      # The model output is exponentiated, i.e. treated as log-probabilities
      predicted_probabilities = torch.exp(output)
      predicted_probabilities = predicted_probabilities.detach().cpu().numpy()
      preds.append(predicted_probabilities[0])
    m_preds.append(preds)


# Ensemble the predictions of all fold models (mean over the model axis)
preds = np.mean(m_preds, axis=0)

# Put the predictions in DataFrame
planet_time_preds = pd.DataFrame(preds)
# field_ids may be shaped (n, 1); flatten so it is assigned as a single column
planet_time_preds['field_id'] = np.asarray(field_ids).ravel()

## Boosting Models

In [None]:
# Reload the per-sensor feature tables from disk
## Sentinel-1
s1_train_df, s1_test_df = (
    pd.read_csv(csv_path)
    for csv_path in (f"{df_path}/all_s1_train_df.csv", f"{df_path}/all_s1_test_df.csv")
)

## Sentinel-2
s2_train_df, s2_test_df = (
    pd.read_csv(csv_path)
    for csv_path in (f"{df_path}/all_s2_train_df.csv", f"{df_path}/all_s2_test_df.csv")
)

## Planet-5days
planet_5days_train_df, planet_5days_test_df = (
    pd.read_csv(csv_path)
    for csv_path in (f"{df_path}/all_planet_5days_train_df.csv", f"{df_path}/all_planet_5days_test_df.csv")
)


## Merge the data
# Train: S1 + S2 + Planet, inner-joined on field id and label
train_keys = ['field_id','label']
train_df = (s1_train_df
            .merge(s2_train_df, on=train_keys, how='inner')
            .merge(planet_5days_train_df, on=train_keys, how='inner'))

# Test: S1 + S2 + Planet, inner-joined on field id only
test_keys = ['field_id']
test_df = (s1_test_df
           .merge(s2_test_df, on=test_keys, how='inner')
           .merge(planet_5days_test_df, on=test_keys, how='inner'))

### Time series analysis
The slope (S) of each vegetation index curve describes the growth rate of the crops, and the difference (D) is the gap between the index value at each time step and the base value (the average of the three lowest values).

The used indices are NDVI, NDWI, and MNDVI.

This method is inspired by:
You, Xingzhi, et al. "Remote sensing based detection of crop phenology for agricultural zones in China using a new threshold method." Remote Sensing 5.7 (2013): 3190-3211.

In [None]:
def diff_index(df , df1, a, index, times, data_type = 'train'):
  """Build difference (D) and slope (S) features for one index time series.

  For every ``t`` in ``times``:

  * ``D_{index}_time_t`` = ``df[{index}_time_t] - a``
  * ``S_{index}_time_t`` = ``(df[{index}_time_{t+1}] - df[{index}_time_t]) / df[{index}_time_t]``

  The slope needs the following time step, so processing stops after the D
  column of the last step: either step 48 (the original hard-coded series
  length) or the first step whose successor column is missing from ``df``.
  Previously a series shorter than 48 steps raised ``KeyError``.

  Args:
    df: DataFrame holding the ``{index}_time_t`` columns.
    df1: DataFrame supplying ``field_id`` (and ``label`` for training data);
      its rows are copied by position.
    a: per-row base value subtracted from every time step (array-like with
      one entry per row, or a scalar).
    index: name of the index, used as the column prefix.
    times: iterable of integer time-step numbers, in ascending order.
    data_type: ``'train'`` copies ``label`` into the result; any other value
      leaves it out.

  Returns:
    A new DataFrame with interleaved D/S columns followed by ``field_id``
    and, for training data, ``label``.
  """
  feature_columns = {}

  for time in times:
    current = df[f'{index}_time_{time}']
    feature_columns[f'D_{index}_time_{time}'] = current - a

    next_col = f'{index}_time_{time+1}'
    if time == 48 or next_col not in df.columns:
      # Last usable step: there is no successor to compute a slope from.
      break
    feature_columns[f'S_{index}_time_{time}'] = (df[next_col] - current) / current

  # Build the frame in one shot instead of inserting columns one at a time.
  diff_index_df = pd.DataFrame(feature_columns)

  diff_index_df['field_id'] = list(df1['field_id'])
  if data_type == 'train':
    diff_index_df['label'] = list(df1['label'])

  return diff_index_df

In [None]:
# Time-series difference/slope features for the training set
indices = ['NDVI','NDWI', 'MNDVI']
for index in indices:
  # Only the 48 index columns take part in the base-value computation.
  # 'label' used to be appended here on every iteration, which put 48 copies
  # of the class label among the candidates for the "three lowest values"
  # (and made the train features inconsistent with the test features).
  cols = [f'{index}_time_{i+1}' for i in range(48)]
  df = planet_5days_train_df[cols]

  # Base value per field: mean of its three smallest index values
  a = np.sort(df.values, axis=1)[:, :3].mean(axis=1)

  diff_df = diff_index(df , planet_5days_train_df, a, index, [i+1 for i in range(48)], data_type = 'train')
  train_df = pd.merge(train_df, diff_df, on=['field_id','label'], how='inner')

In [None]:
# Time-series difference/slope features for the test set
indices = ['NDVI','NDWI', 'MNDVI']
time_steps = list(range(1, 49))
for index in indices:
  cols = [f'{index}_time_{step}' for step in time_steps]
  df = planet_5days_test_df[cols]

  # Base value per field: mean of its three smallest index values
  a = np.sort(df.values, axis=1)[:, :3].mean(axis=1)

  diff_df = diff_index(df, planet_5days_test_df, a, index, time_steps, data_type='test')
  test_df = test_df.merge(diff_df, on=['field_id'], how='inner')

In [None]:
# Preview the first rows of the merged training feature table
train_df.head()

Unnamed: 0,field_id,VV_time_1,VH_time_1,VV_time_2,VH_time_2,VV_time_3,VH_time_3,VV_time_4,VH_time_4,VV_time_5,VH_time_5,VV_time_6,VH_time_6,VV_time_7,VH_time_7,VV_time_8,VH_time_8,VV_time_9,VH_time_9,VV_time_10,VH_time_10,VV_time_11,VH_time_11,VV_time_12,VH_time_12,VV_time_13,VH_time_13,VV_time_14,VH_time_14,VV_time_15,VH_time_15,VV_time_16,VH_time_16,VV_time_17,VH_time_17,VV_time_18,VH_time_18,VV_time_19,VH_time_19,VV_time_20,...,S_MNDVI_time_28,D_MNDVI_time_29,S_MNDVI_time_29,D_MNDVI_time_30,S_MNDVI_time_30,D_MNDVI_time_31,S_MNDVI_time_31,D_MNDVI_time_32,S_MNDVI_time_32,D_MNDVI_time_33,S_MNDVI_time_33,D_MNDVI_time_34,S_MNDVI_time_34,D_MNDVI_time_35,S_MNDVI_time_35,D_MNDVI_time_36,S_MNDVI_time_36,D_MNDVI_time_37,S_MNDVI_time_37,D_MNDVI_time_38,S_MNDVI_time_38,D_MNDVI_time_39,S_MNDVI_time_39,D_MNDVI_time_40,S_MNDVI_time_40,D_MNDVI_time_41,S_MNDVI_time_41,D_MNDVI_time_42,S_MNDVI_time_42,D_MNDVI_time_43,S_MNDVI_time_43,D_MNDVI_time_44,S_MNDVI_time_44,D_MNDVI_time_45,S_MNDVI_time_45,D_MNDVI_time_46,S_MNDVI_time_46,D_MNDVI_time_47,S_MNDVI_time_47,D_MNDVI_time_48
0,185161,0.074842,0.010611,0.066684,0.010533,0.078444,0.015958,0.06969,0.01003,0.068285,0.009464,0.070652,0.01003,0.06647,0.009087,0.070467,0.009475,0.061471,0.007826,0.068549,0.009482,0.068437,0.009276,0.117246,0.017023,0.113445,0.017014,0.123093,0.019253,0.098813,0.013486,0.118962,0.015479,0.108578,0.017379,0.120173,0.015784,0.097825,0.014947,0.160762,...,0.008449,0.692364,0.009738,0.701678,0.00236,0.703958,0.006357,0.710112,0.010382,0.720227,-0.001347,0.718901,-0.026144,0.6932,-0.059842,0.635912,-0.067963,0.574742,-0.054863,0.528719,-0.059264,0.481731,-0.137916,0.378865,-0.147367,0.284108,-0.168741,0.191597,-0.150359,0.123074,-0.087108,0.089345,-0.084082,0.059624,-0.127369,0.018388,-0.081562,-0.004655,-0.034986,-0.013733
1,185168,0.058208,0.00848,0.06382,0.008066,0.071563,0.013261,0.061932,0.008661,0.056667,0.007618,0.059185,0.008119,0.051368,0.00684,0.063714,0.007443,0.049175,0.006122,0.059544,0.008043,0.053123,0.006892,0.10903,0.016061,0.081576,0.011714,0.095628,0.016242,0.084365,0.012584,0.091942,0.012725,0.090008,0.014128,0.099085,0.013387,0.080894,0.011605,0.136022,...,0.028357,0.680085,0.010056,0.689339,0.014477,0.702796,0.004469,0.707011,-0.006039,0.701291,-0.002444,0.69899,-0.027265,0.673384,-0.086103,0.594722,-0.094495,0.515828,-0.087635,0.449574,-0.078935,0.395127,-0.121739,0.317784,-0.132774,0.2437,-0.143435,0.174293,-0.128351,0.121093,-0.105121,0.083115,-0.122775,0.043421,-0.056372,0.027433,-0.02123,0.021752,-0.00765,0.019748
2,185171,0.093294,0.020286,0.086999,0.014312,0.077604,0.01632,0.089057,0.018727,0.094317,0.017171,0.089668,0.01736,0.08782,0.014624,0.087956,0.015692,0.081294,0.021598,0.094044,0.02107,0.097018,0.024774,0.131431,0.026149,0.097233,0.018627,0.103102,0.020091,0.077948,0.018567,0.090149,0.017057,0.090617,0.022234,0.085422,0.019113,0.080039,0.018343,0.10689,...,0.017655,0.587622,0.003548,0.590703,-0.009146,0.582734,-0.052379,0.537514,-0.030265,0.512754,-0.022777,0.494684,-0.018721,0.48017,-0.107354,0.398499,-0.123456,0.314661,-0.095776,0.25765,-0.121159,0.192437,-0.109257,0.140756,-0.100773,0.098295,-0.045686,0.080985,-0.030122,0.070094,-0.074047,0.044127,-0.102842,0.010732,-0.022517,0.004172,-0.009268,0.001533,-0.023114,-0.004988
3,185172,0.078183,0.016866,0.136299,0.017555,0.116276,0.01599,0.093893,0.018507,0.077303,0.012701,0.089238,0.014913,0.074145,0.01127,0.116105,0.013697,0.125195,0.016821,0.177362,0.01676,0.128056,0.014818,0.221447,0.024417,0.132113,0.014693,0.166949,0.016989,0.097418,0.01426,0.139521,0.009273,0.106745,0.014448,0.162007,0.012016,0.119224,0.016516,0.185825,...,0.005582,0.655154,0.002615,0.657638,-0.007637,0.650367,-0.030122,0.621907,-0.029007,0.595326,-0.01993,0.577593,-0.006249,0.572143,-0.104861,0.48127,-0.11793,0.389788,-0.0233,0.373845,-0.079082,0.320994,-0.118322,0.248173,-0.123221,0.181309,-0.125479,0.12161,-0.066473,0.093953,-0.120668,0.047084,-0.085329,0.01794,-0.063045,-0.001755,0.003496,-0.000732,0.010959,0.002487
4,185177,0.095041,0.013787,0.120572,0.014234,0.094661,0.021405,0.099454,0.012053,0.088412,0.011937,0.097854,0.01219,0.086597,0.010578,0.11575,0.01169,0.105531,0.015082,0.109985,0.015657,0.119812,0.01636,0.217655,0.024805,0.118994,0.014421,0.141692,0.016635,0.102573,0.013264,0.13719,0.015106,0.097531,0.016761,0.140451,0.018555,0.106409,0.019472,0.121154,...,-0.009154,0.702037,9.8e-05,0.702131,-0.013101,0.689527,-0.032395,0.658768,-0.026765,0.634179,-0.030525,0.606885,-0.032946,0.578325,-0.098237,0.495974,-0.126972,0.39999,-0.088138,0.341822,-0.132833,0.261885,-0.138843,0.189429,-0.131951,0.130131,-0.091558,0.094414,-0.041243,0.079798,-0.115963,0.040398,-0.065957,0.020587,-0.051235,0.006213,-0.031577,-0.002192,-0.00709,-0.00402


In [None]:
# Preview the first rows of the merged test feature table
test_df.head()

Unnamed: 0,field_id,VV_time_1,VH_time_1,VV_time_2,VH_time_2,VV_time_3,VH_time_3,VV_time_4,VH_time_4,VV_time_5,VH_time_5,VV_time_6,VH_time_6,VV_time_7,VH_time_7,VV_time_8,VH_time_8,VV_time_9,VH_time_9,VV_time_10,VH_time_10,VV_time_11,VH_time_11,VV_time_12,VH_time_12,VV_time_13,VH_time_13,VV_time_14,VH_time_14,VV_time_15,VH_time_15,VV_time_16,VH_time_16,VV_time_17,VH_time_17,VV_time_18,VH_time_18,VV_time_19,VH_time_19,VV_time_20,...,S_MNDVI_time_28,D_MNDVI_time_29,S_MNDVI_time_29,D_MNDVI_time_30,S_MNDVI_time_30,D_MNDVI_time_31,S_MNDVI_time_31,D_MNDVI_time_32,S_MNDVI_time_32,D_MNDVI_time_33,S_MNDVI_time_33,D_MNDVI_time_34,S_MNDVI_time_34,D_MNDVI_time_35,S_MNDVI_time_35,D_MNDVI_time_36,S_MNDVI_time_36,D_MNDVI_time_37,S_MNDVI_time_37,D_MNDVI_time_38,S_MNDVI_time_38,D_MNDVI_time_39,S_MNDVI_time_39,D_MNDVI_time_40,S_MNDVI_time_40,D_MNDVI_time_41,S_MNDVI_time_41,D_MNDVI_time_42,S_MNDVI_time_42,D_MNDVI_time_43,S_MNDVI_time_43,D_MNDVI_time_44,S_MNDVI_time_44,D_MNDVI_time_45,S_MNDVI_time_45,D_MNDVI_time_46,S_MNDVI_time_46,D_MNDVI_time_47,S_MNDVI_time_47,D_MNDVI_time_48
0,185471,0.039544,0.004696,0.044856,0.005784,0.037505,0.004427,0.045581,0.006021,0.040425,0.004435,0.043385,0.00566,0.069948,0.010082,0.086155,0.010922,0.068744,0.009806,0.089116,0.011182,0.074773,0.010306,0.10093,0.012113,0.080593,0.009908,0.114502,0.011829,0.088006,0.012553,0.127183,0.013952,0.080917,0.013664,0.170608,0.019466,0.099427,0.017337,0.143034,...,0.015411,0.662745,0.019686,0.681097,0.004295,0.685179,-0.003426,0.681909,-0.005988,0.676212,-0.013341,0.663596,-0.02985,0.635743,-0.038837,0.600586,-0.084199,0.527326,-0.092453,0.453658,-0.077303,0.397757,-0.136811,0.30647,-0.184151,0.200406,-0.279631,0.069008,-0.081656,0.041368,-0.037995,0.029557,0.003035,0.030464,-0.024713,0.023051,-0.046741,0.009378,-0.036702,-0.000857
1,185489,0.062731,0.007674,0.064341,0.008496,0.057415,0.00671,0.066643,0.008695,0.063157,0.007015,0.065467,0.00797,0.059709,0.006897,0.061305,0.007818,0.056808,0.007331,0.068774,0.008241,0.058074,0.007353,0.083859,0.009116,0.059444,0.006573,0.079668,0.008934,0.055882,0.00683,0.078289,0.008877,0.061369,0.007539,0.086237,0.009703,0.066758,0.006943,0.094748,...,0.109126,0.157436,-0.013999,0.152052,0.078013,0.181636,0.051818,0.20282,0.002921,0.204077,0.032113,0.217925,-0.028239,0.205356,-0.037383,0.189187,-0.036362,0.174047,-0.024328,0.164286,-0.019579,0.156621,-0.079258,0.126202,-0.07107,0.101087,-0.045981,0.085993,-0.051643,0.06982,-0.066471,0.050078,-0.015939,0.045659,0.047662,0.058663,-0.042849,0.046415,-0.022611,0.040229
2,185490,0.042945,0.010104,0.041931,0.010908,0.043514,0.009469,0.037158,0.010628,0.043072,0.009852,0.046261,0.011234,0.058145,0.011747,0.067488,0.013283,0.056607,0.01259,0.065229,0.013606,0.068714,0.01199,0.123188,0.01593,0.079096,0.012454,0.115545,0.015438,0.078704,0.013438,0.128658,0.014645,0.066144,0.015605,0.156994,0.019993,0.097925,0.018978,0.166924,...,0.019364,0.44504,0.010791,0.454087,0.018266,0.469567,-0.021609,0.45092,-0.011198,0.441466,0.004401,0.44514,-0.01905,0.429166,-0.055064,0.383874,-0.105043,0.30223,-0.063917,0.25777,-0.055379,0.221711,-0.08461,0.16967,-0.078102,0.125696,-0.105841,0.070758,-0.105908,0.021604,-0.061168,-0.003779,0.001657,-0.003133,0.026311,0.007134,0.021662,0.01581,0.019632,0.023843
3,185495,0.042354,0.007025,0.045777,0.007443,0.038662,0.00626,0.048068,0.008355,0.041405,0.00704,0.046539,0.008169,0.039208,0.006272,0.048297,0.007809,0.040649,0.006495,0.047747,0.00799,0.042281,0.006515,0.055527,0.008111,0.044366,0.006503,0.052881,0.007888,0.043323,0.005748,0.055343,0.007959,0.043083,0.006804,0.061324,0.007776,0.04913,0.006718,0.079671,...,0.092397,0.243368,0.023965,0.256744,0.064614,0.293671,0.037492,0.316483,0.006271,0.320441,0.016937,0.3312,-0.009495,0.325066,-0.024938,0.30911,-0.05508,0.274747,-0.060294,0.239203,-0.030824,0.222127,-0.073764,0.182524,-0.078849,0.143313,-0.092779,0.100813,-0.077263,0.068704,-0.080472,0.037845,-0.020503,0.030616,0.005041,0.032357,-0.013935,0.02752,-0.017805,0.021425
4,185502,0.024332,0.003528,0.025294,0.004925,0.021795,0.002946,0.025566,0.004813,0.022948,0.003403,0.028382,0.005023,0.021926,0.003185,0.029774,0.005077,0.023124,0.003304,0.0269,0.004301,0.02293,0.003385,0.034955,0.004556,0.027922,0.003386,0.036438,0.005199,0.028081,0.003532,0.040023,0.005115,0.026942,0.004194,0.042822,0.004639,0.033033,0.004272,0.062462,...,0.071117,0.354215,0.026194,0.372829,0.02541,0.391359,0.018069,0.404871,-0.001437,0.403777,-0.017721,0.390305,-0.016881,0.3777,-0.036025,0.351254,-0.080696,0.294149,-0.072105,0.247241,-0.040479,0.222806,-0.057755,0.189354,-0.068388,0.152031,-0.086188,0.108211,-0.064833,0.078088,-0.072244,0.046699,-0.023814,0.0371,-0.006954,0.034364,-0.092811,-0.001904,-0.029776,-0.012459


### CatBoost Classifier

In [None]:
# 10-fold stratified cross-validation of a CatBoost multi-class model on the full feature set.
# Produces:
#   cat_val_outputs - out-of-fold class probabilities for every training row
#   catboostpreds   - one test-set probability matrix per fold (blended later)
#   sklearnscores   - validation log loss of every fold
seed_setter(2021)  # helper defined elsewhere in the notebook
# Draws ten integers from NumPy's global RNG; the values are not used in this cell.
seeds = np.random.randint(low=1, high=3000, size=10)
seed = 2021 # seed shared by the fold splitter and the CatBoost models

skf = StratifiedKFold(n_splits=10,shuffle=True, random_state=seed) # for cross validation
sklearnscores = []
catboostpreds= []

X = train_df.drop(columns=['field_id','label'])
y = train_df['label'].astype(int)


X_test = test_df.drop(columns=['field_id']) 
test_fields = test_df['field_id']

# Out-of-fold probabilities (5 classes). The validation folds partition the training
# rows, so every row of this uninitialised buffer is written exactly once.
cat_val_outputs = np.empty((train_df.shape[0],5), dtype = np.float32)
for fold, (train, val) in enumerate(skf.split(X, y), start=1):
    print('########### Fold number {} '.format(fold))

    # spliting the data
    x_train, x_val, y_train, y_val = X.iloc[train], X.iloc[val], y.iloc[train], y.iloc[val]
    
    # `objective` is an alias of `loss_function` in CatBoost, so the loss is passed only
    # once; supplying both is redundant and can be rejected as a duplicated parameter.
    estimator = CatBoostClassifier(iterations=30000,  has_time=True ,bootstrap_type='No',random_strength=0,
                                   learning_rate=0.08,use_best_model=True,
                                   reg_lambda=10,random_seed=seed, task_type='GPU', loss_function='MultiClass')

    # fitting on train data; the validation fold drives early stopping and best-model selection
    estimator.fit( x_train, y_train, eval_set = (x_val,y_val),verbose=500 ,early_stopping_rounds=300)
    
    # Predict the validation fold once and reuse it for both the score and the OOF buffer
    val_proba = estimator.predict_proba(x_val)
    score = log_loss(y_val, val_proba) # checking the cross_entropy loss
    cat_val_outputs[val] = val_proba
    print('Logloss score: {}'.format(score))
 
    # Making prediction probabities
    catboostpred = estimator.predict_proba(X_test) # making prediction probabities
    sklearnscores.append(score)
    catboostpreds.append(catboostpred)
print('mean logloss scores from sklearn: {} '.format(np.mean(sklearnscores)))

########### Fold number 1 
0:	learn: 1.4860805	test: 1.4909466	best: 1.4909466 (0)	total: 73ms	remaining: 36m 31s
500:	learn: 0.2670753	test: 0.5550556	best: 0.5547361 (499)	total: 23.9s	remaining: 23m 25s
1000:	learn: 0.1622001	test: 0.5329300	best: 0.5329300 (1000)	total: 46.9s	remaining: 22m 39s
1500:	learn: 0.1050229	test: 0.5283115	best: 0.5263719 (1394)	total: 1m 10s	remaining: 22m 13s
bestTest = 0.5258125765
bestIteration = 1622
Shrink model to first 1623 iterations.
Logloss score: 0.5258125818721768
########### Fold number 2 
0:	learn: 1.4908068	test: 1.4925419	best: 1.4925419 (0)	total: 64.9ms	remaining: 32m 27s
500:	learn: 0.2720374	test: 0.5170232	best: 0.5169377 (499)	total: 23.6s	remaining: 23m 11s
1000:	learn: 0.1684521	test: 0.4964511	best: 0.4963258 (999)	total: 46.8s	remaining: 22m 36s
1500:	learn: 0.1076310	test: 0.4872697	best: 0.4866073 (1435)	total: 1m 10s	remaining: 22m 13s
bestTest = 0.4866072689
bestIteration = 1435
Shrink model to first 1436 iterations.
Logloss

### Feature Selection

In [None]:
# Feature selection with BoostARoota: fit on the training features, then keep only the
# selected columns in both the training matrix (X) and the test matrix (x_test).
X = train_df.drop(columns=['field_id','label'])
y = train_df['label'].astype(int)

seed_setter(2021)  # helper defined elsewhere in the notebook

#EXCEPTION: multi-class currently only supports "mlogloss" so must be passed in as eval_metric
# NOTE(review): the target is treated as 5-class by the models in this notebook, yet
# metric='logloss' is passed here. Left unchanged because switching the metric changes
# which features are selected -- confirm which one is intended.
br = BoostARoota(metric='logloss',max_rounds =1)

#Fit the model for the subset of variables
br.fit(X, y)


# Select just the important features
X=br.transform(X)
test_fields = test_df['field_id']
# Bind the reduced test matrix to x_test only. test_df is deliberately NOT overwritten:
# the transform drops 'field_id', so rebinding test_df would make this cell (and the
# CatBoost cell above) fail with a KeyError when re-run.
x_test = br.transform(test_df)

Round:  1  iteration:  1
Round:  1  iteration:  2
Round:  1  iteration:  3
Round:  1  iteration:  4
Round:  1  iteration:  5
Round:  1  iteration:  6
Round:  1  iteration:  7
Round:  1  iteration:  8
Round:  1  iteration:  9
Round:  1  iteration:  10
BoostARoota ran successfully! Algorithm went through  1  rounds.


### XGBClassifier

In [None]:
# 10-fold stratified cross-validation of an XGBoost multi-class model on the selected features.
# Produces:
#   xgboost_val_outputs - out-of-fold class probabilities for every training row
#   xgbmpreds           - one test-set probability matrix per fold (blended later)
#   sklearnscores       - validation log loss of every fold
seed_setter(2021)  # helper defined elsewhere in the notebook
seed = 944 # seed

skf = StratifiedKFold(n_splits=10,shuffle=True, random_state=seed) # for cross validation
sklearnscores = []
xgbmpreds = []

# Out-of-fold probabilities (5 classes); each row is written exactly once across the folds
xgboost_val_outputs = np.empty((train_df.shape[0],5), dtype = np.float32)
for fold, (train, val) in enumerate(skf.split(X, y), start=1):
    print('########### Fold number {} '.format(fold))
    # spliting the data
    x_train, x_val, y_train, y_val = X.iloc[train], X.iloc[val], y.iloc[train], y.iloc[val]

    # Only probabilities are consumed below, so the probability-output objective
    # ("multi:softprob") is requested explicitly instead of the label-output "multi:softmax".
    model = XGBClassifier(colsample_bytree=0.20, colsample_bylevel=0.05, num_class=5,
                          learning_rate=0.03,n_estimators=2000,tree_method='gpu_hist', gpu_id=0,
                          objective="multi:softprob", reg_alpha=2, reg_lambda=1.5,
                          seed=seed, silent=True, subsample=0.65)
    
    # fitting on train data
    # No eval_set is passed, so all n_estimators rounds are trained (no early stopping)
    # and `verbose` has no evaluation output to print.
    model.fit( x_train, y_train,verbose=100)
  
    
    # Predict the validation fold once and reuse it for both the score and the OOF buffer
    val_proba = model.predict_proba(x_val)
    score = log_loss(y_val, val_proba)
    xgboost_val_outputs[val] = val_proba
    print('Logloss score: {}'.format(score))
 
    # Making prediction probabities
    xgbmpred = model.predict_proba(x_test) 
    sklearnscores.append(score)
    xgbmpreds.append(xgbmpred)
print('mean logloss scores from sklearn: {} '.format(np.mean(sklearnscores)))

########### Fold number 1 
Logloss score: 0.4955061009626398
########### Fold number 2 
Logloss score: 0.5046752412682848
########### Fold number 3 
Logloss score: 0.6108567223483024
########### Fold number 4 
Logloss score: 0.5143088860916961
########### Fold number 5 
Logloss score: 0.48302074351112345
########### Fold number 6 
Logloss score: 0.5805514909380328
########### Fold number 7 
Logloss score: 0.5402481611871374
########### Fold number 8 
Logloss score: 0.5281108858084856
########### Fold number 9 
Logloss score: 0.415328962034097
########### Fold number 10 
Logloss score: 0.5432025040490048
mean logloss scores from sklearn: 0.5215809698198804 


### LightGBM Classifier

In [None]:
# 10-fold stratified cross-validation of a LightGBM multi-class model on the selected features.
# Produces:
#   lgbm_val_outputs - out-of-fold class probabilities for every training row
#   lgbmpreds        - one test-set probability matrix per fold (blended later)
#   sklearnscores    - validation log loss of every fold
seed_setter(2021)  # helper defined elsewhere in the notebook
seed = 379 # seed

skf = StratifiedKFold(n_splits=10,shuffle=True, random_state=seed) # for cross validation
sklearnscores = []
lgbmpreds = []

# Out-of-fold probabilities (5 classes); each row is written exactly once across the folds
lgbm_val_outputs = np.empty((train_df.shape[0],5), dtype = np.float32)

for fold, (train, val) in enumerate(skf.split(X, y), start=1):
    print('########### Fold number {} '.format(fold))
    # spliting the data
    x_train, x_val, y_train, y_val = X.iloc[train], X.iloc[val], y.iloc[train], y.iloc[val]

    model = LGBMClassifier(boosting_type='gbdt', num_class =5,learning_rate=0.05, n_estimators=2000,
                           subsample_for_bin=300000,deterministic=True, objective='multiclass',class_weight='balanced',
                           subsample=0.65,subsample_freq=20, colsample_bytree=0.15, reg_alpha=3,
                           random_state=seed,n_jobs=-1)

    # fitting on train data; training stops once the validation loss has not improved for 200 rounds
    model.fit( x_train, y_train, eval_set = (x_val,y_val),verbose=100 ,early_stopping_rounds=200)
    
    
    # Predict the validation fold once and reuse it for both the score and the OOF buffer
    val_proba = model.predict_proba(x_val)
    score = log_loss(y_val, val_proba)
    lgbm_val_outputs[val] = val_proba
    print('Logloss score: {}'.format(score))
 
    # Making prediction probabities
    lgbmpred = model.predict_proba(x_test)
    sklearnscores.append(score)
    lgbmpreds.append(lgbmpred)
print('mean log scores from sklearn: {} '.format(np.mean(sklearnscores)))

########### Fold number 1 
Training until validation scores don't improve for 200 rounds.
[100]	valid_0's multi_logloss: 0.588482
[200]	valid_0's multi_logloss: 0.520628
[300]	valid_0's multi_logloss: 0.508524
[400]	valid_0's multi_logloss: 0.502297
[500]	valid_0's multi_logloss: 0.501468
[600]	valid_0's multi_logloss: 0.500962
[700]	valid_0's multi_logloss: 0.500866
[800]	valid_0's multi_logloss: 0.499771
[900]	valid_0's multi_logloss: 0.49983
Early stopping, best iteration is:
[782]	valid_0's multi_logloss: 0.499435
Logloss score: 0.4994349175192044
########### Fold number 2 
Training until validation scores don't improve for 200 rounds.
[100]	valid_0's multi_logloss: 0.566512
[200]	valid_0's multi_logloss: 0.504784
[300]	valid_0's multi_logloss: 0.496097
[400]	valid_0's multi_logloss: 0.494882
[500]	valid_0's multi_logloss: 0.494969
Early stopping, best iteration is:
[314]	valid_0's multi_logloss: 0.493904
Logloss score: 0.49390427059538083
########### Fold number 3 
Training until 

## Predictions

In [None]:
# Temporal model (TempCNN, planet-5days) predictions: keep the five class columns 0-4 as an array
planet_class_columns = [0, 1, 2, 3, 4]
planet_pred = np.array(planet_time_preds[planet_class_columns])

In [None]:
# Ensemble the per-fold predictions of each boosting model using a geometric mean over folds
cbpreds_mean = gmean(catboostpreds, axis=0)
lgbm_mean = gmean(lgbmpreds, axis=0)
xgbm_mean = gmean(xgbmpreds, axis=0)

# Weighted sum of boosting trees models
# (effective weights: CatBoost 0.16, LightGBM 0.64, XGBoost 0.20)
predictions = 0.80*(cbpreds_mean*0.20 + lgbm_mean*0.80) + xgbm_mean*0.20

# Guard against silent NumPy broadcasting if the temporal model's output does not
# have one row per test field and one column per class.
if planet_pred.shape != predictions.shape:
    raise ValueError('planet_pred shape {} does not match boosting predictions shape {}'.format(
        planet_pred.shape, predictions.shape))

# Weighted sum of boosting models & temporal model "TempCNN"
predictions = 0.70*predictions + 0.30*planet_pred

# A geometric mean of probability vectors no longer sums to 1, so rescale every row
# back to a proper distribution. This is a per-row positive scaling and therefore
# does not change which class has the highest probability.
predictions = predictions / predictions.sum(axis=1, keepdims=True)

predictions = pd.DataFrame(predictions)
# NOTE(review): assumes the rows of every model's output follow the order of test_fields
predictions['fid'] = list(test_fields)

In [None]:
# Shape the DataFrame like the sample submission: first label the probability columns
# with crop ids to derive `crop_id`, then with crop names to derive `crop_name`.
position_to_crop_id = {position: position + 1 for position in range(5)}
crop_id_to_crop_name = dict(zip(
    range(1, 6),
    ['Wheat', 'Barley', 'Canola', 'Lucerne/Medics', 'Small grain grazing'],
))

# Columns 0-4 -> crop ids 1-5; the most probable column label per row is the crop id
predictions = predictions.rename(columns=position_to_crop_id)
probabilities_by_id = predictions.drop(columns=['fid'])
crop_id = list(probabilities_by_id.idxmax(axis = 1))
predictions['crop_id'] = crop_id

# Crop ids 1-5 -> crop names; the most probable column label per row is the crop name
predictions = predictions.rename(columns=crop_id_to_crop_name)
probabilities_by_name = predictions.drop(columns=['fid','crop_id'])
crop_name = list(probabilities_by_name.idxmax(axis = 1))
predictions['crop_name'] = crop_name

In [None]:
# Preview the formatted predictions: class probabilities, field id, predicted crop id and name
predictions.head()

Unnamed: 0,Wheat,Barley,Canola,Lucerne/Medics,Small grain grazing,fid,crop_id,crop_name
0,0.307055,0.328326,0.022638,0.066507,0.25362,185471,2,Barley
1,0.017097,0.010305,0.007057,0.954203,0.007096,185489,4,Lucerne/Medics
2,0.147395,0.459312,0.100253,0.145523,0.124887,185490,2,Barley
3,0.003344,0.005196,0.006333,0.969266,0.013802,185495,4,Lucerne/Medics
4,0.00428,0.00852,0.01162,0.924614,0.04854,185502,4,Lucerne/Medics


In [None]:
# Build one submission record per field: the field id, the predicted crop (id and name)
# and the list of the five class probabilities in submission order.
probability_columns = ['Wheat','Barley','Canola','Lucerne/Medics', 'Small grain grazing']
output_list = [
  {'fid': int(field_id),
   'crop_id': int(predicted_id),
   'crop_name': str(predicted_name),
   'crop_probs': predicted_probabilities}
  for field_id, predicted_id, predicted_name, predicted_probabilities in zip(
      predictions['fid'],
      predictions['crop_id'],
      predictions['crop_name'],
      predictions[probability_columns].values.tolist())
]


In [None]:
# Write the submission records as JSON to `output_name` and report where they were saved
output_name = './South_Africa_Submssion'
output_frame = pd.DataFrame(output_list)
output_frame.to_json(output_name)
print(f'Submission was saved to location: {output_name}')

Submission was saved to location: ./South_Africa_Submssion
