In [1]:
# Load the autoreload extension so that modules edited on disk are picked up
# without the need to restart the kernel.
%load_ext autoreload
# Mode 2: automatically reload imported modules before running each cell.
%autoreload 2

In [2]:
import boto3
import geopandas as gpd
import os
from io import BytesIO
import pandas as pd
from folium import GeoJson
import folium
import numpy as np
from tqdm import tqdm
from glob import glob
#import local modules
import sys
sys.path.append("/agrilearn_app/agrilearn/submodules/commons/")
from eolearn.core import EOPatch

#sys.path.append("/agrilearn_app/agrilearn/")
from agrilearn.commons.s3 import s3_utils
from agrilearn.mvp import gpkg_utils
from agrilearn.commons.crop_calendar import (CropCalendar)

## 1. Read Geopackage

In [3]:
#BASE_URL="/agrilearn_app/datasets"
# "$BASE_URL/algodao_2022_2023_2023_2023/eopatch/input_model"
# "$BASE_URL/arroz_jan_23_24/eopatch/input_model"
# "$BASE_URL/base/eopatch/input_model"
# "$BASE_URL/soja_2022_2023/eopatch/input_model"

In [4]:
# Geopackage that is read into `df` further down.
# Original author's note on this path: "cotton is there as well".
# NOTE(review): the file name mentions wheat, while the value_counts output
# recorded in this notebook lists only SOYBEAN — confirm this is the intended file.
GEOPACKAGE_PATH = "/agrilearn_app/datasets/base/geopackage/raw/wheat_train_v3.gpkg" #tá algodao tbm
# Root directory under which one sub-directory per eopatch location is looked up.
EOPATCH_PATH = "/agrilearn_app/datasets/soja_2022_2023/eopatch/input_model/"

# Destination CSV for the load-error report (the code that writes it is currently commented out).
OUTPUT_ERROR_CHECK = "/agrilearn_app/datasets/erros_eopatch.csv"

# Name of the column holding the crop / monitoring class.
label_monitoring_class = 'monitoring_class'
# Name of the column holding the eopatch location identifier.
# Alternative column name noted by the author: 'sentinel_eopatch_current'.
# 'sentinel_eopatch_current' #eopath_location
label_eopatch_path = "eopath_location"

In [5]:
# Read the geopackage at GEOPACKAGE_PATH with geopandas; every later cell works on `df`.
df = gpd.read_file(GEOPACKAGE_PATH)

In [6]:
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,monitoring_class,period,fonte,cultura_2,state,area,micro,start_season,end_season,peak_start,...,planting_end,start_of_cycle,end_of_cycle,length_of_cycle,is_valid,is_valid_POS,is_valid_LOS,set_type,sampled_date,geometry
0,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,357844.982246,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-02-28T00:00:00,150,True,True,True,test,mar_2025,"MULTIPOLYGON (((-47.35296 -15.69628, -47.35297..."
1,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,134693.415467,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-04-29T00:00:00,210,False,True,False,test,mar_2025,"MULTIPOLYGON (((-47.64978 -15.69533, -47.64978..."
2,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,129433.058009,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-03-05T00:00:00,155,True,True,True,test,mar_2025,"MULTIPOLYGON (((-47.35802 -15.81954, -47.358 -..."
3,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,388247.624199,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-02-28T00:00:00,150,True,True,True,test,mar_2025,"MULTIPOLYGON (((-47.52799 -15.94517, -47.52799..."
4,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,651343.073042,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-04-04T00:00:00,185,False,True,False,test,mar_2025,"MULTIPOLYGON (((-47.53758 -15.91131, -47.53751..."


In [7]:
# (rows, columns) of the loaded table.
df.shape

(66835, 32)

In [8]:
#df['dataset_part'].value_counts()

In [9]:
# Number of rows per monitoring class.
df[label_monitoring_class].value_counts()

monitoring_class
SOYBEAN    66835
Name: count, dtype: int64

In [10]:
# Distinct eopatch location identifiers; these are the directory names joined
# onto EOPATCH_PATH by the load check below.
df[label_eopatch_path].unique()

array(['start_2022-10-01_end_2023-05-01_monitoring_class_SOYBEAN_epsg4326_minxymaxxy_-47dot359820599480145_-15dot705615438375181_-47dot35240757834703_-15dot696252959420322',
       'start_2022-10-01_end_2023-05-01_monitoring_class_SOYBEAN_epsg4326_minxymaxxy_-47dot65330020280199_-15dot695913633306677_-47dot649553017349646_-15dot689246100204185',
       'start_2022-10-01_end_2023-05-01_monitoring_class_SOYBEAN_epsg4326_minxymaxxy_-47dot361617230048395_-15dot821777566316653_-47dot357720176744685_-15dot81654624433955',
       ...,
       'start_2022-09-01_end_2023-06-01_monitoring_class_SOYBEAN_epsg4326_minxymaxxy_-49dot574527100948416_-28dot99268663731538_-49dot56866805160163_-28dot988354723951606',
       'start_2022-09-01_end_2023-06-01_monitoring_class_SOYBEAN_epsg4326_minxymaxxy_-49dot58225261239184_-28dot99610023539503_-49dot579088508897414_-28dot99311579495743',
       'start_2022-09-01_end_2023-06-01_monitoring_class_SOYBEAN_epsg4326_minxymaxxy_-49dot59985959196058_-29dot011095830

In [11]:
# Load check: try to open the EOPatch of each location and collect the failures.
# dic_erros maps the eopatch directory that failed to load -> the exception raised.
dic_erros = {}

# NOTE: the [:2] slice restricts the check to the first two unique locations;
# remove it to check every location.
for eopatch_location_id in tqdm(df[label_eopatch_path].unique()[:2]):
    # Reset on every iteration: if building the path itself fails (e.g. the id
    # is not a string), the handler must not report the previous iteration's
    # path, nor hit a NameError on the very first iteration.
    final_eopatch_path = None
    try:

        final_eopatch_path = os.path.join(EOPATCH_PATH,
                                          eopatch_location_id,
                                          "eopatch_0_col-0_row-0")

        # `eopatch` keeps the last successfully loaded patch; a later cell inspects it.
        eopatch = EOPatch.load(final_eopatch_path)

    except Exception as e:
        # Key the failure by the path when it could be built, otherwise by the raw id.
        if final_eopatch_path is not None:
            error_key = final_eopatch_path
        else:
            error_key = repr(eopatch_location_id)
        dic_erros[error_key] = e

# Disabled: persist the collected failures as a two-column CSV at OUTPUT_ERROR_CHECK.
#df_erros = pd.DataFrame(list(dic_erros.items()), columns=['Chave', 'Valor'])
#df_erros.to_csv(OUTPUT_ERROR_CHECK, index=False)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 100.17it/s]


In [12]:
# Same preview as earlier in the notebook; no visible cell reassigns `df` in between.
df.head()

Unnamed: 0,monitoring_class,period,fonte,cultura_2,state,area,micro,start_season,end_season,peak_start,...,planting_end,start_of_cycle,end_of_cycle,length_of_cycle,is_valid,is_valid_POS,is_valid_LOS,set_type,sampled_date,geometry
0,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,357844.982246,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-02-28T00:00:00,150,True,True,True,test,mar_2025,"MULTIPOLYGON (((-47.35296 -15.69628, -47.35297..."
1,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,134693.415467,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-04-29T00:00:00,210,False,True,False,test,mar_2025,"MULTIPOLYGON (((-47.64978 -15.69533, -47.64978..."
2,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,129433.058009,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-03-05T00:00:00,155,True,True,True,test,mar_2025,"MULTIPOLYGON (((-47.35802 -15.81954, -47.358 -..."
3,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,388247.624199,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-02-28T00:00:00,150,True,True,True,test,mar_2025,"MULTIPOLYGON (((-47.52799 -15.94517, -47.52799..."
4,SOYBEAN,2022/2023,Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,,DF,651343.073042,559,2022-10-01,2023-05-01,2022-12-01,...,2023-01-01,2022-10-01T00:00:00,2023-04-04T00:00:00,185,False,True,False,test,mar_2025,"MULTIPOLYGON (((-47.53758 -15.91131, -47.53751..."


In [13]:
# Inspect the LABELS entry stored in the patch's meta_info.
# NOTE: `eopatch` is the variable left bound by the load-check loop, i.e. the
# last patch that loaded successfully; this cell fails if none did.
eopatch['meta_info']['LABELS']

[['SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN',
  'SOYBEAN']]

In [None]:
import shutil

# Collects the source directories that copy_files could not find (one row each).
missing_files_df = pd.DataFrame(columns=['MissingPath'])

def copy_files(row):
    """Copy one directory tree from the 'base' dataset to the path in row['Chave'].

    The source path is row['Chave'] with 'teste_pre_safra_2024_2025' replaced
    by 'base'. If the source exists it is copied onto the destination;
    otherwise the source path is appended to the module-level
    ``missing_files_df``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the key 'Chave' holding the destination directory path.

    Returns
    -------
    None
    """
    destination_path = row['Chave']
    source_path = destination_path.replace('teste_pre_safra_2024_2025', 'base')

    if os.path.exists(source_path):
        # dirs_exist_ok=True: an already existing destination directory would
        # otherwise raise FileExistsError and abort the whole DataFrame.apply run.
        shutil.copytree(source_path, destination_path, dirs_exist_ok=True)
    else:
        # Record the missing source in place so the caller's reference stays valid.
        missing_files_df.loc[len(missing_files_df)] = [source_path]

In [None]:
# Run copy_files once per row of df_erros_v2.
# NOTE(review): df_erros_v2 is not defined in any visible cell of this notebook;
# it has to be created (with a 'Chave' column) before this cell can run on a fresh kernel.
df_erros_v2.apply(copy_files, axis=1)

In [None]:
# Show the source paths that copy_files could not find.
missing_files_df

In [None]:
# Persist the list of missing source paths.
# NOTE(review): the file name ends in '.csv.csv' — looks like a typo; confirm
# nothing downstream reads this exact name before changing it.
missing_files_df.to_csv('/agrilearn_app/datasets/teste_pre_safra_2024_2025/reports/missing_path.csv.csv', index=False)

In [None]:
# path_id = {}
# failed = {}

# for index, row in tqdm(df.iterrows(), total=len(df)):

#     try:
    
#         eopatch = EOPatch.load(row['local_eopatch_path'], 
#                                lazy_loading=True)
    
#         path_id[row['local_eopatch_path']] = eopatch.mask['SCL'].shape[0]
#     except Exception as e:
#         failed[row['local_eopatch_path']] = e
    
# df_checked = pd.DataFrame(list(path_id.items()), columns=['path', "shape"])
# df_failed = pd.DataFrame(list(failed.items()), columns=['path', "shape"])

In [None]:
# Load (with lazy_loading enabled) the EOPatch whose path is stored at index label 200.
# NOTE(review): the 'local_eopatch_path' column is not visible in the truncated
# previews of `df` above — confirm it exists in this geopackage.
sample_eopatch_path = df['local_eopatch_path'].loc[200]
eopatch_test = EOPatch.load(sample_eopatch_path, lazy_loading=True)

In [None]:
# Inspect the X_DATA feature stored under the patch's 'data' feature type.
eopatch_test['data']['X_DATA']

In [None]:
# Display the full representation of the sample patch.
eopatch_test