In [1]:
# Load the autoreload extension so that modules edited on disk are picked up
# without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before each
# cell is executed.
%autoreload 2

In [2]:
import boto3
import os
import geopandas as gpd
import pandas as pd
import folium
import numpy as np
from folium import GeoJson
from io import BytesIO
from shapely.geometry import Polygon, MultiPolygon

In [3]:
import sys
# Extend the module search path with the commons submodule directory
# (presumably so the `agrilearn.commons` import in the next cell resolves).
# The path is relative, so it is resolved against the kernel's working directory.
sys.path.append("../agrilearn/submodules/commons/")

In [4]:
from agrilearn.commons.s3 import s3_utils
from agrilearn.crop_classification import evaluate_utils, yaml_utils, processing

2025-04-17 13:12:13.492379: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-17 13:12:14.144545: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744895534.385922  652153 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744895534.451767  652153 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-17 13:12:15.067547: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

### Resumo da análise

- Existem colunas duplicadas que representam a mesma variável [``start_season``, ``start_of_season``], além de [``end_season``, ``end_of_season``], [``peak_of_season``, ``peak_season``]
- Existem dados duplicados considerando o ``eopatch_location``
- Não existe mais a coluna ``id_talhao``?

### Global Variables

In [6]:
# --- Column names -----------------------------------------------------------
LABEL_MONITORING_CLASS = "monitoring_class"
# "eopath_location" (sic) is the spelling of the column in the geopackage;
# 'sentinel_eopatch_current' is the alternative column that was considered.
LABEL_EOPATCH_LOCATION = "eopath_location"

# --- Dataset locations ------------------------------------------------------
# The lists below are parallel: the same position in each list is read together
# when a dataset is selected by index.
# Previously used geopackage, kept for reference:
#GEOPACKAGE_PATH = ["/agrilearn_app/output/experiment_12/data/geopackage/processed/SOYBEAN_96505_CORN_67593_SUGAR_CANE_35276_COTTON_6344_RICE_1842_processed.gpkg"]
GEOPACKAGE_PATH = [
    "/agrilearn_app/datasets/COTTON/2023_2023/geopackage/processed/COTTON_2980.gpkg",
]
EOPATCH_PATH_PROCESSED = [
    "/agrilearn_app/datasets/COTTON/eopatch/processed/",
]
EOPATCH_PATH_INPUT_MODEL = [
    "/agrilearn_app/datasets/COTTON/eopatch/input_model/",
]

# Directory that receives the reports written by this notebook.
OUTPUT_PATH = [
    "/agrilearn_app/datasets/COTTON/2023_2023/reports",
]

In [7]:
# Position of the dataset to analyse within the parallel configuration lists.
index = 0

# Pull the matching entry out of every configuration list in one step.
geopackage, eopatch_path, eopatch_input_model, output_path = (
    GEOPACKAGE_PATH[index],
    EOPATCH_PATH_PROCESSED[index],
    EOPATCH_PATH_INPUT_MODEL[index],
    OUTPUT_PATH[index],
)

# Load the selected geopackage into a (Geo)DataFrame.
df = gpd.read_file(geopackage)

In [8]:
# Show every column name of the loaded geopackage as a plain list.
list(df.columns)

['monitoring_class',
 'period',
 'fonte',
 'state',
 'area',
 'micro',
 'eopath_location',
 'start_season',
 'end_season',
 'peak_start',
 'peak_end',
 'start_of_season',
 'peak_of_season',
 'end_of_season',
 'length_of_season',
 'start_of_cycle',
 'end_of_cycle',
 'length_of_cycle',
 'dataset_source',
 'meso',
 'field_id',
 'planting_start',
 'planting_end',
 'cycle_start',
 'cycle_end',
 'LOS',
 'is_valid',
 'is_valid_cvt',
 'peaks',
 'set_type',
 'sampled_date',
 'is_valid_metrics',
 'sos_valid',
 'pos_valid',
 'eos_valid',
 'los_valid',
 'is_valid_POS',
 'is_valid_LOS',
 'dataset_part',
 'cultura_2',
 'obs_extra',
 'sentinel_eopatch_current',
 'contour_score',
 'contour_selected_timestamp',
 'compac_index',
 'key_bucket',
 'local_eopatch_path',
 'total_imagens_do_intervalo',
 'total_de_imagem',
 'status_missing_images',
 'days_gap',
 'dates_diff',
 'mean_dates_diff',
 'status_series_missing',
 'monitoring_class_path',
 'interest_area_id',
 'emergence_date',
 'gt_class',
 'obs_conto

In [9]:
# Number of rows per value of the monitoring-class column.
df[LABEL_MONITORING_CLASS].value_counts()

monitoring_class
COTTON    2980
Name: count, dtype: int64

In [11]:
# Run the processed-eopatch check from `processing` against the geopackage rows.
# The call returns two frames, bound below as the detailed result and its overview.
# This is slow: the recorded run needed roughly 7 minutes for 2980 rows.
df_processed, df_overview_processed = (
    processing.check_processed_eopatches_features_based_geopackage(
        df,
        eopatch_path=eopatch_path,
        label_eopatch_path=LABEL_EOPATCH_LOCATION,
    )
)

100%|██████████| 2980/2980 [06:56<00:00,  7.16it/s]


In [None]:
# Display the overview frame returned by the processed-eopatch check.
df_overview_processed

In [13]:
# Make sure the report directory exists, then persist the overview as CSV.
os.makedirs(output_path, exist_ok=True)
overview_csv_path = f"{output_path}/overview_eopatch.csv"
df_overview_processed.to_csv(overview_csv_path)

In [14]:
# Attach the processed-eopatch check results to the geopackage rows.
#
# The merged frame is bound back to `df_processed` (later cells rely on that
# name). Without a guard, running this cell a second time would merge `df` with
# the already-merged frame and yield suffixed duplicate columns (`*_x` / `*_y`).
# Judging by the merged output, the raw check result carries no
# LABEL_EOPATCH_LOCATION column, so its presence is used as the
# "already merged" marker and the merge is skipped in that case.
if LABEL_EOPATCH_LOCATION not in df_processed.columns:
    # NOTE(review): the notebook summary reports duplicated eopatch locations;
    # with a non-unique key on both sides this inner join multiplies the
    # repeated rows - confirm whether the key should be de-duplicated first.
    df_processed = pd.merge(
        df,
        df_processed,
        left_on=LABEL_EOPATCH_LOCATION,
        right_on='geopackage',
    )

In [15]:
# Preview the first rows of the merged frame (geopackage columns + check results).
df_processed.head()

Unnamed: 0,monitoring_class,period,fonte,state,area,micro,eopath_location,start_season,end_season,peak_start,...,set,geometry,geopackage,geopackage_class,eopatch_processed_exists,len_data,len_bands,len_time_interval,len_timestamp,error_processed
0,COTTON,2023/2023,Agrosatélite - Grãos 2022/2023,RO,,6.0,start_2023-01-01_end_2023-09-01_monitoring_cla...,2023-01-01,2023-09-01,2023-02-01,...,,"MULTIPOLYGON (((-60.09317 -12.68038, -60.09279...",start_2023-01-01_end_2023-09-01_monitoring_cla...,COTTON,False,,,,,'bands'
1,COTTON,2023/2023,Agrosatélite - Grãos 2022/2023,MT,1342.74499544682,6.0,start_2022-12-01_end_2023-10-01_monitoring_cla...,2022-12-01,2023-10-01,2023-02-01,...,,"MULTIPOLYGON (((-60.08045 -12.7151, -60.08176 ...",start_2022-12-01_end_2023-10-01_monitoring_cla...,COTTON,True,4.0,10.0,2.0,30.0,
2,COTTON,2023/2023,Agrosatélite - Grãos 2022/2023,RO,2464683.315293218,6.0,start_2023-01-01_end_2023-09-01_monitoring_cla...,2023-01-01,2023-09-01,2023-02-01,...,,"MULTIPOLYGON (((-60.08677 -12.68559, -60.08677...",start_2023-01-01_end_2023-09-01_monitoring_cla...,COTTON,False,,,,,'bands'
3,COTTON,2023/2023,Agrosatélite - Grãos 2022/2023,RO,2587796.098465558,6.0,start_2023-01-01_end_2023-09-01_monitoring_cla...,2023-01-01,2023-09-01,2023-02-01,...,,"MULTIPOLYGON (((-60.09277 -12.68095, -60.09225...",start_2023-01-01_end_2023-09-01_monitoring_cla...,COTTON,False,,,,,'bands'
4,COTTON,2023/2023,Agrosatélite - Grãos 2022/2023,RO,1905122.280322101,6.0,start_2023-01-01_end_2023-09-01_monitoring_cla...,2023-01-01,2023-09-01,2023-02-01,...,,"MULTIPOLYGON (((-60.10734 -12.71187, -60.10736...",start_2023-01-01_end_2023-09-01_monitoring_cla...,COTTON,False,,,,,'bands'


In [19]:
# Distribution of `error_processed` among rows flagged as having no processed eopatch.
# value_counts() skips NaN by default, so rows without a recorded error are not counted here.
missing_processed = df_processed['eopatch_processed_exists'] == False
df_processed.loc[missing_processed, 'error_processed'].value_counts()

error_processed
'bands'    96
Name: count, dtype: int64

In [20]:
# Distribution of `monitoring_class` among rows flagged as having no processed eopatch.
missing_processed = df_processed['eopatch_processed_exists'] == False
df_processed.loc[missing_processed, 'monitoring_class'].value_counts()

monitoring_class
COTTON    102
Name: count, dtype: int64

In [18]:
# Distribution of `dataset_source` among rows flagged as having no processed eopatch.
missing_processed = df_processed['eopatch_processed_exists'] == False
df_processed.loc[missing_processed, 'dataset_source'].value_counts()

dataset_source
/home/ec2-user/datasets/algodao_2022_2023_2023_2023/geopackage/raw/algodao_jan_4751.gpkg    102
Name: count, dtype: int64

In [None]:
# result_string= '_'.join([f"{cls}_{count}" for cls, count in df_join['monitoring_class'].value_counts().items()])
# filename= f"/agrilearn_app/output/experiment_12/data/geopackage/processed/{result_string}_processed.gpkg"
# filename

In [None]:
# df_processed.to_file(filename, driver='GPKG', engine='fiona')

In [None]:
# GEOPACKAGE_PATH = ["SOYBEAN_96505_CORN_67593_SUGAR_CANE_35276_COTTON_6344_RICE_1842.gpkg"]

In [None]:
# Run the input-model eopatch check from `processing` against the merged frame.
# The call returns two frames, bound below as the detailed result and its overview.
df_input_model, df_overview_input_model = (
    processing.check_input_model_eopatches_features_based_geopackage(
        df_processed,
        eopatch_path=eopatch_input_model,
        label_eopatch_path=LABEL_EOPATCH_LOCATION,
    )
)

In [None]:
# Attach the input-model check results to the already-merged processed frame.
# NOTE(review): the result is bound back to `df_input_model`, so re-running this
# cell merges against the previous merge result - re-run the check cell first.
# NOTE(review): `df_processed` already carries a 'geopackage' column from the
# earlier merge, so overlapping column names will presumably come out with
# `_x` / `_y` suffixes - confirm against the printed columns.
df_input_model = pd.merge(
    df_processed,
    df_input_model,
    left_on=LABEL_EOPATCH_LOCATION,
    right_on='geopackage',
)

In [None]:
# Inspect the column names of the frame that combines processed and input-model checks.
df_input_model.columns

In [None]:
# Write the combined frame next to the geopackage that was actually loaded.
# The previous hard-coded target pointed at the multi-crop experiment_12 file
# name, which does not match the dataset selected in the configuration cell and
# would have stored this dataset's rows under another dataset's name.
processed_and_input_path = f"{os.path.splitext(geopackage)[0]}_processed_and_input.gpkg"
df_input_model.to_file(processed_and_input_path, driver='GPKG', engine='fiona')

In [None]:
# result_string= '_'.join([f"{cls}_{count}" for cls, count in df_join['monitoring_class'].value_counts().items()])
# filename= f"/agrilearn_app/output/experiment_12/data/geopackage/processed/{result_string}_processed.gpkg"
# filename

In [None]:
# df_overview_processed

In [None]:
# df_overview_input_model

In [None]:
# df_full.to_file(path.replace('.gpkg', '_features.gpkg'),
#                 driver='GPKG', 
#                 engine='fiona')

In [None]:
# for path in GEOPACKAGE_PATH:
#     print(f'... reading {path}')
#     df = gpd.read_file(path)

#     print(f'... processing {path}')
#     df_results, df_overview = processing.check_processed_eopatches_features_based_geopackage(df, 
#                                                                                              eopatch_path=EOPATCH_PATH_PROCESSED,
#                                                                                              label_eopatch_path=LABEL_EOPATCH_LOCATION)
#     print(f'... merge data with processed features {}')
#     df = pd.merge(df_results, 
#                   df, 
#                   left_on='geopackage', 
#                   right_on=LABEL_EOPATCH_LOCATION)

#     print(f'... processing {path}')
#     df_results, df_overview = processing.check_input_model_eopatches_features_based_geopackage(df, 
#                                                                                                eopatch_path=EOPATCH_PATH_INPUT_MODEL,
#                                                                                                label_eopatch_path=LABEL_EOPATCH_LOCATION)

#     print(f'... merge data with input_model features')
#     df = pd.merge(df_results, 
#                   df, 
#                   left_on='geopackage', 
#                   right_on=LABEL_EOPATCH_LOCATION)

    
#     # print('... savind new file')
#     # df.to_file(path, driver='GPKG', engine='fiona')