In [1]:
import boto3
import geopandas as gpd
import pandas as pd
import folium
import numpy as np
from folium import GeoJson
from io import BytesIO

In [6]:
import sys
# Put the project checkout on the import path so that the agrilearn imports
# below can be resolved from it.
# NOTE(review): hard-coded absolute path — presumably only valid inside the
# project's container; confirm before running elsewhere.
sys.path.append("/agrilearn_app/agrilearn/")
# NOTE(review): s3_utils, str_utils and eval_util are not referenced by any
# cell visible in this notebook export — confirm whether they are still needed.
from agrilearn.crop_classification import s3_utils, str_utils
from agrilearn.crop_classification import evaluate_utils as eval_util
from agrilearn.crop_classification import yaml_utils, processing

2025-01-15 18:18:31.661889: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-15 18:18:31.679674: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736965111.699630   11347 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736965111.705805   11347 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-15 18:18:31.727060: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

### Global Variables

In [7]:
# Load the experiment YAML whose settings are read by the cells below.
experiment_config_path = "/agrilearn_app/output/experiment/experiment_10.yaml"
config = yaml_utils.load_config(experiment_config_path)

In [8]:
# Name of the column that holds the crop label of each row.
label_monitoring_class = 'monitoring_class'
# Name of the column that holds the EOPatch location; the original author
# noted 'sentinel_eopatch_current' as the alternative column.
label_eopatch_path = "eopath_location"
# Path of the training GeoPackage, read from the "data" section of the config.
data_section = config['data']
GEOPACKAGE_PATH = data_section['geopackage_train_data_path']

# 1. Read Datasets

### Data description
- monitoring_class: é a cultura de interesse do cliente ou da amostra de treinamento (SOYBEAN, CORN)
- period: é a safra agrícola (2023/2024, 2024/2025)
- state: é o estado do polígono
- field_id: id do talhão
- fonte: a origem dos dados (mapas temáticos, banco de dados)
- area:
- micro:
- start_season: é a data que inicia a safra pra determinada cultura (monitoring_class) e safra (period)
- end_season: é a data que termina a safra pra determinada cultura (monitoring_class) e safra (period)
- peak_start: é a data que inicia o período em que pode ocorrer o pico pra determinada cultura (monitoring_class) e safra (period)
- peak_end: é a data que termina o período em que pode ocorrer o pico pra determinada cultura (monitoring_class) e safra (period) (Obs: temos um calendário pra isso, um arquivo yaml)
- start_of_cycle: data do início do cultivo segundo o calendário agrícola
- end_of_cycle: data do fim do calendário de cultivo segundo o calendário agrícola
- length_of_cycle:
- start_of_season: data da emergência da cultura
- end_of_season: data da colheita da cultura
- peak_of_season: data do pico vegetativo da cultura
- length_of_season: Duração do cultivo
- eopath_location: é o caminho onde o eopatch está salvo (imagens p/ inferência)
- geometry: é a geometria do polígono

In [9]:
# Read the training GeoPackage into a GeoDataFrame.
df = gpd.read_file(filename=GEOPACKAGE_PATH)

In [10]:
# Number of rows per crop class, using the label column configured above.
df[label_monitoring_class].value_counts()

monitoring_class
SOYBEAN    29670
CORN       14600
COTTON      1619
RICE        1172
Name: count, dtype: int64

In [11]:
# Number of rows per state.
state_column = df['state']
state_column.value_counts()

state
PR    7035
MT    6641
MG    6110
SP    5871
RS    4154
GO    4051
MS    2801
MA    1887
TO    1867
RO    1357
SC    1220
BA    1202
PA    1202
PI    1164
RR     275
DF     185
AP      39
Name: count, dtype: int64

In [12]:
# Report the size of the loaded table.
n_rows, n_columns = df.shape
print(f"There are {n_rows} rows and {n_columns} columns")

There are 47061 rows and 54 columns


In [13]:
# Report the overall time span: earliest season start to latest season end.
first_season_start = df['start_season'].min()
last_season_end = df['end_season'].max()
print(f"Dados de {first_season_start} a {last_season_end}")

Dados de 2017-09-01 00:00:00 a 2024-09-01 00:00:00


In [14]:
# Show every column name as a plain Python list.
df.columns.tolist()

['monitoring_class',
 'period',
 'fonte',
 'state',
 'area',
 'meso',
 'eopath_location',
 'start_season',
 'end_season',
 'peak_start',
 'peak_end',
 'field_id',
 'planting_start',
 'planting_end',
 'cycle_start',
 'cycle_end',
 'LOS',
 'is_valid',
 'is_valid_cvt',
 'start_of_season',
 'end_of_season',
 'peaks',
 'length_of_season',
 'set_type',
 'sampled_date',
 'micro',
 'peak_of_season',
 'is_valid_metrics',
 'sos_valid',
 'pos_valid',
 'eos_valid',
 'los_valid',
 'start_of_cycle',
 'end_of_cycle',
 'length_of_cycle',
 'is_valid_POS',
 'is_valid_LOS',
 'dataset_part',
 'cultura_2',
 'obs_extra',
 'sentinel_eopatch_current',
 'contour_score',
 'contour_selected_timestamp',
 'compac_index',
 'key_bucket',
 'local_eopatch_path',
 'total_imagens_do_intervalo',
 'total_de_imagem',
 'status_missing_images',
 'days_gap',
 'dates_diff',
 'mean_dates_diff',
 'status_series_missing',
 'geometry']

In [15]:
# Share of rows (in percent) per dataset split.
split_share = df['dataset_part'].value_counts(normalize=True)
split_share.mul(100)

dataset_part
train    69.979388
test     15.020930
val      14.999681
Name: proportion, dtype: float64

# 2. Data Processing

### 2.1 Check NaN Values

In [16]:
# Count the missing values in each column.
missing_mask = df.isnull()
missing_mask.sum()

monitoring_class                  0
period                            0
fonte                             0
state                             0
area                              0
meso                          12378
eopath_location                   0
start_season                      0
end_season                        0
peak_start                        0
peak_end                          0
field_id                      12378
planting_start                    0
planting_end                      0
cycle_start                   12378
cycle_end                     12378
LOS                           12378
is_valid                          0
is_valid_cvt                  12378
start_of_season                   0
end_of_season                     0
peaks                         12378
length_of_season                  0
set_type                      33483
sampled_date                  33483
micro                         34683
peak_of_season                34683
is_valid_metrics            

### 2.2 Check Duplicated

In [17]:
# A row is a duplicate when an earlier row has the same (period, geometry)
# pair; the first occurrence is kept and the later ones are dropped in place.
shape_before = len(df)
is_duplicate = df.duplicated(subset=['period', 'geometry'])
idx_drop_duplicated = df.index[is_duplicate.to_numpy()]
df.drop(index=idx_drop_duplicated, inplace=True)
shape_after = len(df)
print(f"{shape_before-shape_after} registros duplicados foram encontrados")

0 registros duplicados foram encontrados


### 2.3 Data Formatting

In [18]:
# Print the dtype and non-null count of every column.
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 47061 entries, 0 to 47060
Data columns (total 54 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   monitoring_class            47061 non-null  object        
 1   period                      47061 non-null  object        
 2   fonte                       47061 non-null  object        
 3   state                       47061 non-null  object        
 4   area                        47061 non-null  float64       
 5   meso                        34683 non-null  float64       
 6   eopath_location             47061 non-null  object        
 7   start_season                47061 non-null  datetime64[ms]
 8   end_season                  47061 non-null  datetime64[ms]
 9   peak_start                  47061 non-null  datetime64[ms]
 10  peak_end                    47061 non-null  datetime64[ms]
 11  field_id                    34683 non-null  ob

In [19]:
# Season and peak boundary columns that must hold datetimes.
# The df.info() output earlier in this notebook already reports them as
# datetime64[ms], so this conversion acts as a safeguard.
datetime_columns = ['start_season', 'end_season', 'peak_start', 'peak_end']

df[datetime_columns] = df[datetime_columns].apply(pd.to_datetime)

# Confirm the resulting dtypes.
df[datetime_columns].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47061 entries, 0 to 47060
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   start_season  47061 non-null  datetime64[ms]
 1   end_season    47061 non-null  datetime64[ms]
 2   peak_start    47061 non-null  datetime64[ms]
 3   peak_end      47061 non-null  datetime64[ms]
dtypes: datetime64[ms](4)
memory usage: 1.4 MB


### 2.4 Vamos manter os dados nulos no campo ``contour_score``

In [20]:
# Inspect the rows that have no contour_score (they are kept, not dropped).
missing_contour_score = df['contour_score'].isna()
df.loc[missing_contour_score]

Unnamed: 0,monitoring_class,period,fonte,state,area,meso,eopath_location,start_season,end_season,peak_start,...,key_bucket,local_eopatch_path,total_imagens_do_intervalo,total_de_imagem,status_missing_images,days_gap,dates_diff,mean_dates_diff,status_series_missing,geometry
0,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,7477129.0,222.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,,,,,,,,,,"MULTIPOLYGON (((-45.7944 -14.74452, -45.79466 ..."
4,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,8072990.0,220.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,,,,,,,,,,"MULTIPOLYGON (((-45.97327 -13.19431, -45.97295..."
5,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,953564.0,220.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,,,,,,,,,,"MULTIPOLYGON (((-46.23854 -11.65442, -46.23862..."
10,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,1702437.0,222.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,,,,,,,,,,"MULTIPOLYGON (((-45.8026 -13.4712, -45.8026 -1..."
11,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,7536828.0,220.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,,,,,,,,,,"MULTIPOLYGON (((-46.12814 -12.56927, -46.12266..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47011,RICE,2018/2019,Conab - Arroz Irrigado 2018/2019,SC,1918965.0,469.0,start_2018-09-01_end_2019-06-01_monitoring_cla...,2018-09-01,2019-06-01,2018-11-01,...,datasets/culture/culture_v02/rice_test_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,55,35,OK,,,5.44,OK,"POLYGON ((-49.5122 -28.69859, -49.51208 -28.69..."
47034,RICE,2019/2020,Conab - Arroz Irrigado 2019/2020,RS,714262.0,505.0,start_2019-09-01_end_2020-06-01_monitoring_cla...,2019-09-01,2020-06-01,2019-11-01,...,datasets/culture/culture_v02/rice_test_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,68,34,OK,,,5.15,OK,"POLYGON ((-52.55275 -32.29614, -52.55275 -32.2..."
47050,RICE,2018/2019,Conab - Arroz Irrigado 2018/2019,SC,1918965.0,469.0,start_2018-09-01_end_2019-06-01_monitoring_cla...,2018-09-01,2019-06-01,2018-11-01,...,datasets/culture/culture_v02/rice_test_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,43,30,OK,,,5.69,OK,"POLYGON ((-49.51818 -28.68918, -49.51812 -28.6..."
47051,RICE,2018/2019,Conab - Arroz Irrigado 2018/2019,SC,1918965.0,469.0,start_2018-09-01_end_2019-06-01_monitoring_cla...,2018-09-01,2019-06-01,2018-11-01,...,datasets/culture/culture_v02/rice_test_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,43,36,OK,,,5.57,OK,"POLYGON ((-49.52119 -28.70226, -49.52169 -28.7..."


In [21]:
# Fraction of rows without a contour_score.
missing_contour_score = df['contour_score'].isna()
int(missing_contour_score.sum()) / len(df)

0.3097256751875226

In [22]:
# Crop-class breakdown of the rows without a contour_score.
df.loc[df['contour_score'].isna(), 'monitoring_class'].value_counts()

monitoring_class
CORN       13628
SOYBEAN      825
RICE          79
COTTON        44
Name: count, dtype: int64

### 2.5 Drop data outside the crop cycle

In [23]:
# Accepted LOS interval per crop, as (lower, upper).
# NOTE(review): presumably expressed in days — confirm against
# processing.filter_data_from_culture_cycle.
culture_cycle_ranges = {
    'COTTON': (140, 220),
    'CORN': (105, 160),
    'SOYBEAN': (90, 160),
    'WHEAT': (100, 160),
    'RICE': (100, 150),
    'BEAN': (60, 100),
    'SUGAR_CANE': (300, 570),
}

# NOTE(review): the outputs below (5369 rows in drop_data, 41692 rows left in
# df out of 47061) suggest the helper returns the removed rows and also removes
# them from df in place — confirm, since that would make re-running this cell
# change its result.
drop_data = processing.filter_data_from_culture_cycle(
    df,
    label_monitoring_class='monitoring_class',
    label_los='LOS',
    culture_cycles=culture_cycle_ranges,
)

Dados deletados: 5369
Porcentagem de dados deletados: 11.41%


In [24]:
# Crop-class breakdown of the rows returned by the culture-cycle filter.
removed_classes = drop_data['monitoring_class']
removed_classes.value_counts()

monitoring_class
SOYBEAN    2739
COTTON     1238
RICE        730
CORN        662
Name: count, dtype: int64

In [25]:
# Display the path of the GeoPackage that was loaded into df.
GEOPACKAGE_PATH

'/agrilearn_app/datasets/base/geopackage/processed/SOYBEAN_29670_CORN_14600_COTTON_1619_RICE_1172.gpkg'

In [26]:
# df.to_file("/agrilearn_app/datasets/base/geopackage/processed/SOYBEAN_29670_CORN_21919_COTTON_1619_RICE_1172_clear_cicle.gpkg", driver='GPKG', engine='fiona')

In [27]:
# Display the dataset split assigned to each remaining row.
df['dataset_part']

0        train
1         test
4        train
5        train
6          val
         ...  
47037    train
47038    train
47039    train
47047     test
47060    train
Name: dataset_part, Length: 41692, dtype: object

# 3. Data Analysis

### 3.1 Analysing Target y (monitoring_class)

In [28]:
# Absolute count and percentage share of each crop class, side by side.
target = df['monitoring_class']
pd.concat(
    [target.value_counts(), target.value_counts(normalize=True).mul(100)],
    axis=1,
)

Unnamed: 0_level_0,count,proportion
monitoring_class,Unnamed: 1_level_1,Unnamed: 2_level_1
SOYBEAN,26931,64.595126
CORN,13938,33.430874
RICE,442,1.060155
COTTON,381,0.913844


### 3.2 Analysing fonte de dados

In [29]:
# Absolute count and percentage share of each data source, side by side.
source_column = df['fonte']
pd.concat(
    [source_column.value_counts(), source_column.value_counts(normalize=True).mul(100)],
    axis=1,
)

Unnamed: 0_level_0,count,proportion
fonte,Unnamed: 1_level_1,Unnamed: 2_level_1
Agrosatélite - Grãos Brasil 2021/2022,15029,36.047683
Agrosatélite - Grãos Brasil 2020/2021,13843,33.203013
Agrosatélite - Grãos 2022/2023,6611,15.856759
Agrosatélite - Grãos Sul 2022/2023,2960,7.099683
Agrosatélite - Grãos Amazonia e Cerrado 2022/2023,2807,6.732707
Conab - Arroz Irrigado 2019/2020,181,0.434136
Conab - Arroz Irrigado 2018/2019,144,0.34539
Conab - Arroz Irrigado 2017/2018,89,0.21347
Conab - Arroz Irrigado 2021/2022,28,0.067159


### 3.3 Analysing dados por estado e classes

In [30]:
# Absolute count and percentage share of each state, side by side.
state_column = df['state']
pd.concat(
    [state_column.value_counts(), state_column.value_counts(normalize=True).mul(100)],
    axis=1,
)

Unnamed: 0_level_0,count,proportion
state,Unnamed: 1_level_1,Unnamed: 2_level_1
PR,6339,15.204356
MT,5834,13.993092
MG,5577,13.376667
SP,5211,12.498801
GO,3817,9.155234
RS,3293,7.898398
MS,2648,6.351338
TO,1728,4.14468
MA,1658,3.976782
RO,1308,3.137293


In [31]:
# Rows per (state, crop class) pair, counted on the non-null 'period' values.
rows_by_state_and_class = df.groupby(['state', 'monitoring_class'])
rows_by_state_and_class[['period']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,period
state,monitoring_class,Unnamed: 2_level_1
AP,SOYBEAN,30
BA,CORN,229
BA,COTTON,36
BA,SOYBEAN,457
DF,CORN,87
DF,SOYBEAN,90
GO,CORN,1183
GO,COTTON,25
GO,RICE,72
GO,SOYBEAN,2537


### 3.4 Analysis field_id (existem IDs duplicados para os talhões)

In [32]:
# Number of distinct field ids; missing ids are not counted.
df['field_id'].nunique(dropna=True)

28489

In [33]:
# Rows whose (field_id, period) pair already appeared in an earlier row.
repeated_field_period = df.duplicated(subset=['field_id', 'period'])
df.loc[repeated_field_period]

Unnamed: 0,monitoring_class,period,fonte,state,area,meso,eopath_location,start_season,end_season,peak_start,...,key_bucket,local_eopatch_path,total_imagens_do_intervalo,total_de_imagem,status_missing_images,days_gap,dates_diff,mean_dates_diff,status_series_missing,geometry
6,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,953564.0,220.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,datasets/culture/culture_v02/corn_train_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,55,24,OK,,,5.00,OK,"POLYGON ((-46.23854 -11.65442, -46.23862 -11.6..."
29,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,12082522.0,222.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,datasets/culture/culture_v02/corn_train_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,55,28,OK,,,5.00,OK,"POLYGON ((-46.15444 -13.83715, -46.15445 -13.8..."
34,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,14934182.0,63.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,,,,,,,,,,"MULTIPOLYGON (((-46.57307 -11.36588, -46.57307..."
39,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,4422121.0,222.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,datasets/culture/culture_v02/corn_train_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,55,33,OK,,,5.00,OK,"POLYGON ((-45.7407 -14.02816, -45.72816 -14.01..."
55,CORN,2020/2021,Agrosatélite - Grãos Brasil 2020/2021,BA,1096067.0,220.0,start_2020-10-01_end_2021-07-01_monitoring_cla...,2020-10-01,2021-07-01,2021-01-01,...,,,,,,,,,,"MULTIPOLYGON (((-45.92617 -11.53204, -45.92617..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46980,RICE,2021/2022,Conab - Arroz Irrigado 2021/2022,GO,408391.0,551.0,start_2021-10-01_end_2022-06-01_monitoring_cla...,2021-10-01,2022-06-01,2021-12-01,...,datasets/culture/culture_v02/rice_val_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,55,20,OK,,,5.00,OK,"POLYGON ((-47.09137 -14.46074, -47.09155 -14.4..."
46999,RICE,2017/2018,Conab - Arroz Irrigado 2017/2018,TO,971807.0,59.0,start_2017-10-01_end_2018-06-01_monitoring_cla...,2017-10-01,2018-06-01,2017-12-01,...,datasets/culture/culture_v02/rice_test_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,55,29,OK,,,5.18,OK,"POLYGON ((-49.68549 -10.5037, -49.6819 -10.503..."
47020,RICE,2019/2020,Conab - Arroz Irrigado 2019/2020,RS,384161.0,493.0,start_2019-09-01_end_2020-06-01_monitoring_cla...,2019-09-01,2020-06-01,2019-11-01,...,datasets/culture/culture_v02/rice_test_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,55,28,OK,,,5.19,OK,"POLYGON ((-51.41222 -29.70816, -51.41241 -29.7..."
47038,RICE,2021/2022,Conab - Arroz Irrigado 2021/2022,GO,125860.0,551.0,start_2021-10-01_end_2022-06-01_monitoring_cla...,2021-10-01,2022-06-01,2021-12-01,...,datasets/culture/culture_v02/rice_test_v2.gpkg,/agrilearn_app/datasets/eopatchs/processed/SOY...,68,28,OK,,,5.00,OK,"POLYGON ((-47.03756 -14.50845, -47.03755 -14.5..."


In [34]:
# Distinct field ids among the rows with a repeated (field_id, period) pair.
repeated_field_period = df.duplicated(subset=['field_id', 'period'])
df.loc[repeated_field_period, 'field_id'].unique()

array(['2143_1', '2229_2', '538_16', '2259_0', '2164_3', '2145_2',
       '2209_5', '2136_0', '537_14', None, '11506_5', '11502_20',
       '1279_3', '11203_0', '11219_3', '11013_4', '11353_0', '10760_2',
       '11399_2', '11205_0', '1276_2', '11327_9', '11463_1', '11313_9',
       '10783_1', '11480_0', '557_2', '1887_3', '1800_0', '1584_3',
       '1765_5', '593_3', '1667_1', '563_4', '1733_1', '2779_1', '2549_3',
       '3622_2', '2312_17', '2644_2', '3938_2', '4038_5', '2809_5',
       '2449_3', '4166_1', '3617_2', '3285_1', '3835_4', '2544_10',
       '2344_4', '3043_4', '3982_1', '2355_3', '3602_0', '674_5',
       '2471_2', '3038_0', '2448_8', '3937_2', '10311_4', '10217_2',
       '10384_5', '10533_7', '10527_3', '10463_5', '1180_9', '10533_11',
       '10534_1', '10544_1', '10503_0', '10657_1', '1226_2', '10441_5',
       '10414_15', '10394_3', '10437_4', '10437_10', '10606_10', '511_3',
       '511_1', '1430_6', '2083_7', '1990_12', '1992_10', '6343_9',
       '6435_1', '905_

In [35]:
#df[df['field_id'].isin(['0_6'])].to_csv('/agrilearn_app/datasets/cana-v1/csvs/duplicated_lines_CANA-sample-02.csv')

In [36]:
# Show the secondary crop column next to the main crop label.
crop_columns = ['cultura_2', 'monitoring_class']
df[crop_columns]

Unnamed: 0,cultura_2,monitoring_class
0,,CORN
1,,CORN
4,,CORN
5,,CORN
6,,CORN
...,...,...
47037,,RICE
47038,,RICE
47039,,RICE
47047,,RICE


### 3.5 Analysis of length_of_season (LOS)

In [37]:
# Rows per LOS value, most frequent first. groupby leaves out rows whose LOS is
# NaN (pandas default), and 'count' tallies non-null geometries.
# NOTE(review): the 'omission_count' label looks carried over from another
# analysis — the column is simply a row count per LOS value; confirm the name.
rows_per_los = df.groupby('LOS').agg(omission_count=('geometry', 'count'))
rows_per_los.sort_values(by='omission_count', ascending=False)

Unnamed: 0_level_0,omission_count
LOS,Unnamed: 1_level_1
130.0,3124
125.0,2906
135.0,2896
140.0,2787
120.0,2654
...,...
172.0,1
178.0,1
182.0,1
188.0,1


### 3.6 Tamanho do Talhão

In [38]:
# Display the column index again (same names as listed earlier) before the
# field-size analysis; the commented-out cells below reference an 'area_ha'
# column that is not in this list.
df.columns

Index(['monitoring_class', 'period', 'fonte', 'state', 'area', 'meso',
       'eopath_location', 'start_season', 'end_season', 'peak_start',
       'peak_end', 'field_id', 'planting_start', 'planting_end', 'cycle_start',
       'cycle_end', 'LOS', 'is_valid', 'is_valid_cvt', 'start_of_season',
       'end_of_season', 'peaks', 'length_of_season', 'set_type',
       'sampled_date', 'micro', 'peak_of_season', 'is_valid_metrics',
       'sos_valid', 'pos_valid', 'eos_valid', 'los_valid', 'start_of_cycle',
       'end_of_cycle', 'length_of_cycle', 'is_valid_POS', 'is_valid_LOS',
       'dataset_part', 'cultura_2', 'obs_extra', 'sentinel_eopatch_current',
       'contour_score', 'contour_selected_timestamp', 'compac_index',
       'key_bucket', 'local_eopatch_path', 'total_imagens_do_intervalo',
       'total_de_imagem', 'status_missing_images', 'days_gap', 'dates_diff',
       'mean_dates_diff', 'status_series_missing', 'geometry'],
      dtype='object')

In [39]:
# # discretização ()
# df['area_ha_cat'] = pd.cut(df['area_ha'], bins=[0, 10, 100, np.inf], labels=['pequena', "média", "grande"])

In [40]:
# df.groupby('area_ha_cat').agg(total=('geometry','count'))

In [None]:
# gdf.to_file(f"/agrilearn_app/datasets/base/geopackage/processed/{filename}", driver='GPKG', engine='fiona')

## 4. Generating HTML Report

In [None]:
# from ydata_profiling import ProfileReport
# from ydata_profiling.config import Settings
# %matplotlib inline

In [None]:
# df_copy = gdf.drop(columns=['cultura_2', 'geometry'])  # Supondo que 'cultura_2' seja a coluna problemática

In [None]:
# df_e_object = df_copy.select_dtypes(exclude=['object'])
# df_e_object

In [None]:
# df_i_object = df_copy.select_dtypes(include=['object'])
# df_i_object

In [None]:
# df_e_object_colums = df_copy.select_dtypes(exclude=['object']).columns

In [None]:
# title = "data_report_06_12_2024_cana_data"
# config = Settings()
# config.vars.cat.words = False  # Desativa a nuvem de palavras
# report  = ProfileReport(df=df_copy[list(df_e_object_colums) + ['monitoring_class', 'state', 'period', 'fonte']], 
#                         title=title,
#                         minimal=False,
#                         config=config)

# report.to_file(f'{title}.html')

In [None]:
# df_number = gdf.select_dtypes(exclude=['float','int', 'datetime'])
# df_number