In [1]:
import geopandas as gpd
import pandas as pd

- Filter empty geometry
- Ensure EPSG:4326 coordinates 
- negative buffer of 10m to avoid boundary effects (of course, we can also do validation with boundary pixels)

In [None]:
!ogr2ogr -t_srs EPSG:4326 -sql "SELECT ST_Buffer(geometry,-10), * FROM flanders_crop where geometry is not null" -dialect sqlite -lco ID_FIELD=sampleID validate_polys5.geojson ~/Downloads/flanders_crop.geojson

In [127]:
ref = gpd.read_file("/tmp/validate_polys5.geojson")

- Filter out classes 'other' and 'permanent crops'
TODO: try to do validation for 'other' class as well, or at least cases where other crops are detected as one of the classes in the legend.

In [25]:
crops = ref[(ref.crop_type!=7) & (ref.crop_type!=1)]
crops.to_file("/tmp/crops.geojson")

In [58]:
crops

Unnamed: 0,id,level_0,level_1,crop_type,crop_type_name,geometry
0,2021_BE_LPIS-Flanders_POLY_110-411034,410264,0,2,CEREALS,"POLYGON ((4.72244 50.79582, 4.72256 50.79591, ..."
1,2021_BE_LPIS-Flanders_POLY_110-411035,410265,0,2,CEREALS,"POLYGON ((4.72277 50.79605, 4.72481 50.79746, ..."
2,2021_BE_LPIS-Flanders_POLY_110-411036,410266,0,2,CEREALS,"POLYGON ((4.72486 50.79776, 4.72465 50.79793, ..."
4,2021_BE_LPIS-Flanders_POLY_110-411045,410275,0,2,CEREALS,"POLYGON ((4.71904 50.79339, 4.72016 50.79421, ..."
6,2021_BE_LPIS-Flanders_POLY_110-411054,410284,0,3,MAIZE,"POLYGON ((4.72172 50.82230, 4.72183 50.82232, ..."
...,...,...,...,...,...,...
69874,2021_BE_LPIS-Flanders_POLY_110-564501,563381,0,3,MAIZE,"POLYGON ((5.52935 50.76038, 5.52940 50.76042, ..."
69876,2021_BE_LPIS-Flanders_POLY_110-564503,563383,0,3,MAIZE,"POLYGON ((5.52978 50.76082, 5.52987 50.76088, ..."
69882,2021_BE_LPIS-Flanders_POLY_110-564572,563452,0,2,CEREALS,"POLYGON ((5.52970 50.77295, 5.52969 50.77297, ..."
69884,2021_BE_LPIS-Flanders_POLY_110-564623,563503,0,2,CEREALS,"POLYGON ((5.53027 50.77047, 5.53028 50.77047, ..."


We need to get rid of overlapping polygons in the reference data.
This is not only weird from validation perspective, but also causes extraction to be very slow.

In [59]:
s_ = gpd.GeoDataFrame(
  geometry=[crops.unary_union]).explode(
  index_parts=False).reset_index(
  drop=True)

s_


Unnamed: 0,geometry
0,"POLYGON ((4.72115 50.79402, 4.72075 50.79384, ..."
1,"POLYGON ((4.72300 50.79465, 4.72194 50.79547, ..."
2,"POLYGON ((4.72477 50.79607, 4.72453 50.79571, ..."
3,"POLYGON ((4.72512 50.79872, 4.72564 50.79828, ..."
4,"POLYGON ((4.73025 50.80292, 4.73024 50.80292, ..."
...,...
22800,"POLYGON ((5.50527 50.90082, 5.50527 50.90082, ..."
22801,"POLYGON ((5.50650 50.90202, 5.50649 50.90203, ..."
22802,"POLYGON ((5.50537 50.94375, 5.50538 50.94375, ..."
22803,"POLYGON ((5.50453 50.98839, 5.50453 50.98877, ..."


In [60]:
s_ = gpd.sjoin(s_, crops, how='left').drop(columns=['index_right'])
crops_without_overlap = s_.dissolve(s_.index, aggfunc='first')

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  s_ = gpd.sjoin(s_, crops, how='left').drop(columns=['index_right'])


Perform filter on area (in degrees!) also optional, but there are simply limits caused by working at 10m resolution.
It could also be relevant to performm validation on small fields separately, to analyze what the limits are.

In [61]:
crops_without_overlap=crops_without_overlap[crops_without_overlap.area>0.0000001]

In [62]:
crops_without_overlap

Unnamed: 0,geometry,id,level_0,level_1,crop_type,crop_type_name
0,"POLYGON ((4.72115 50.79402, 4.72075 50.79384, ...",2021_BE_LPIS-Flanders_POLY_110-411045,410275,0,2,CEREALS
1,"POLYGON ((4.72300 50.79465, 4.72194 50.79547, ...",2021_BE_LPIS-Flanders_POLY_110-411034,410264,0,2,CEREALS
2,"POLYGON ((4.72477 50.79607, 4.72453 50.79571, ...",2021_BE_LPIS-Flanders_POLY_110-411035,410265,0,2,CEREALS
3,"POLYGON ((4.72512 50.79872, 4.72564 50.79828, ...",2021_BE_LPIS-Flanders_POLY_110-411036,410266,0,2,CEREALS
4,"POLYGON ((4.73025 50.80292, 4.73024 50.80292, ...",2021_BE_LPIS-Flanders_POLY_110-416802,416004,0,2,CEREALS
...,...,...,...,...,...,...
22799,"POLYGON ((5.51634 50.89759, 5.51634 50.89759, ...",2021_BE_LPIS-Flanders_POLY_110-561444,560324,0,3,MAIZE
22800,"POLYGON ((5.50527 50.90082, 5.50527 50.90082, ...",2021_BE_LPIS-Flanders_POLY_110-561471,560351,0,3,MAIZE
22801,"POLYGON ((5.50650 50.90202, 5.50649 50.90203, ...",2021_BE_LPIS-Flanders_POLY_110-561472,560352,0,3,MAIZE
22802,"POLYGON ((5.50537 50.94375, 5.50538 50.94375, ...",2021_BE_LPIS-Flanders_POLY_110-561530,560410,0,3,MAIZE


In [63]:
crops_without_overlap.to_file("/tmp/crops_no_overlap.geojson")

Now extract a histogram over reference polygons using openEO. This means first creating one band per histogram bucket, then using a 'sum' reducer.

In [48]:
import openeo
from openeo.internal.graph_building import PGNode
from openeo.processes import vector_buffer, array_create

c= openeo.connect("openeo-dev.vito.be").authenticate_oidc()
croptype = c.load_collection(
    "OPENEO_CROPTYPE_2021_V2",
    bands=["croptype"],
    temporal_extent=["2020-05-01", "2022-06-01"],
)

croptype = croptype.apply_dimension(lambda b:array_create([b.array_element(0)==x for x in range(1,8)]),dimension="bands").rename_labels("bands",[ "c1","c2","c3","c4","c5","c6","c7"])


parcels = c.vectorcube_from_paths(["/data/users/Public/driesj/crops_no_overlap.geojson"], format="GeoJSON")


aggregations = croptype.aggregate_spatial(
    geometries=parcels,
    reducer="sum",
)


aggregations.save_result(format="CSV").execute_batch("validate_count.csv",format="csv")

Authenticated using refresh token.
0:00:00 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': send 'start'
0:00:37 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:00:42 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:00:49 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:00:57 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:01:07 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:01:19 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:01:34 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:01:54 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:02:18 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:02:48 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': queued (progress N/A)
0:03:25 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': running (progress N/A)
0:04:12 Job 'j-6b2f0c228cb44d67b2e7b64a331fea4e': running (progress N/A)
0:05:11 Job 'j-6b2f0c228cb44d67b2e7b

In [92]:
extract_df=pd.read_csv("validate_count.csv")

In [93]:
extract_df.index = extract_df.feature_index
extract_df = extract_df.sort_index()

In [94]:
extract_df.columns = ['date', 'feature_index', '1', '2', '3','4', '5', '6', '7']
extract_df

Unnamed: 0_level_0,date,feature_index,1,2,3,4,5,6,7
feature_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,2020-01-01T00:00:00.000Z,0,0.0,174.0,0.0,0.0,0.0,0.0,14.0
1,2020-01-01T00:00:00.000Z,1,0.0,70.0,0.0,0.0,0.0,0.0,12.0
2,2020-01-01T00:00:00.000Z,2,0.0,161.0,0.0,0.0,0.0,0.0,60.0
3,2020-01-01T00:00:00.000Z,3,0.0,173.0,0.0,0.0,0.0,0.0,35.0
4,2020-01-01T00:00:00.000Z,4,0.0,103.0,0.0,0.0,0.0,0.0,124.0
...,...,...,...,...,...,...,...,...,...
21129,2020-01-01T00:00:00.000Z,21129,0.0,0.0,15.0,0.0,0.0,0.0,5.0
21130,2020-01-01T00:00:00.000Z,21130,0.0,0.0,29.0,0.0,0.0,0.0,30.0
21131,2020-01-01T00:00:00.000Z,21131,0.0,0.0,139.0,0.0,0.0,0.0,51.0
21132,2020-01-01T00:00:00.000Z,21132,0.0,0.0,63.0,0.0,0.0,0.0,14.0


It's very important to read the geojson again, because the index needs to be reset to match the index in the csv generated by openEO

In [98]:
crops_without_overlap = gpd.read_file("/tmp/crops_no_overlap.geojson")
crops_without_overlap

Unnamed: 0,id,level_0,level_1,crop_type,crop_type_name,geometry
0,2021_BE_LPIS-Flanders_POLY_110-411045,410275,0,2,CEREALS,"POLYGON ((4.72115 50.79402, 4.72075 50.79384, ..."
1,2021_BE_LPIS-Flanders_POLY_110-411034,410264,0,2,CEREALS,"POLYGON ((4.72300 50.79465, 4.72194 50.79547, ..."
2,2021_BE_LPIS-Flanders_POLY_110-411035,410265,0,2,CEREALS,"POLYGON ((4.72477 50.79607, 4.72453 50.79571, ..."
3,2021_BE_LPIS-Flanders_POLY_110-411036,410266,0,2,CEREALS,"POLYGON ((4.72512 50.79872, 4.72564 50.79828, ..."
4,2021_BE_LPIS-Flanders_POLY_110-416802,416004,0,2,CEREALS,"POLYGON ((4.73025 50.80292, 4.73024 50.80292, ..."
...,...,...,...,...,...,...
21129,2021_BE_LPIS-Flanders_POLY_110-561444,560324,0,3,MAIZE,"POLYGON ((5.51634 50.89759, 5.51634 50.89759, ..."
21130,2021_BE_LPIS-Flanders_POLY_110-561471,560351,0,3,MAIZE,"POLYGON ((5.50527 50.90082, 5.50527 50.90082, ..."
21131,2021_BE_LPIS-Flanders_POLY_110-561472,560352,0,3,MAIZE,"POLYGON ((5.50650 50.90202, 5.50649 50.90203, ..."
21132,2021_BE_LPIS-Flanders_POLY_110-561530,560410,0,3,MAIZE,"POLYGON ((5.50537 50.94375, 5.50538 50.94375, ..."


In [99]:
extract_df=extract_df.join(crops_without_overlap)
extract_df

Unnamed: 0_level_0,date,feature_index,1,2,3,4,5,6,7,id,level_0,level_1,crop_type,crop_type_name,geometry
feature_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,2020-01-01T00:00:00.000Z,0,0.0,174.0,0.0,0.0,0.0,0.0,14.0,2021_BE_LPIS-Flanders_POLY_110-411045,410275,0,2,CEREALS,"POLYGON ((4.72115 50.79402, 4.72075 50.79384, ..."
1,2020-01-01T00:00:00.000Z,1,0.0,70.0,0.0,0.0,0.0,0.0,12.0,2021_BE_LPIS-Flanders_POLY_110-411034,410264,0,2,CEREALS,"POLYGON ((4.72300 50.79465, 4.72194 50.79547, ..."
2,2020-01-01T00:00:00.000Z,2,0.0,161.0,0.0,0.0,0.0,0.0,60.0,2021_BE_LPIS-Flanders_POLY_110-411035,410265,0,2,CEREALS,"POLYGON ((4.72477 50.79607, 4.72453 50.79571, ..."
3,2020-01-01T00:00:00.000Z,3,0.0,173.0,0.0,0.0,0.0,0.0,35.0,2021_BE_LPIS-Flanders_POLY_110-411036,410266,0,2,CEREALS,"POLYGON ((4.72512 50.79872, 4.72564 50.79828, ..."
4,2020-01-01T00:00:00.000Z,4,0.0,103.0,0.0,0.0,0.0,0.0,124.0,2021_BE_LPIS-Flanders_POLY_110-416802,416004,0,2,CEREALS,"POLYGON ((4.73025 50.80292, 4.73024 50.80292, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21129,2020-01-01T00:00:00.000Z,21129,0.0,0.0,15.0,0.0,0.0,0.0,5.0,2021_BE_LPIS-Flanders_POLY_110-561444,560324,0,3,MAIZE,"POLYGON ((5.51634 50.89759, 5.51634 50.89759, ..."
21130,2020-01-01T00:00:00.000Z,21130,0.0,0.0,29.0,0.0,0.0,0.0,30.0,2021_BE_LPIS-Flanders_POLY_110-561471,560351,0,3,MAIZE,"POLYGON ((5.50527 50.90082, 5.50527 50.90082, ..."
21131,2020-01-01T00:00:00.000Z,21131,0.0,0.0,139.0,0.0,0.0,0.0,51.0,2021_BE_LPIS-Flanders_POLY_110-561472,560352,0,3,MAIZE,"POLYGON ((5.50650 50.90202, 5.50649 50.90203, ..."
21132,2020-01-01T00:00:00.000Z,21132,0.0,0.0,63.0,0.0,0.0,0.0,14.0,2021_BE_LPIS-Flanders_POLY_110-561530,560410,0,3,MAIZE,"POLYGON ((5.50537 50.94375, 5.50538 50.94375, ..."


In [100]:
valid_crop=extract_df[(extract_df['1']>0) | (extract_df['2']>0) | (extract_df['3']>0) | (extract_df['4']>0) | (extract_df['5']>0) | (extract_df['6']>0)]

df = valid_crop
df

Unnamed: 0_level_0,date,feature_index,1,2,3,4,5,6,7,id,level_0,level_1,crop_type,crop_type_name,geometry
feature_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,2020-01-01T00:00:00.000Z,0,0.0,174.0,0.0,0.0,0.0,0.0,14.0,2021_BE_LPIS-Flanders_POLY_110-411045,410275,0,2,CEREALS,"POLYGON ((4.72115 50.79402, 4.72075 50.79384, ..."
1,2020-01-01T00:00:00.000Z,1,0.0,70.0,0.0,0.0,0.0,0.0,12.0,2021_BE_LPIS-Flanders_POLY_110-411034,410264,0,2,CEREALS,"POLYGON ((4.72300 50.79465, 4.72194 50.79547, ..."
2,2020-01-01T00:00:00.000Z,2,0.0,161.0,0.0,0.0,0.0,0.0,60.0,2021_BE_LPIS-Flanders_POLY_110-411035,410265,0,2,CEREALS,"POLYGON ((4.72477 50.79607, 4.72453 50.79571, ..."
3,2020-01-01T00:00:00.000Z,3,0.0,173.0,0.0,0.0,0.0,0.0,35.0,2021_BE_LPIS-Flanders_POLY_110-411036,410266,0,2,CEREALS,"POLYGON ((4.72512 50.79872, 4.72564 50.79828, ..."
4,2020-01-01T00:00:00.000Z,4,0.0,103.0,0.0,0.0,0.0,0.0,124.0,2021_BE_LPIS-Flanders_POLY_110-416802,416004,0,2,CEREALS,"POLYGON ((4.73025 50.80292, 4.73024 50.80292, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21129,2020-01-01T00:00:00.000Z,21129,0.0,0.0,15.0,0.0,0.0,0.0,5.0,2021_BE_LPIS-Flanders_POLY_110-561444,560324,0,3,MAIZE,"POLYGON ((5.51634 50.89759, 5.51634 50.89759, ..."
21130,2020-01-01T00:00:00.000Z,21130,0.0,0.0,29.0,0.0,0.0,0.0,30.0,2021_BE_LPIS-Flanders_POLY_110-561471,560351,0,3,MAIZE,"POLYGON ((5.50527 50.90082, 5.50527 50.90082, ..."
21131,2020-01-01T00:00:00.000Z,21131,0.0,0.0,139.0,0.0,0.0,0.0,51.0,2021_BE_LPIS-Flanders_POLY_110-561472,560352,0,3,MAIZE,"POLYGON ((5.50650 50.90202, 5.50649 50.90203, ..."
21132,2020-01-01T00:00:00.000Z,21132,0.0,0.0,63.0,0.0,0.0,0.0,14.0,2021_BE_LPIS-Flanders_POLY_110-561530,560410,0,3,MAIZE,"POLYGON ((5.50537 50.94375, 5.50538 50.94375, ..."


In [106]:
gpd.GeoDataFrame(df.drop(["feature_index"],axis=1)).to_file("reference_data_and_extracts.geojson")

In [101]:

false_maize = df[(df.crop_type == 3) & (df['3'] == 0)]
false_maize

Unnamed: 0_level_0,date,feature_index,1,2,3,4,5,6,7,id,level_0,level_1,crop_type,crop_type_name,geometry
feature_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
374,2020-01-01T00:00:00.000Z,374,0.0,16.0,0.0,0.0,0.0,0.0,0.0,2021_BE_LPIS-Flanders_POLY_110-411715,410945,0,3,MAIZE,"POLYGON ((4.72594 50.94097, 4.72427 50.94030, ..."
375,2020-01-01T00:00:00.000Z,375,0.0,0.0,0.0,28.0,1.0,7.0,196.0,2021_BE_LPIS-Flanders_POLY_110-411714,410944,0,3,MAIZE,"POLYGON ((4.71984 50.94097, 4.72003 50.94115, ..."
419,2020-01-01T00:00:00.000Z,419,0.0,1.0,0.0,0.0,0.0,11.0,46.0,2021_BE_LPIS-Flanders_POLY_110-417871,417073,0,3,MAIZE,"POLYGON ((4.72829 50.95588, 4.72825 50.95584, ..."
524,2020-01-01T00:00:00.000Z,524,0.0,3.0,0.0,0.0,0.0,0.0,46.0,2021_BE_LPIS-Flanders_POLY_110-416860,416062,0,3,MAIZE,"POLYGON ((4.75977 50.82230, 4.75981 50.82193, ..."
1631,2020-01-01T00:00:00.000Z,1631,0.0,46.0,0.0,0.0,0.0,0.0,27.0,2021_BE_LPIS-Flanders_POLY_110-433946,433118,0,3,MAIZE,"POLYGON ((4.79546 50.91899, 4.79547 50.91898, ..."
1662,2020-01-01T00:00:00.000Z,1662,0.0,0.0,0.0,9.0,0.0,0.0,71.0,2021_BE_LPIS-Flanders_POLY_110-425179,424371,0,3,MAIZE,"POLYGON ((4.79504 50.94302, 4.79505 50.94302, ..."
2386,2020-01-01T00:00:00.000Z,2386,0.0,58.0,0.0,0.0,0.0,0.0,11.0,2021_BE_LPIS-Flanders_POLY_110-433003,432184,0,3,MAIZE,"POLYGON ((4.82306 50.86821, 4.82306 50.86821, ..."
2389,2020-01-01T00:00:00.000Z,2389,0.0,23.0,0.0,0.0,0.0,0.0,33.0,2021_BE_LPIS-Flanders_POLY_110-433019,432200,0,3,MAIZE,"POLYGON ((4.82081 50.86828, 4.82081 50.86825, ..."
3522,2020-01-01T00:00:00.000Z,3522,0.0,83.0,0.0,0.0,0.0,0.0,0.0,2021_BE_LPIS-Flanders_POLY_110-439824,438990,0,3,MAIZE,"POLYGON ((4.85313 50.79241, 4.85313 50.79241, ..."
3888,2020-01-01T00:00:00.000Z,3888,0.0,43.0,0.0,0.0,0.0,0.0,21.0,2021_BE_LPIS-Flanders_POLY_110-441968,441132,0,3,MAIZE,"POLYGON ((4.85840 50.94887, 4.85840 50.94887, ..."


In [102]:
false_maize.drop(["feature_index"],axis=1)

Unnamed: 0_level_0,date,1,2,3,4,5,6,7,id,level_0,level_1,crop_type,crop_type_name,geometry
feature_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
374,2020-01-01T00:00:00.000Z,0.0,16.0,0.0,0.0,0.0,0.0,0.0,2021_BE_LPIS-Flanders_POLY_110-411715,410945,0,3,MAIZE,"POLYGON ((4.72594 50.94097, 4.72427 50.94030, ..."
375,2020-01-01T00:00:00.000Z,0.0,0.0,0.0,28.0,1.0,7.0,196.0,2021_BE_LPIS-Flanders_POLY_110-411714,410944,0,3,MAIZE,"POLYGON ((4.71984 50.94097, 4.72003 50.94115, ..."
419,2020-01-01T00:00:00.000Z,0.0,1.0,0.0,0.0,0.0,11.0,46.0,2021_BE_LPIS-Flanders_POLY_110-417871,417073,0,3,MAIZE,"POLYGON ((4.72829 50.95588, 4.72825 50.95584, ..."
524,2020-01-01T00:00:00.000Z,0.0,3.0,0.0,0.0,0.0,0.0,46.0,2021_BE_LPIS-Flanders_POLY_110-416860,416062,0,3,MAIZE,"POLYGON ((4.75977 50.82230, 4.75981 50.82193, ..."
1631,2020-01-01T00:00:00.000Z,0.0,46.0,0.0,0.0,0.0,0.0,27.0,2021_BE_LPIS-Flanders_POLY_110-433946,433118,0,3,MAIZE,"POLYGON ((4.79546 50.91899, 4.79547 50.91898, ..."
1662,2020-01-01T00:00:00.000Z,0.0,0.0,0.0,9.0,0.0,0.0,71.0,2021_BE_LPIS-Flanders_POLY_110-425179,424371,0,3,MAIZE,"POLYGON ((4.79504 50.94302, 4.79505 50.94302, ..."
2386,2020-01-01T00:00:00.000Z,0.0,58.0,0.0,0.0,0.0,0.0,11.0,2021_BE_LPIS-Flanders_POLY_110-433003,432184,0,3,MAIZE,"POLYGON ((4.82306 50.86821, 4.82306 50.86821, ..."
2389,2020-01-01T00:00:00.000Z,0.0,23.0,0.0,0.0,0.0,0.0,33.0,2021_BE_LPIS-Flanders_POLY_110-433019,432200,0,3,MAIZE,"POLYGON ((4.82081 50.86828, 4.82081 50.86825, ..."
3522,2020-01-01T00:00:00.000Z,0.0,83.0,0.0,0.0,0.0,0.0,0.0,2021_BE_LPIS-Flanders_POLY_110-439824,438990,0,3,MAIZE,"POLYGON ((4.85313 50.79241, 4.85313 50.79241, ..."
3888,2020-01-01T00:00:00.000Z,0.0,43.0,0.0,0.0,0.0,0.0,21.0,2021_BE_LPIS-Flanders_POLY_110-441968,441132,0,3,MAIZE,"POLYGON ((4.85840 50.94887, 4.85840 50.94887, ..."


In [103]:
gpd.GeoDataFrame(false_maize.drop(["feature_index"],axis=1)).to_file("/tmp/false_maize.geojson")

print some results per crop type

In [126]:
for x in range(1,8):
    #number of parcels with crop type
    print(len(df[(df.crop_type == x)]))
    #number of parcels where crop type is not at all detected
    print(str(x) + ": " + str(df[(df.crop_type == x) & (df[str(x)] == 0)].shape))
    #number of parcels where crop type is detected but not present
    print(str(x) + ": " + str(df[(df.crop_type != x) & (df[str(x)] > 0)].shape))
    print(df[(df.crop_type == x) & (df[str(x)] == 0)].sum())

0
1: (0, 15)
1: (0, 15)
date              0.0
feature_index     0.0
1                 0.0
2                 0.0
3                 0.0
4                 0.0
5                 0.0
6                 0.0
7                 0.0
id                0.0
level_0           0.0
level_1           0.0
crop_type         0.0
crop_type_name    0.0
dtype: float64
8355
2: (39, 15)
2: (209, 15)
date              2020-01-01T00:00:00.000Z2020-01-01T00:00:00.00...
feature_index                                                333598
1                                                               0.0
2                                                               0.0
3                                                            1248.0
4                                                              72.0
5                                                             147.0
6                                                              91.0
7                                                             543.0
id         

  print(df[(df.crop_type == x) & (df[str(x)] == 0)].sum())


In [124]:
df.sum()

  df.sum()


date              2020-01-01T00:00:00.000Z2020-01-01T00:00:00.00...
feature_index                                             202535224
1                                                               0.0
2                                                         1226024.0
3                                                          643547.0
4                                                          331650.0
5                                                          357271.0
6                                                           10001.0
7                                                          277187.0
id                2021_BE_LPIS-Flanders_POLY_110-4110452021_BE_L...
level_0                                                  9550733591
level_1                                                           0
crop_type                                                     56055
crop_type_name    CEREALSCEREALSCEREALSCEREALSCEREALSSUGAR_BEETC...
dtype: object