# Earth Observation Validation Report Notebook

The following notebook provides a report on the progress of validations when performed
using the Teams option. The second portion of this notebook generates the overall aggregated database.

In [1]:
import os
import pandas as pd
from glob import glob
import geopandas as gpd
from pathlib import Path
from datetime import date
from tabulate import tabulate

## Validation Report

In [2]:
# specify the data_dir option used in the ValidationDashboard notebook
data_dir = '/home/jovyan/efs/BinPeng_Colombia/validation'

In [7]:
report_list = []
for username in os.listdir(data_dir):
    
    if username in [".ipynb_checkpoints", "original_points"]:
        continue
    
    filenames = glob(os.path.join(data_dir, username, '*.gpkg'))

    for filename in filenames:
        
        try:
            gdf = gpd.read_file(filename)
            report_list.append(
                [
                    gdf.shape[0], # total points
                    gdf['verified'].sum(), # verified points
                    round((gdf['verified'].sum() / gdf.shape[0]) * 100, 2), # percentage done
                    pd.to_numeric(gdf['seconds_taken'], errors='coerce').mean(), # seconds per point
                    username
                ])
        except:
            report_list.append(["broken file", 0, Path(filename).stem])

print (tabulate(report_list, headers=[
    "Number of Points", "Verified Points", "Percentage", "Seconds Per Point (Mean)", "Username-Filename"]))

  Number of Points    Verified Points    Percentage    Seconds Per Point (Mean)  Username-Filename
------------------  -----------------  ------------  --------------------------  -------------------
              6849                  5          0.07                      2.6895  jacaraba


# Validation Database Generation

In [8]:
# specify the database_filename option used to name the Validation Database, ends with .gpkg
database_filename = f'validation-database-{date.today()}.gpkg'

In [9]:
report_list = []
database_list = []
for username in os.listdir(data_dir):
    
    if username in [".ipynb_checkpoints", "original_points"]:
        continue
    
    filenames = glob(os.path.join(data_dir, username, '*.gpkg'))
    
    for filename in filenames:
        
        try:
            gdf = gpd.read_file(filename)

            report_list.append(
                [
                    gdf.shape[0], # total points
                    gdf['verified'].sum(), # verified points
                    round((gdf['verified'].sum() / gdf.shape[0]) * 100, 2), # percentage done
                    pd.to_numeric(gdf['seconds_taken'], errors='coerce').mean(), # seconds per point
                    username
                ])

            gdf['username'] = username
            gdf['short_filename'] = Path(filename).stem
            database_list.append(gdf)

        except:
            report_list.append(["broken file", 0, Path(filename).stem])

print (tabulate(report_list, headers=[
    "Number of Points", "Verified Points", "Percentage", "Seconds Per Point (Mean)", "Username-Filename"]))

full_database = pd.concat(database_list)
full_database.to_file(database_filename, driver='GPKG', layer='validation') 
full_database.head()

  Number of Points    Verified Points    Percentage    Seconds Per Point (Mean)  Username-Filename
------------------  -----------------  ------------  --------------------------  -------------------
              6849                  5          0.07                      2.6895  jacaraba


Unnamed: 0,system_ind,y,x,ID,numBreaks,.geo,Group,verified,operator,burnt,confidence,date,seconds_taken,geometry,username,short_filename
0,0,5.590268213243818,-69.49037746246758,0,0.0,"{""type"":""MultiPoint"",""coordinates"":[]}",Group1,True,Cropland (herbaceous),0,1,2024-10-18 12:29:04.038476,3.3705,"POLYGON ((-69.49051 5.59013, -69.49051 5.59040...",jacaraba,ValidationPolygonColumbia-EO-Validation
1,1,-0.280635410843089,-74.06498427496926,1,0.0,"{""type"":""MultiPoint"",""coordinates"":[]}",Group2,False,Cropland (herbaceous),0,1,,,"POLYGON ((-74.06512 -0.28077, -74.06512 -0.280...",jacaraba,ValidationPolygonColumbia-EO-Validation
2,2,8.769448920575387,-74.79074191309566,2,0.0,"{""type"":""MultiPoint"",""coordinates"":[]}",Group2,False,Cropland (herbaceous),0,1,,,"POLYGON ((-74.79088 8.76931, -74.79088 8.76958...",jacaraba,ValidationPolygonColumbia-EO-Validation
3,3,2.41193965661879,-68.14512028684788,3,0.0,"{""type"":""MultiPoint"",""coordinates"":[]}",Group1,True,Urban,0,1,2024-10-18 12:29:09.007764,2.3015,"POLYGON ((-68.14526 2.41180, -68.14526 2.41207...",jacaraba,ValidationPolygonColumbia-EO-Validation
4,4,7.765432183136857,-72.7911939319179,4,0.0,"{""type"":""MultiPoint"",""coordinates"":[]}",Group1,True,Wetland,0,1,2024-10-18 12:29:16.005267,2.2609,"POLYGON ((-72.79133 7.76530, -72.79133 7.76557...",jacaraba,ValidationPolygonColumbia-EO-Validation
