# Earth Observation Validation Report Notebook

The first part of this notebook reports on the progress of validations performed
using the Teams option. The second part generates the overall aggregated validation database.

In [None]:
import os
import pandas as pd
from glob import glob
import geopandas as gpd
from pathlib import Path
from datetime import date
from tabulate import tabulate

## Validation Report

In [None]:
# Root directory that the cells below list to find per-username validation files.
# Specify the same path that was given as the data_dir option in the
# ValidationDashboard notebook.
data_dir = '/efs/projects/3sl/validation'

In [None]:
# Build a progress report with one row per GeoPackage found under
# <data_dir>/<username>/data/Tappan/: number of points, sum of the
# 'verified' column, and the file stem.
report_list = []

# sorted() makes the report order reproducible; os.listdir() and glob()
# return entries in arbitrary order.
for username in sorted(os.listdir(data_dir)):

    # these entries are excluded from the report
    if username in (".ipynb_checkpoints", "original_points"):
        continue

    filenames = sorted(glob(os.path.join(data_dir, username, 'data', 'Tappan', '*.gpkg')))

    for filename in filenames:

        stem = Path(filename).stem

        try:
            gdf = gpd.read_file(filename)
            report_list.append([gdf.shape[0], gdf['verified'].sum(), stem])
        except Exception:
            # Unreadable file or missing 'verified' column: keep going and flag it.
            # Catching Exception (not a bare except) lets a kernel interrupt
            # still stop the scan.
            report_list.append(["broken file", 0, stem])

print(tabulate(report_list, headers=["Number of Points", "Verified Points", "Username-Filename"]))

## Validation Database Generation

In [None]:
# Name of the output Validation Database file; it must end with .gpkg.
# The default embeds today's date (a date renders as YYYY-MM-DD in an f-string),
# so running on a later day produces a different file name.
database_filename = f'validation-database-{date.today()}.gpkg'

In [None]:
# Aggregate every readable GeoPackage under <data_dir>/<username>/data/Tappan/
# into one GeoDataFrame, tag each row with the username and short file name it
# came from, and write the result to database_filename.
report_list = []
database_list = []

# sorted() makes the row order of the database reproducible; os.listdir() and
# glob() return entries in arbitrary order.
for username in sorted(os.listdir(data_dir)):

    # these entries are excluded from the aggregation
    if username in (".ipynb_checkpoints", "original_points"):
        continue

    filenames = sorted(glob(os.path.join(data_dir, username, 'data', 'Tappan', '*.gpkg')))

    for filename in filenames:

        stem = Path(filename).stem

        # Split the stem into owner and short file name without assuming it
        # contains exactly one hyphen (a plain split('-') raised ValueError on
        # any other count, and the file was then dropped from the database).
        # A separate name is used so the outer loop variable is not overwritten.
        prefix = f'{username}-'
        if stem.startswith(prefix):
            owner, short_filename = username, stem[len(prefix):]
        elif '-' in stem:
            owner, short_filename = stem.split('-', 1)
        else:
            # no separator at all: fall back to the folder name as the owner
            owner, short_filename = username, stem

        try:
            gdf = gpd.read_file(filename)
            verified_count = gdf['verified'].sum()
        except Exception:
            # Unreadable file or missing 'verified' column: flag it and skip it.
            # Catching Exception (not a bare except) lets a kernel interrupt
            # still stop the scan.
            report_list.append(["broken file", 0, stem])
            continue

        # Only record the success row once the file is known to be usable, so a
        # file can never appear as both a success and a "broken file".
        report_list.append([gdf.shape[0], verified_count, stem])
        gdf['username'] = owner
        gdf['short_filename'] = short_filename
        database_list.append(gdf)

# pd.concat([]) fails with an opaque "No objects to concatenate"; raise the
# same exception type with a message that says what to check.
if not database_list:
    raise ValueError(f'no readable .gpkg files found under {data_dir}; nothing to aggregate')

# ignore_index=True gives the combined table one continuous index instead of
# repeating each file's own 0..n labels.
full_database = pd.concat(database_list, ignore_index=True)
full_database.to_file(database_filename, driver='GPKG', layer='validation')
full_database.head()