# Decrease size of data

In [2]:
import harptools as ht
import harp
import json
import os
import glob

# Directory that holds the area-of-interest definition file. Kept under its
# own name so that ROOT has a single meaning (the bulk data root below).
AOI_DIR = "../../Data/"

# Load the areas of interest. JSON is UTF-8 by specification, so the encoding
# is stated explicitly instead of relying on the platform default.
with open(f"{AOI_DIR}aoi.json", "r", encoding="utf-8") as f:
    aoi = json.load(f)

# Root directory of the data store that the later cells read from and write to.
ROOT = "E:/thesis dump/"

In [5]:
def simple_operations(product: str, spatial_extent: list, filter_vars: bool = False) -> str:
    """
    Build the operations string that is handed to harp.import_product.

    The result is a ';'-separated list of HARP operations: four
    latitude/longitude comparisons that restrict the product to the bounding
    box and, optionally, a keep(...) operation listing the variables to retain.

    The variable names below are HARP names, which differ from the names used
    inside the S5P files, and are maintained by hand. Edit them when other
    products or variables are needed. HARP product/variable names:
    https://stcorp.github.io/harp/doc/html/ingestions/index.html#sentinel-5p-products

    Parameters
    ----------
    product : str
        Accepted for the caller's convenience; the current implementation does
        not use it (the same variable list is kept for every product).
    spatial_extent : list
        Four values in the order lon_min, lat_min, lon_max, lat_max.
    filter_vars : bool
        When True, append a keep(...) operation with the HCHO, NO2, SO2 and
        miscellaneous variables listed below.

    Returns
    -------
    str
        The operations joined with ';'.
    """
    west, south, east, north = spatial_extent

    hcho_vars = [
        "HCHO_slant_column_number_density",
        "tropospheric_HCHO_column_number_density_validity",
    ]
    no2_vars = [
        "NO2_slant_column_number_density",
        "tropospheric_NO2_column_number_density_validity",
    ]
    so2_vars = [
        "SO2_slant_column_number_density",
        "SO2_column_number_density_validity",
    ]
    # Time, geolocation, cloud, wind and viewing-geometry variables.
    misc_vars = [
        "datetime_start",
        "datetime_length",
        "latitude",
        "longitude",
        "cloud_fraction",
        "surface_meridional_wind_velocity",
        "surface_zonal_wind_velocity",
        "latitude_bounds",
        "longitude_bounds",
        "solar_zenith_angle",
        "sensor_zenith_angle",
        "solar_azimuth_angle",
        "sensor_azimuth_angle",
    ]

    # Bounding-box filters, latitude first and then longitude.
    bbox_filters = [
        f"latitude>={south}",
        f"latitude<={north}",
        f"longitude>={west}",
        f"longitude<={east}",
    ]

    if not filter_vars:
        return ";".join(bbox_filters)

    kept_names = ",".join(hcho_vars + no2_vars + so2_vars + misc_vars)
    return ";".join(bbox_filters + [f"keep({kept_names})"])

In [6]:
# Write a reduced copy of every merged L3 TROPOMI file, for every area of
# interest, into a parallel "TinyEODATA" tree that mirrors the "EODATA" layout.
source_root = f"{ROOT}EODATA/"
target_root = f"{ROOT}TinyEODATA/"

for aoi_name, area_of_interest in aoi.items():
    # Directory holding the merged L3 files of this area of interest.
    path = f"{source_root}{aoi_name}/Sentinel-5P/TROPOMI/L3__Merged_/"
    # Bounding-box filter plus the keep(...) list of variables to retain.
    operations = simple_operations("NO2", area_of_interest, filter_vars=True)

    # The .nc files sit three directory levels below `path`.
    files = glob.glob(os.path.join(path, "*/*/*/*.nc"))
    for file in files:
        tropomi_data = harp.import_product(file, operations=operations)

        # Mirror the file into the TinyEODATA tree. Only the leading source
        # root is swapped (glob returns paths that start with the literal,
        # non-wildcard prefix it was given), so an "EODATA" substring inside
        # ROOT, the AOI name or the file name is left untouched.
        export_path = file.replace(source_root, target_root, 1)
        os.makedirs(os.path.dirname(export_path), exist_ok=True)
        harp.export_product(tropomi_data, export_path)
    