In [19]:
import os
from pathlib import Path
import pandas as pd
from joblib import Parallel, delayed
import numpy as np
from rasterio.enums import Resampling

In [20]:
from Raster_benchmarking.generate_rasters import generate_nlm_rasters

### Running Benchmarks: run all cells; to choose the benchmark size, change only the `num_files_to_open` variable

In [21]:
# Benchmark size knob. The cells below pass it to the raster generator as `layers`,
# use it to cap how many input files are indexed, hand it to classify/process_rasters/compute,
# and embed it in the names of the stacked .tif outputs.
num_files_to_open = 100     # Change to the desired number of input files

#### Generating Benchmark Data (seeded)

In [22]:
# Generate the benchmark input data under ./Raster_benchmarking/data, asking for
# `num_files_to_open` layers.
# NOTE(review): presumably this is what populates the `discrete/` and `continuous/`
# sub-directories read by the later cells -- confirm in generate_rasters.
generate_nlm_rasters(
    './Raster_benchmarking/data',
    layers=num_files_to_open,
)

# Running DGGS Benchmark

In [23]:
from Raster_benchmarking.DGGS_funcs import h3index_raster

In [24]:
# Input directories scanned for '*.asc' rasters by the indexing cells.
discrete_dir = Path('Raster_benchmarking/data/discrete')
continuous_dir = Path('Raster_benchmarking/data/continuous')

# Destination directory handed to h3index_raster; create it (and any missing
# parents) up front, tolerating a directory left over from a previous run.
output_dir = Path('Raster_benchmarking/data/dggs')
output_dir.mkdir(parents=True, exist_ok=True)
def mode(x):
    """Return the most frequent value of a pandas Series.

    Passed as the ``operation`` argument of ``h3index_raster`` in the indexing
    cells.

    Parameters
    ----------
    x : pandas.Series
        Values to summarise.

    Returns
    -------
    scalar
        The modal value of ``x``. When several values are tied, the smallest
        one is returned, because ``Series.mode`` returns its modes sorted.
        ``NaN`` is returned when ``x`` has no non-missing values; the previous
        ``pd.Series.mode(x)[0]`` lambda raised ``KeyError`` in that case.
    """
    modes = pd.Series.mode(x)
    # Series.mode drops missing values by default, so an empty or all-NaN
    # input produces an empty result that cannot be indexed.
    if modes.empty:
        return float('nan')
    return modes.iloc[0]

### Indexing

##### Continuous files

In [25]:
%%time
_ = Parallel(n_jobs=-1)(delayed(h3index_raster)(file, output_dir, stem='continuous', operation=mode) for file in list(sorted(continuous_dir.glob('*.asc')))[:num_files_to_open])

CPU times: total: 141 ms
Wall time: 4.32 s


##### Discrete files

In [26]:
%%time
_ = Parallel(n_jobs=-1)(delayed(h3index_raster)(file, output_dir, stem='discrete', operation=mode) for file in list(sorted(discrete_dir.glob('*.asc')))[:num_files_to_open])

CPU times: total: 125 ms
Wall time: 4.84 s


### Join and Classify

In [27]:
from Raster_benchmarking.DGGS_funcs import classify, summing, final_plotting

In [28]:
# Collect the parquet files found in the DGGS output directory, split by name prefix.
# glob() yields entries in filesystem-dependent order, so sort them: the file lists
# (and therefore whatever subset `classify` consumes) are then the same on every
# run and platform, matching the sorted input lists used by the indexing cells.
# NOTE(review): parquet files left in output_dir by an earlier, larger run are
# still picked up here -- clear the directory first if that matters.
c_files = sorted(output_dir.glob('continuous*.parquet'))
d_files = sorted(output_dir.glob('discrete*.parquet'))

##### Continuous files

In [29]:
# Classify the continuous parquet files, then feed the result straight into `summing`;
# only the final frame is kept.
cont_df = summing(
    classify(c_files, num_files_to_open, scale=100)
)

Processing files: 100%|██████████| 100/100 [00:01<00:00, 53.26it/s]
Joining DataFrames: 100%|██████████| 99/99 [00:00<00:00, 2189.52it/s]


##### Discrete files

In [30]:
# Classify the discrete parquet files, then feed the result straight into `summing`;
# only the final frame is kept.
disc_df = summing(
    classify(d_files, num_files_to_open, scale=100)
)

Processing files: 100%|██████████| 100/100 [00:01<00:00, 50.75it/s]
Joining DataFrames: 100%|██████████| 99/99 [00:00<00:00, 2106.20it/s]


# Running Raster Benchmark

In [31]:
from Raster_benchmarking.Raster_funcs import process_rasters, compute

In [32]:
# Make sure the directory that receives the stacked .tif files exists
# (missing parents are created; an existing directory is not an error).
Path('Raster_benchmarking/data/raster').mkdir(parents=True, exist_ok=True)

### Stacking & joining files

##### Continuous files

In [33]:
%%time
process_rasters('Raster_benchmarking/data/continuous', num_files_to_open , output_file=f'Raster_benchmarking/data/raster/continuous_{num_files_to_open}.tif', nodata=np.iinfo(np.uint8).max, dtype=np.uint8, resampling=Resampling.nearest)

CPU times: total: 156 ms
Wall time: 960 ms


##### Discrete files

In [34]:
%%time
process_rasters('Raster_benchmarking/data/discrete', num_files_to_open , output_file=f'Raster_benchmarking/data/raster/discrete_{num_files_to_open}.tif', nodata=np.iinfo(np.uint8).max, dtype=np.uint8, resampling=Resampling.nearest)

CPU times: total: 78.1 ms
Wall time: 333 ms


### Classification

##### Continuous files

In [35]:
%%time
compute(Path(f'Raster_benchmarking/data/raster/continuous_{num_files_to_open}.tif'), band_limit=num_files_to_open)

CPU times: total: 297 ms
Wall time: 6.77 s


##### Discrete files

In [36]:
%%time
compute(Path(f'Raster_benchmarking/data/raster/discrete_{num_files_to_open}.tif'), band_limit=num_files_to_open)

CPU times: total: 734 ms
Wall time: 7.52 s
