# xr_fresh vs ts_raster

This notebook compares the performance of `xr_fresh` and `ts_raster` for processing large raster datasets. The goal is to evaluate which library is more efficient in terms of execution time and memory usage.

In [1]:
import timeit

import matplotlib.pyplot as plt
import tsraster.prep as tr
from tsraster.calculate import calculateFeatures

start = timeit.default_timer()


path = "../xr_fresh/data/temperature/"
rasters = tr.image_to_series(path)


fc_parameters = {
    "mean": None,
    "maximum": None,
    "minimum": None,
    "quantile": [{"q": 0.15}, {"q": 0.95}],
}


ts_features = calculateFeatures(path, parameters=fc_parameters, reset_df=True)

end = timeit.default_timer()
print(f"Execution time: {end - start:.4f} seconds")



df: ../xr_fresh/data/temperature/my_df.csv


Feature Extraction: 100%|██████████| 60/60 [02:25<00:00,  2.43s/it]


features:../xr_fresh/data/temperature_features/extracted_features.csv
tif:../xr_fresh/data/temperature_features/extracted_features.tiff
Execution time: 245.2262 seconds


In [1]:
import os
from datetime import datetime
import pandas as pd
from glob import glob
from xr_fresh.extractors_series import extract_features_series
import tempfile
from pathlib import Path
import timeit

temp_dir = Path(tempfile.mkdtemp())

start2 = timeit.default_timer()


# change working directory
os.chdir("../xr_fresh/data/temperature")

band_name = "tmx"  # used to rename outputs
file_glob = f"tmx*tif"
strp_glob = f"tmx-%Y%m.tif"

dates = sorted(
    datetime.strptime(string, strp_glob) for string in sorted(glob(file_glob))
)
files = sorted(glob(file_glob))
# print dates and files in a table
print(pd.DataFrame({"date": dates, "file": files}))

fc_parameters = {
    "mean": [{}],
    "maximum": [{}],
    "minimum": [{}],
    "quantile": [{"q": 0.15}, {"q": 0.95}],
}


# Extract features from the geospatial time series
extract_features_series(files, fc_parameters, band_name, temp_dir)


end2 = timeit.default_timer()
print(f"Execution time: {end2 - start2:.4f} seconds")

  from .autonotebook import tqdm as notebook_tqdm


Jax is running on: gpu
        date            file
0 2005-01-01  tmx-200501.tif
1 2005-02-01  tmx-200502.tif
2 2005-03-01  tmx-200503.tif
3 2006-01-01  tmx-200601.tif
4 2006-02-01  tmx-200602.tif
5 2006-03-01  tmx-200603.tif
6 2007-01-01  tmx-200701.tif
7 2007-02-01  tmx-200702.tif
8 2007-03-01  tmx-200703.tif


100%|██████████| 6/6 [00:00<00:00, 34.56it/s]
100%|██████████| 6/6 [00:00<00:00, 36.89it/s]
100%|██████████| 6/6 [00:00<00:00, 36.47it/s]
100%|██████████| 6/6 [00:00<00:00, 20.44it/s]
100%|██████████| 6/6 [00:00<00:00, 947.04it/s]

Execution time: 1.5252 seconds





In [15]:
print("ts_raster execution time: 245.23")
print(f"xr_fresh execution time: {end2 - start2:.2f}")
print(f"Speedup: {245.2262 / (end2 - start2):.2f}x")

ts_raster execution time: 245.23
xr_fresh execution time: 1.53
Speedup: 160.79x
