In [1]:
# Load autoreload so edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the jupyter_black extension (Black code formatting for notebook cells).
%load_ext jupyter_black

In [2]:
from __future__ import annotations
import os

# Default location of the SEVIR dataset. `setdefault` keeps a PATH_TO_SEVIR value that is
# already exported in the environment instead of overwriting it with this machine-specific path.
# NOTE(review): presumably read by `sevir.constants` at import time, so this must run before `import sevir` — confirm.
os.environ.setdefault("PATH_TO_SEVIR", "/mnt/nuc/c/sevir")

import polars as pl
import pandas as pd

import sevir
from sevir.constants import DEFAULT_PATH_TO_SEVIR, DEFAULT_CATALOG, DEFAULT_DATA  # import path info
from sevir.constants import (
    IMG_TYPE,
    ID,
    TIME_UTC,
    EVENT_TYPE,
    EVENT_ID,
    EPISODE_ID,
    FILE_NAME,
    FILE_INDEX,
)  # import column names

# Build the catalog and data locations from the package defaults.
SEVIR_CATALOG = os.path.join(DEFAULT_PATH_TO_SEVIR, DEFAULT_CATALOG)
SEVIR_DATA = os.path.join(DEFAULT_PATH_TO_SEVIR, DEFAULT_DATA)

# Fail fast if either location is missing; check them separately so the
# assertion message names the path that could not be found.
assert os.path.exists(SEVIR_CATALOG), f"SEVIR catalog not found: {SEVIR_CATALOG}"
assert os.path.exists(SEVIR_DATA), f"SEVIR data not found: {SEVIR_DATA}"
print(SEVIR_CATALOG, SEVIR_DATA)

ModuleNotFoundError: No module named 'sevir'

In [None]:
# Time reading the same catalog CSV with polars (pyarrow-backed reader) ...
%timeit pl.read_csv(SEVIR_CATALOG, use_pyarrow=True)
# ... and with pandas (low_memory disabled) for comparison.
%timeit pd.read_csv(SEVIR_CATALOG, low_memory=False)

# Reading data with polars

In [None]:
# Column dtype overrides: the id, file-name and image-type columns are read as
# strings, and the UTC time column is read as a datetime.
catalog_dtypes = {
    ID: pl.Utf8,
    FILE_NAME: pl.Utf8,
    IMG_TYPE: pl.Utf8,
    TIME_UTC: pl.Datetime,
}

# Read the catalog CSV through the pyarrow-backed reader and preview the first rows.
df = pl.read_csv(SEVIR_CATALOG, dtypes=catalog_dtypes, use_pyarrow=True)
df.head()

# Filtering unwanted rows

In [None]:
img_types = ["vis", "ir069"]
n_img_types = len(set(img_types))  # number of distinct requested image types

# Keep only the catalog rows for the requested image types.
df = df.filter(df[IMG_TYPE].is_in(img_types))
# Every requested image type must actually occur in the catalog.
assert df[IMG_TYPE].n_unique() == n_img_types

# Keep only the IDs that have *every* requested image type. Count distinct
# image types per ID rather than rows per ID: repeated rows of a single type
# would otherwise let through an ID that is missing one of the other types.
types_per_id = df.groupby(ID).agg(pl.col(IMG_TYPE).n_unique().alias("n_img_types"))
f_ids = types_per_id.filter(types_per_id["n_img_types"] == n_img_types)[ID]
df = df.filter(df[ID].is_in(f_ids))
df.head()

The `read` function in the `catalog` module does some filtering and preprocessing of the data. It also creates absolute
file paths and checks that they exist, to prevent downstream issues.

In [None]:
from sevir.constants import VIS, IR_069, IR_107

# Read the catalog through the package helper, restricted to three image types,
# and preview the first rows of the result.
requested_img_types = [VIS, IR_069, IR_107]
sevir.catalog.read(SEVIR_CATALOG, SEVIR_DATA, img_types=requested_img_types).head()