In [1]:
import polars as pl

In [2]:
# Load the revisions parquet file and append a constant UInt8 column
# `current` equal to 0, so these rows can be told apart from the rows
# loaded from the releases file once the two frames are stacked.
revisions = pl.read_parquet(
    '/Users/lowell/Projects/bls-revisions/data/revisions.parquet'
).with_columns(
    pl.lit(0, pl.UInt8).alias('current')
)


In [3]:
# Load the releases parquet file and append a constant UInt8 column
# `current` equal to 1, flagging every row as originating from the
# releases file.
releases = pl.read_parquet(
    '/Users/lowell/Projects/bls-estimates/data/releases.parquet'
).with_columns(
    pl.lit(1, pl.UInt8).alias('current')
)

In [4]:
# Columns that together identify one observation. They are used both as the
# sort prefix and as the group key, so they are defined once to keep the two
# in sync.
SERIES_KEY = [
    'source',
    'seasonally_adjusted',
    'geographic_type', 'geographic_code',
    'industry_type', 'industry_code',
    'ref_date', 'vintage_date',
    'revision', 'benchmark_revision',
]

# Stack releases and revisions, normalise the industry type, and collapse
# duplicate keys to a single employment value.
#
# Precedence: rows are sorted by the key and then by `current` (0 for
# revisions, 1 for releases), and the aggregation keeps the LAST employment
# value per key, so a releases row wins whenever both files carry the same key.
#
# NOTE(review): pl.concat is called with its default strategy, which presumably
# needs both frames to share the same columns and dtypes -- confirm.
series = (
    pl.concat([
        releases,
        revisions
    ])
    .with_columns(
        # Rows whose industry_code is '00' are relabelled as 'national';
        # every other row keeps its existing industry_type.
        industry_type=pl.when(pl.col('industry_code').eq('00'))
                        .then(pl.lit('national', pl.Utf8))
                        .otherwise(pl.col('industry_type'))
    )
    .sort([*SERIES_KEY, 'current'])
    .group_by(SERIES_KEY, maintain_order=True)
    .agg(
        employment=pl.col('employment').last()
    )
)

# Persist the result as a parquet dataset partitioned by `source` and
# `seasonally_adjusted`. The write is a separate statement because
# `write_parquet` returns None: chaining it onto the expression above left
# `series` bound to None instead of the DataFrame.
#
# NOTE(review): depending on the installed pyarrow version, re-running this
# cell may add files to an existing dataset directory rather than replace
# them -- confirm before relying on re-runs being idempotent.
series.write_parquet(
    '/Users/lowell/Projects/alt_nfp/data/raw/vintages/vintage_store',
    use_pyarrow=True,
    pyarrow_options={
        'partition_cols': ['source', 'seasonally_adjusted']
    }
)