In [None]:

from datetime import date

import boto3
import pandas as pd

from src import read_bls, read_census

## AWS credentials are assumed to be configured in the environment

In [None]:
# S3 configuration: the raw-data bucket and the key prefix for each source.
S3_BUCKET = "rearc-raw-bucket-dev"
BLS_KEY = "bls/pr/"
CENSUS_KEY = "census/"

# S3 client for this notebook. Requires `boto3` to be imported in the imports
# cell; AWS credentials are assumed to be configured in the environment.
s3 = boto3.client("s3")

In [None]:
# Load the BLS and Census data from S3 with the project's read_bls and
# read_census helpers.

# Today's date as an ISO string (YYYY-MM-DD); passed to read_census as the
# folder date, so the load depends on the day the notebook is run.
census_folder_date = date.today().isoformat()

bls_df = read_bls(
    S3_BUCKET,
    BLS_KEY,
    "pr.data.0.Current",
)
census_df = read_census(
    S3_BUCKET,
    CENSUS_KEY,
    census_folder_date,
    "census.json",
)

# Preview the first rows of both frames.
(bls_df.head(), census_df.head())

In [None]:
# Mean and standard deviation of the Census population for 2013-2018
# (both end years included).
in_window = census_df["year"].between(2013, 2018)
census_2013_2018 = census_df.loc[in_window].copy()

population_2013_2018 = census_2013_2018["population"]
census_mean = round(float(population_2013_2018.mean()), 2)
# Series.std() uses the pandas default (sample standard deviation, ddof=1).
census_std = round(float(population_2013_2018.std()), 2)

# One row per metric so the result can be shown as a small table.
census_stats = pd.DataFrame(
    [
        ("mean_population_2013_2018", census_mean),
        ("std_population_2013_2018", census_std),
    ],
    columns=["metric", "value"],
)

census_stats.head()

In [None]:
# For every BLS series, find the year whose summed value is the largest.

# Step 1: total "value" per (series_id, year).
bls_grouped = (
    bls_df
    .groupby(["series_id", "year"], as_index=False)["value"]
    .sum()
    .rename(columns={"value": "total_value"})
)

# Step 2: per series, keep the row with the largest total. idxmax returns the
# index label of the first maximum, so a tie keeps the first matching row.
bls_best_years = (
    bls_grouped
    .loc[bls_grouped.groupby("series_id")["total_value"].idxmax()]
    .reset_index(drop=True)
    .sort_values(["series_id", "year", "total_value"], ascending=[True, False, False])
)

# Display the frame built above (the cell previously referenced `best_years`,
# a name that is never defined, and raised NameError).
bls_best_years.head()

In [None]:
# Join the Census data with the BLS observation for series PRS30006032,
# year 2018, period Q01.

# Select the single BLS series/period of interest. str.strip() removes any
# surrounding whitespace from the text columns before comparing.
# Uses `bls_df` from the load cell (the cell previously referenced the
# undefined name `bls_data`).
bls_6032_q1 = bls_df.loc[
    (bls_df["series_id"].str.strip() == "PRS30006032")
    & (bls_df["year"] == 2018)
    & (bls_df["period"].str.strip() == "Q01"),
    ["series_id", "year", "period", "value"],
]

# Inner join on year: only Census rows whose year matches the selected BLS
# row(s) are kept. Uses `census_df` from the load cell (previously the
# undefined name `census_data`).
# NOTE(review): assumes "year" has the same dtype in both frames - confirm.
bls_census_merged_df = pd.merge(
    census_df,
    bls_6032_q1,
    on="year",
    how="inner",
)

bls_census_merged_df.head()