In [None]:
import os
# Move the working directory up one level; the later cells open files via
# relative "data/..." paths, presumably relative to that parent directory.
# NOTE: the move is relative to the *current* directory, so running this cell
# twice in the same kernel session climbs two levels.
os.chdir("..")

In [None]:

import arviz as az
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): none of the visible cells read an environment variable
# afterwards — confirm this call is still needed.
load_dotenv()

# Select the "arviz-darkgrid" plotting style for the figures drawn next.
# A later cell switches to "arviz-doc" before drawing its own figure.
az.style.use("arviz-darkgrid")


In [None]:
# Load every posterior stored under data/processed/ and index it as
# naics_groups[naics][fips] -> InferenceData.
# File names are expected to look like "<prefix>_<FIPS>_<NAICS>.nc".
naics_groups = {}

# os.listdir() returns entries in arbitrary order; sort so the grouping (and
# therefore the panel order of the figure) is the same on every run.
for file in sorted(os.listdir("data/processed/")):
    if not file.endswith(".nc"):
        continue

    # Parse the identifiers first so a badly named file is rejected before
    # paying for the NetCDF load.
    parts = file[:-3].split("_")
    if len(parts) < 3:
        # Without this guard a stray .nc file raises IndexError below
        print(f"Skipping (unexpected file name): {file}")
        continue
    fips = parts[1]    # FIPS code
    naics = parts[2]   # NAICS code

    filepath = os.path.join("data/processed/", file)
    print(f"Processing: {file}")

    # Load the InferenceData object from the NetCDF file
    idata = az.from_netcdf(filepath)

    # Give the variable a NAICS-specific name; the plotting code selects the
    # variable by exactly this name.
    varname = f"Kaits for Naics {naics}"
    idata = idata.rename({"log_k_index": varname})

    # Group the InferenceData objects by NAICS code, then by FIPS code
    naics_groups.setdefault(naics, {})[fips] = idata

# Keep only NAICS codes with more than one FIPS group, in sorted (string) order
valid_naics = sorted(n for n in naics_groups if len(naics_groups[n]) > 1)

# Draw one posterior-density panel per NAICS code, overlaying the FIPS groups,
# with a single figure-level legend shared by all panels.
from matplotlib.lines import Line2D  # proxy artists for the shared legend

# Grid geometry: fixed column count, rows by ceiling division. At least one
# row is kept so plt.subplots() stays valid when there is nothing to plot.
n = len(valid_naics)
n_cols = 2
n_rows = max(1, (n + n_cols - 1) // n_cols)

# squeeze=False always returns a 2-D array of Axes, so flatten() is safe
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 3 * n_rows), squeeze=False)
axes = axes.flatten()

# One fixed colour per FIPS code across *all* panels. plot_density otherwise
# colours datasets by their position in the list, so the same FIPS code could
# get different colours in different panels and a shared legend would be wrong.
all_fips = sorted({fips for naics in valid_naics for fips in naics_groups[naics]})
fips_colors = {fips: f"C{i}" for i, fips in enumerate(all_fips)}

# FIPS codes that were actually drawn somewhere (drives the shared legend)
plotted_fips = set()

for idx, naics in enumerate(valid_naics):
    ax = axes[idx]
    varname = f"Kaits for Naics {naics}"
    fips_groups = naics_groups[naics]

    # Only FIPS groups whose posterior actually contains the renamed variable
    present = [
        fips for fips in sorted(fips_groups)
        if varname in fips_groups[fips].posterior
    ]

    # Nothing to draw for this NAICS code: blank the panel and move on
    if not present:
        ax.axis("off")
        continue

    az.plot_density(
        [fips_groups[fips] for fips in present],
        data_labels=[f"FIPS {fips}" for fips in present],
        var_names=[varname],
        colors=[fips_colors[fips] for fips in present],
        shade=0.2,
        ax=ax,
        show=False
    )
    plotted_fips.update(present)

    # Replace the variable-name title with the NAICS code
    ax.set_title(f"NAICS {naics}", fontsize=14)

    # Drop the per-panel legend if one was drawn (there is none when only a
    # single dataset was plotted, so guard against None).
    panel_legend = ax.get_legend()
    if panel_legend is not None:
        panel_legend.remove()

# Hide any unused subplots
for ax in axes[len(valid_naics):]:
    ax.axis("off")

# Shared legend built from proxy lines, one entry per FIPS code drawn
legend_handles = [
    Line2D([], [], color=fips_colors[fips], label=f"FIPS {fips}")
    for fips in sorted(plotted_fips)
]
if legend_handles:
    fig.legend(
        handles=legend_handles,
        loc="upper right",
        ncol=2,
        fontsize=12,
        frameon=True
    )

# Set the overall title for the figure
fig.suptitle("Posterior Distributions by FIPS Code per NAICS", fontsize=16)

# Leave room at the top and bottom for the suptitle / legend
plt.tight_layout(rect=[0, 0.05, 1, 0.95])

# Display the plot
plt.show()


In [None]:
import os
import arviz as az
import matplotlib.pyplot as plt

# Switch to the "arviz-doc" plotting style for the figure drawn by this cell
az.style.use("arviz-doc")

# Group InferenceData objects by NAICS code, then by FIPS:
# naics_groups[naics][fips] -> InferenceData.
# File names are expected to look like "<prefix>_<FIPS>_<NAICS>.nc".
naics_groups = {}

# Sorted listing: os.listdir() order is arbitrary and would otherwise make the
# panel order differ between runs.
for file in sorted(os.listdir("data/processed/")):
    if not file.endswith(".nc"):
        continue

    # Validate the name before loading; a .nc file without both codes would
    # otherwise raise IndexError on parts[1] / parts[2].
    parts = file[:-3].split("_")
    if len(parts) < 3:
        print(f"Skipping (unexpected file name): {file}")
        continue
    fips = parts[1]    # FIPS code
    naics = parts[2]   # NAICS code

    print(file)
    idata = az.from_netcdf(os.path.join("data/processed/", file))

    # Rename the variable so it can be selected per NAICS code when plotting
    varname = f"Kaits for Naics {naics}"
    idata = idata.rename({"log_k_index": varname})

    naics_groups.setdefault(naics, {})[fips] = idata

# Keep only NAICS codes with more than one FIPS group, in sorted (string) order
valid_naics = sorted(n for n in naics_groups if len(naics_groups[n]) > 1)

# Draw one posterior-density panel per NAICS code, overlaying the FIPS groups,
# with a single figure-level legend shared by all panels.
from matplotlib.lines import Line2D  # proxy artists for the shared legend

n = len(valid_naics)
n_cols = 2
# Ceiling division, but never zero rows: plt.subplots(0, ...) raises when no
# NAICS code qualifies.
n_rows = max(1, (n + n_cols - 1) // n_cols)

# squeeze=False always returns a 2-D array of Axes, so flatten() is safe
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 3 * n_rows), squeeze=False)
axes = axes.flatten()

# Pin one colour to each FIPS code for the whole figure. plot_density colours
# datasets by list position by default, which would make a shared legend
# wrong whenever two NAICS codes cover different FIPS sets.
fips_in_figure = sorted({fips for code in valid_naics for fips in naics_groups[code]})
fips_colors = {fips: f"C{pos}" for pos, fips in enumerate(fips_in_figure)}

# FIPS codes that end up in at least one panel
plotted_fips = set()

for idx, naics in enumerate(valid_naics):
    ax = axes[idx]
    varname = f"Kaits for Naics {naics}"
    fips_groups = naics_groups[naics]

    # Skip posteriors that do not contain the renamed variable; asking
    # plot_density for a missing variable would fail.
    present = [
        fips for fips in sorted(fips_groups)
        if varname in fips_groups[fips].posterior
    ]
    if not present:
        ax.axis("off")
        continue

    az.plot_density(
        [fips_groups[fips] for fips in present],
        data_labels=[f"FIPS {fips}" for fips in present],
        var_names=[varname],
        colors=[fips_colors[fips] for fips in present],
        shade=0.2,
        ax=ax,
        show=False
    )
    plotted_fips.update(present)

    ax.set_title(f"NAICS {naics}")

    # Remove the subplot legend if there is one (none is drawn for a single
    # dataset, in which case the attribute is None).
    subplot_legend = ax.get_legend()
    if subplot_legend is not None:
        subplot_legend.remove()

# Hide unused subplots
for ax in axes[len(valid_naics):]:
    ax.axis("off")

# Global legend: one proxy line per FIPS code that was drawn
legend_handles = [
    Line2D([], [], color=fips_colors[fips], label=f"FIPS {fips}")
    for fips in sorted(plotted_fips)
]
if legend_handles:
    fig.legend(
        handles=legend_handles,
        loc="upper right",
        ncol=2,
        fontsize=12,
        frameon=True
    )

fig.suptitle("Posterior Distributions by FIPS Code per NAICS", fontsize=16)
plt.tight_layout(rect=[0, 0.05, 1, 0.95])
plt.show()


In [None]:
import arviz as az
import matplotlib.pyplot as plt
import os
import re
from collections import defaultdict

# Folder with netCDF files
folder = "data/processed"

# File names are expected to look like results_<FIPS>_<NAICS>.nc
pattern = re.compile(r"results_(\d+)_(\d+)\.nc")

# Interval width requested from plot_density and quoted in each figure title,
# so the title cannot drift from what is actually drawn.
hdi_prob = 0.94

# NAICS code -> list of (label, InferenceData) pairs
naics_dict = defaultdict(list)

# Sorted listing: os.listdir() order is arbitrary, and plot_density colours
# datasets by position, so a stable order keeps colours stable between runs.
for filename in sorted(os.listdir(folder)):
    # fullmatch() rejects names that only *start* with the pattern,
    # e.g. "results_1_2.nc.bak"
    match = pattern.fullmatch(filename)
    if match is None:
        continue
    fips, naics = match.groups()
    idata = az.from_netcdf(os.path.join(folder, filename))
    naics_dict[naics].append((f"FIPS {fips}", idata))

# One figure per NAICS code, in sorted (string) order
for naics in sorted(naics_dict):
    labels, idata_list = zip(*naics_dict[naics])

    axes = az.plot_density(
        list(idata_list),
        data_labels=list(labels),
        var_names=["log_k_index"],
        hdi_prob=hdi_prob,
        shade=0.2,
    )

    fig = axes.flatten()[0].get_figure()
    fig.suptitle(f"{hdi_prob:.0%} High Density Intervals for log_k_index (NAICS {naics})")
    plt.show()
    # Release the figure so a long list of NAICS codes does not pile up open figures
    plt.close(fig)


In [None]:
# Compare the log_k_index posteriors of four FIPS codes for NAICS 21, read
# from the test output folder. File names follow results_<FIPS>_<NAICS>.nc.
idata1 = az.from_netcdf("data/test/processed/results_36_21.nc")
idata2 = az.from_netcdf("data/test/processed/results_25_21.nc")
idata3 = az.from_netcdf("data/test/processed/results_37_21.nc")
idata4 = az.from_netcdf("data/test/processed/results_29_21.nc")

# NOTE(review): these ArviZ example datasets are not used by the plot below —
# remove both loads if no later cell needs them.
centered_data = az.load_arviz_data("centered_eight")
non_centered_data = az.load_arviz_data("non_centered_eight")

# Label each curve with the FIPS code of the file it was loaded from
# (same order as the datasets passed to plot_density).
axes = az.plot_density(
    [idata1, idata2, idata3, idata4],
    data_labels=["FIPS 36", "FIPS 25", "FIPS 37", "FIPS 29"],
    var_names=["log_k_index"],
    hdi_prob=0.94,  # matches the interval quoted in the title
    shade=0.2,
)

fig = axes.flatten()[0].get_figure()
fig.suptitle("94% High Density Intervals for log_k_index (NAICS 21)")

plt.show()