In [4]:
# Process Phase - Logging Setup
import logging
import sys

# Destination file for everything this notebook logs.
LOG_FILE = "claude_nursing_ratio.log"

# Configure the root logger to write INFO-and-above records to LOG_FILE.
# filemode="w" opens the file for writing, discarding earlier contents.
# NOTE(review): logging.basicConfig does nothing when the root logger already
# has handlers, so re-running this cell in a live kernel will not reopen or
# truncate the file — confirm that is acceptable.
logging.basicConfig(
    filename=LOG_FILE,
    filemode="w",
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Root logger; its bound methods are handed to the stream adapters below.
logger = logging.getLogger()

# Redirect print statements to the logger
class LoggerWriter:
    """Minimal file-like adapter that forwards written text to a logging callable.

    Intended to stand in for ``sys.stdout`` / ``sys.stderr`` so that ``print``
    output ends up in the log. Only the small part of the text-stream
    interface used here is implemented: ``write``, ``flush`` and ``isatty``.

    Args:
        level: Callable taking a single string (e.g. ``logger.info``); it is
            invoked once per non-blank ``write`` call.
    """

    def __init__(self, level):
        self.level = level

    def write(self, message):
        """Forward ``message`` (stripped of surrounding whitespace) to the callable.

        Whitespace-only writes — such as the bare newline ``print`` emits as a
        separate call — are dropped so they do not create empty log records.

        Returns:
            int: ``len(message)``, matching the text-stream ``write`` contract
            so callers that check the number of characters written keep working.
        """
        text = message.strip()
        if text:
            self.level(text)
        return len(message)

    def flush(self):
        """No-op: nothing is buffered here; present so callers may flush the stream."""
        pass

    def isatty(self):
        """Report that this stream is not an interactive terminal.

        Code that probes ``sys.stdout.isatty()`` would otherwise fail with
        ``AttributeError`` once this object replaces the real stream.
        """
        return False

# From here on, text written to stdout is logged via logger.info and text
# written to stderr via logger.error (so print() output goes to the log).
sys.stdout, sys.stderr = LoggerWriter(logger.info), LoggerWriter(logger.error)

print("=== Starting Process Phase Notebook: Logging Setup Complete ===")


In [5]:
# Process Phase - Cell 1: Load Prepared Datasets

import os
import pandas as pd

# Directory holding the prepared parquet files (relative to the working directory)
PREPARED_PATH = "./data/prepared/"

# Dataset key -> prepared parquet file name
files = {
    "pbj_nurse": "pbj_nurse_prepared.parquet",
    "pbj_non_nurse": "pbj_non_nurse_prepared.parquet",
    "qrp_provider": "qrp_provider_prepared.parquet",
    "nh_survey": "nh_survey_prepared.parquet",
    "nh_quality_mds": "nh_quality_mds_prepared.parquet",
    "nh_ownership": "nh_ownership_prepared.parquet",
    "nh_citations": "nh_citations_prepared.parquet"
}

# Best-effort load: a dataset that cannot be read is reported and skipped, so
# loaded_datasets only ever contains the keys that were read successfully.
loaded_datasets = {}
for key, filename in files.items():
    file_path = os.path.join(PREPARED_PATH, filename)
    try:
        loaded_datasets[key] = pd.read_parquet(file_path)
    except Exception as e:
        print(f"Error loading {key}: {e}")
    else:
        print(f"Loaded {key} with shape {loaded_datasets[key].shape}")

# Spot-check the columns of one dataset. The lookup is guarded because a failed
# load leaves the key absent; an unguarded access would raise a KeyError that
# hides the real load error reported above.
if 'pbj_nurse' in loaded_datasets:
    print("Columns in pbj_nurse:", loaded_datasets['pbj_nurse'].columns.tolist())
else:
    print("Columns in pbj_nurse: unavailable (dataset failed to load)")


In [6]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def calculate_daily_ratios(df_nurse, df_non_nurse):
    """
    Aggregate staffing hours per work date and derive contract-staff ratios.

    Both inputs are grouped by their 'workdate' column and the hour columns
    are summed per date. For each date the ratio is
    contract hours / (contract hours + employee hours) * 100.
    No guard is applied for dates whose total hours are zero.

    Args:
        df_nurse: DataFrame with 'workdate' and the columns hrs_rn_ctr,
            hrs_rn_emp, hrs_lpn_ctr, hrs_lpn_emp, hrs_cna_ctr, hrs_cna_emp.
        df_non_nurse: DataFrame with 'workdate'; every column whose name ends
            in '_ctr' is treated as contract hours and every column ending in
            '_emp' as employee hours.

    Returns:
        tuple: (nursing frame, non-nursing frame), one row per work date. The
        summed hour columns are followed by 'nursing_ratio' and
        'nursing_total_hours' (respectively 'non_nursing_ratio' and
        'non_nursing_total_hours').
    """
    # ---- Nursing roles: fixed RN / LPN / CNA column set ----
    nurse_hour_cols = [
        'hrs_rn_ctr', 'hrs_rn_emp',
        'hrs_lpn_ctr', 'hrs_lpn_emp',
        'hrs_cna_ctr', 'hrs_cna_emp',
    ]
    nurse_daily = (
        df_nurse
        .groupby('workdate')
        .agg({name: 'sum' for name in nurse_hour_cols})
        .reset_index()
    )

    nurse_contract = (
        nurse_daily['hrs_rn_ctr']
        + nurse_daily['hrs_lpn_ctr']
        + nurse_daily['hrs_cna_ctr']
    )
    # Same left-to-right addition order as the column list above.
    nurse_total = (
        nurse_daily['hrs_rn_ctr'] + nurse_daily['hrs_rn_emp']
        + nurse_daily['hrs_lpn_ctr'] + nurse_daily['hrs_lpn_emp']
        + nurse_daily['hrs_cna_ctr'] + nurse_daily['hrs_cna_emp']
    )
    nurse_daily['nursing_ratio'] = nurse_contract / nurse_total * 100
    nurse_daily['nursing_total_hours'] = nurse_total

    # ---- Non-nursing roles: columns discovered by name suffix ----
    ctr_names = [name for name in df_non_nurse.columns if name.endswith('_ctr')]
    emp_names = [name for name in df_non_nurse.columns if name.endswith('_emp')]

    # Contract columns first, then employee columns.
    support_daily = (
        df_non_nurse
        .groupby('workdate')
        .agg({name: 'sum' for name in ctr_names + emp_names})
        .reset_index()
    )

    support_contract = support_daily[ctr_names].sum(axis=1)
    support_employee = support_daily[emp_names].sum(axis=1)
    support_total = support_contract + support_employee
    support_daily['non_nursing_ratio'] = support_contract / support_total * 100
    support_daily['non_nursing_total_hours'] = support_total

    return nurse_daily, support_daily

def create_comparison_plots(nursing_grouped, non_nursing_grouped,
                            title='Temporary Staffing Analysis - Q2 2024'):
    """
    Build a two-panel figure comparing nursing and non-nursing staffing metrics.

    The top panel plots the daily temporary staffing ratios of both groups;
    the bottom panel plots their daily total hours. The figure is returned,
    not shown.

    Args:
        nursing_grouped: DataFrame with 'workdate', 'nursing_ratio' and
            'nursing_total_hours' columns.
        non_nursing_grouped: DataFrame with 'workdate', 'non_nursing_ratio'
            and 'non_nursing_total_hours' columns.
        title: Figure title. Defaults to the original fixed title so existing
            callers are unaffected; pass another string for other periods.

    Returns:
        The plotly figure produced by make_subplots with four line traces.
    """
    # Two stacked subplots (ratios on top, total hours below), each with its own y-axis
    fig = make_subplots(rows=2, cols=1,
                        subplot_titles=('Temporary Staffing Ratios', 'Daily Total Hours'),
                        vertical_spacing=0.15,
                        row_heights=[0.7, 0.3])

    # (frame, y column, legend name, line style, subplot row).
    # Traces are added in this order, which is also the legend order.
    trace_specs = [
        (nursing_grouped, 'nursing_ratio',
         "Direct Care (Nursing)", dict(color='blue'), 1),
        (non_nursing_grouped, 'non_nursing_ratio',
         "Support (Non-Nursing)", dict(color='green'), 1),
        (nursing_grouped, 'nursing_total_hours',
         "Direct Care Total Hours", dict(color='blue', dash='dot'), 2),
        (non_nursing_grouped, 'non_nursing_total_hours',
         "Support Total Hours", dict(color='green', dash='dot'), 2),
    ]
    for frame, y_column, legend_name, line_style, row in trace_specs:
        fig.add_trace(
            go.Scatter(x=frame['workdate'],
                       y=frame[y_column],
                       name=legend_name,
                       line=line_style),
            row=row, col=1
        )

    # Overall layout
    fig.update_layout(
        title=title,
        height=800,
        showlegend=True,
        hovermode='x unified'
    )

    # Y-axis labels for each panel
    fig.update_yaxes(title_text="Temporary Staffing Ratio (%)", row=1, col=1)
    fig.update_yaxes(title_text="Total Hours", row=2, col=1)

    return fig

def print_summary_statistics(nursing_grouped, non_nursing_grouped):
    """
    Print mean/min/max ratio and mean daily hours for each staffing group.

    Args:
        nursing_grouped: DataFrame with 'nursing_ratio' and
            'nursing_total_hours' columns.
        non_nursing_grouped: DataFrame with 'non_nursing_ratio' and
            'non_nursing_total_hours' columns.

    Returns:
        None. Output goes through print().
    """
    # (heading, frame, ratio column, total-hours column); nursing is reported first.
    report_sections = (
        ("\nDirect Care (Nursing) Statistics:",
         nursing_grouped, 'nursing_ratio', 'nursing_total_hours'),
        ("\nSupport Staff (Non-Nursing) Statistics:",
         non_nursing_grouped, 'non_nursing_ratio', 'non_nursing_total_hours'),
    )

    for heading, frame, ratio_col, hours_col in report_sections:
        print(heading)
        ratios = frame[ratio_col]
        print(f"Average Ratio: {ratios.mean():.2f}%")
        print(f"Minimum Ratio: {ratios.min():.2f}%")
        print(f"Maximum Ratio: {ratios.max():.2f}%")
        print(f"Average Daily Hours: {frame[hours_col].mean():.2f}")

# Main analysis code
def run_staffing_analysis(df_nurse, df_non_nurse):
    """
    Compute the daily staffing ratios, show the comparison figure, and print a summary.

    Args:
        df_nurse: Nursing staffing DataFrame, passed to calculate_daily_ratios.
        df_non_nurse: Non-nursing staffing DataFrame, passed to calculate_daily_ratios.

    Returns:
        tuple: the (nursing, non-nursing) daily frames returned by
        calculate_daily_ratios.
    """
    # Step 1: per-date aggregates and ratios
    nurse_daily, support_daily = calculate_daily_ratios(df_nurse, df_non_nurse)

    # Step 2: build the figure and render it immediately
    create_comparison_plots(nurse_daily, support_daily).show()

    # Step 3: textual summary of the same frames
    print_summary_statistics(nurse_daily, support_daily)

    return nurse_daily, support_daily

# Run the analysis on the loaded nurse and non-nurse datasets; the resulting
# daily frames are kept in the notebook namespace.
nursing_grouped, non_nursing_grouped = run_staffing_analysis(
    df_nurse=loaded_datasets['pbj_nurse'],
    df_non_nurse=loaded_datasets['pbj_non_nurse'],
)