In [1]:
!pip install statsmodels patsy



In [10]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from tqdm import tqdm


In [3]:
# Detection category names, spelled exactly as they appear in the detections data.
# See paper for details of each category.
categories = [
    'car', 'person', 'trotro', 'stall', 'truck',
    'stove', 'motorcycle', 'vendor', 'lorry', 'umbrella',
    'bus', 'trash', 'taxi', 'van', 'debris',
    'loudspeaker', 'bowl', 'food', 'animal', 'bicycle',
]

# Data frame column holding the per-image count for each detection category.
count_cols = [f'{cat}_counts' for cat in categories]

# Subset of the detection categories that are vehicles.
vehicle_categories = [
    'car', 'trotro', 'truck', 'motorcycle', 'lorry',
    'bus', 'taxi', 'van', 'bicycle',
]

# Super categories: each key groups the detection categories listed under it.
super_categories = {
    'people': ['person', 'vendor'],
    'small_vehicles': ['car', 'taxi', 'truck'],
    'two_wheelers': ['bicycle', 'motorcycle'],
    'large_vehicles': ['trotro', 'van', 'lorry', 'bus'],
    'refuse': ['trash', 'debris'],
    'market': ['umbrella', 'stall', 'bowl', 'food'],
    'animal': ['animal'],
}

# One count column per super category, in the order the super categories are declared.
super_count_cols = [f'{name}_counts' for name in super_categories]

# Category columns followed by super-category columns.
# NOTE(review): 'animal' is both a detection category and a super category, so
# 'animal_counts' appears twice in this list — confirm that is intended by its users.
all_count_cols = count_cols + super_count_cols

In [39]:
# Read the coefficient table of a single site.
# The CSV is looked up relative to the current working directory and is named after the site code.
site = 'ASH'
model_df = pd.read_csv(
    f'./{site}_model_coefficients.csv',
)



['inflate_const', 'inflate_hour_1', 'inflate_hour_2', 'inflate_hour_3', 'inflate_hour_4', 'inflate_hour_5', 'inflate_hour_6', 'inflate_hour_7', 'inflate_hour_8', 'inflate_hour_9', 'inflate_hour_10', 'inflate_hour_11', 'inflate_hour_12', 'inflate_hour_13', 'inflate_hour_14', 'inflate_hour_15', 'inflate_hour_16', 'inflate_hour_17', 'inflate_hour_18', 'inflate_hour_19', 'inflate_hour_20', 'inflate_hour_21', 'inflate_hour_22', 'inflate_hour_23', 'inflate_day_2', 'inflate_day_3', 'inflate_day_4', 'inflate_day_5', 'inflate_day_6', 'inflate_day_7', 'inflate_week_2', 'inflate_week_3', 'inflate_week_4', 'inflate_week_5', 'inflate_week_6', 'inflate_week_7', 'inflate_week_8', 'inflate_week_9', 'inflate_week_10', 'inflate_week_11', 'inflate_week_12', 'inflate_week_13', 'inflate_week_14', 'inflate_week_15', 'inflate_week_16', 'inflate_week_17', 'inflate_week_18', 'inflate_week_19', 'inflate_week_20', 'inflate_week_21', 'inflate_week_22', 'inflate_week_23', 'inflate_week_24', 'inflate_week_25', 'inf

In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import glob

# Function to read the coefficient estimate and its confidence interval bounds from one row
def calculate_effects_and_ci(row):
    """Return ``(estimate, lower bound, upper bound)`` for one coefficient row.

    ``row`` is anything indexable by column name (for example a row of the
    coefficient table) that provides 'estimate', 'conf.low' and 'conf.high'.
    The values are returned as stored; nothing is computed or transformed.
    """
    fields = ('estimate', 'conf.low', 'conf.high')
    return tuple(row[field] for field in fields)

# Function to plot the effects for a single site (points with error bars and a reference line at 1) and save the figure
def plot_effects_single_site(model_df, x_labels, title, ref_class_label, site, super_category):
    """Plot coefficient estimates with confidence intervals for one site and save the figure.

    The reference class is drawn first, at 1 with a zero-width interval, followed by
    one point per row of ``model_df`` in row order. Values are plotted exactly as
    stored; no transformation is applied here.
    NOTE(review): the y-axis label and the reference line at 1 imply the estimates are
    already on the multiplicative scale — confirm against how the CSVs are produced.

    Parameters
    ----------
    model_df : pandas.DataFrame
        Rows to plot; must provide 'estimate', 'conf.low' and 'conf.high' columns.
        An empty frame produces a figure containing only the reference point.
    x_labels : sequence of str
        Tick label for each row of ``model_df``, in the same order.
    title : str
        Figure title; with spaces replaced by underscores it is also the PNG file name.
    ref_class_label : str
        Tick label of the reference class.
    site : str
        Site identifier; shown in the legend and used in the output directory.
    super_category : str
        Sub-directory of the site's output directory that receives the figure.

    Raises
    ------
    ValueError
        If ``x_labels`` does not hold exactly one label per row of ``model_df``.
    """
    x_labels = list(x_labels)
    if len(x_labels) != len(model_df):
        # Fail before any figure is created, with a message that names the actual problem.
        raise ValueError(
            f'Expected one label per row: got {len(x_labels)} labels for {len(model_df)} rows '
            f'(site {site!r}, title {title!r})'
        )

    tick_labels = [ref_class_label] + x_labels
    positions = list(range(len(tick_labels)))

    # Prepend the reference class: estimate 1 with both bounds at 1 (no error bar).
    estimates = np.asarray([1] + model_df['estimate'].tolist())
    lower_bounds = np.asarray([1] + model_df['conf.low'].tolist())
    upper_bounds = np.asarray([1] + model_df['conf.high'].tolist())

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # Plotting the effects with error bars (yerr is the distance below / above each estimate)
        ax.errorbar(
            positions,
            estimates,
            yerr=[estimates - lower_bounds, upper_bounds - estimates],
            fmt='o',
            capsize=5,
            label=f'Site: {site}'
        )

        # Adding a thicker black line at y = 1
        ax.axhline(y=1, color='black', linewidth=2, linestyle='--')

        ax.set_xticks(positions)
        ax.set_xticklabels(tick_labels, rotation=45)
        ax.set_xlabel('Categories')
        ax.set_ylabel('Multiplicative Effect on Counts')
        ax.set_title(title)
        ax.grid(True)
        ax.legend()

        # Create directory if it doesn't exist
        output_dir = f'./results/time_series/{site}/{super_category}/'
        os.makedirs(output_dir, exist_ok=True)

        # Save the figure
        filename = title.replace(" ", "_") + '.png'
        fig.savefig(os.path.join(output_dir, filename), bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails; this function
        # is called repeatedly and unclosed figures would otherwise accumulate.
        plt.close(fig)

# Weekday names indexed by level - 1; level 1 ('Mon') is the reference category.
DAY_NAMES = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# Positional fallback labels, used only for terms that carry no parseable numeric level.
hour_labels = [f'Hour {i}' for i in range(1, 24)]
day_labels = DAY_NAMES[1:]  # Assuming 'Mon' is the reference category
year_labels = ['2020', '2021', '2022', '2023', '2024']  # Assuming '2019' is the reference category
week_labels = [f'Week {i}' for i in range(2, 54)]  # Assuming 'Week 1' is the reference category

# Super category (for folder structure); every figure produced below is about people counts.
super_category = 'people'


def _labels_for_terms(terms, level_to_label, fallback_labels):
    """Return exactly one tick label per term, in the order the terms are given.

    The level is the integer after the last underscore of the term (e.g. 'hour_5' -> 5)
    and is turned into a label by ``level_to_label``. Labelling by level rather than by
    position keeps labels correct when a level is absent from the table.
    If the term has no such integer, or ``level_to_label`` returns None, the label at the
    same position in ``fallback_labels`` is used; if that list is too short the raw term
    is used, so the result always has the same length as ``terms``.
    """
    labels = []
    for position, term in enumerate(terms):
        suffix = str(term).rsplit('_', 1)[-1]
        label = level_to_label(int(suffix)) if suffix.isdecimal() else None
        if label is None:
            label = fallback_labels[position] if position < len(fallback_labels) else str(term)
        labels.append(label)
    return labels


def _day_label(level):
    """Weekday name for a 1-based day level, or None when the level is out of range."""
    return DAY_NAMES[level - 1] if 1 <= level <= len(DAY_NAMES) else None


def _year_label(level):
    """Use the level itself when it looks like a calendar year; otherwise None (positional fallback)."""
    return str(level) if level >= 1000 else None


# Loop over all files matching the pattern *_conditional_model_coefficients.csv
for filepath in tqdm(glob.glob('*_conditional_model_coefficients.csv')):
    # Extract the site ID from the filename (the part before the first underscore)
    site = os.path.basename(filepath).split('_')[0]

    # Load the conditional model coefficients from the CSV file
    model_df = pd.read_csv(filepath)

    # Variables to plot, selected by substring match on the term name
    hour_vars = [var for var in model_df['term'] if 'hour' in var]
    day_vars = [var for var in model_df['term'] if 'day' in var]
    year_vars = [var for var in model_df['term'] if 'year' in var]
    week_vars = [var for var in model_df['term'] if 'week' in var]

    # One label per selected term, derived from the term's own level
    hour_labels_filtered = _labels_for_terms(hour_vars, lambda level: f'Hour {level}', hour_labels)
    day_labels_filtered = _labels_for_terms(day_vars, _day_label, day_labels)
    year_labels_filtered = _labels_for_terms(year_vars, _year_label, year_labels)
    week_labels_filtered = _labels_for_terms(week_vars, lambda level: f'Week {level}', week_labels)

    # Plot each category
    if hour_vars:
        plot_effects_single_site(model_df[model_df['term'].isin(hour_vars)], hour_labels_filtered, 'Effect of Hour of Day on People Counts', 'Hour 0', site, super_category)

    if day_vars:
        plot_effects_single_site(model_df[model_df['term'].isin(day_vars)], day_labels_filtered, 'Effect of Day of Week on People Counts', 'Mon', site, super_category)

    if year_vars:
        plot_effects_single_site(model_df[model_df['term'].isin(year_vars)], year_labels_filtered, 'Effect of Year on People Counts', '2019', site, super_category)

    if week_vars:
        plot_effects_single_site(model_df[model_df['term'].isin(week_vars)], week_labels_filtered, 'Effect of Week on People Counts', 'Week 1', site, super_category)


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:12<00:00,  1.26s/it]
