In [None]:
# Import python packages
import streamlit as st
import pandas as pd

# Reuse the Snowpark session that is already active in the hosting environment
# (presumably a Snowflake notebook / Streamlit-in-Snowflake runtime -- confirm).
# No connection parameters are configured here; `session` is the handle the
# later cells use to read tables.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


In [None]:
from snowflake.snowpark.functions import col, concat_ws, to_timestamp, date_trunc, count, lit

# Project the needed columns out of the source table (each quoted identifier
# is re-exposed under the same unquoted name) and keep only Kitchen rows.
source_columns = ['HOUSE_NAME', 'DATE_COL', 'TIME_COL', 'PARENT_NAME', 'ROOM_NAME']
projected_columns = [col(f'"{name}"').alias(name) for name in source_columns]

kitchen_rows = (
    session.table("vitalise_data_light_01")
    .select(*projected_columns)
    .filter(col('"ROOM_NAME"') == lit('Kitchen'))
)

# Build a timestamp from "<date> <time>" and derive the hourly bucket from it.
event_timestamp = to_timestamp(concat_ws(lit(' '), col('DATE_COL'), col('TIME_COL')))
sf_df = (
    kitchen_rows
    .with_column('DATETIME', event_timestamp)
    .with_column('TIME_BUCKET', date_trunc('hour', col('DATETIME')))
)

# One row per (house, hour, parent name) with the number of source rows.
agg_df = (
    sf_df
    .group_by('HOUSE_NAME', 'TIME_BUCKET', 'PARENT_NAME')
    .agg(count('*').alias('COUNT'))
)

# Pull the aggregate into pandas for the mapping and pivoting that follow.
pdf = agg_df.to_pandas()
print("Aggregated data shape:", pdf.shape)
print(pdf.head())


# Sound categories and the parent names assigned to each one. The names are
# kept grouped by category for readability and flattened below into the
# parent-name -> category lookup.
_category_members = {
    # Cooking & Domestic
    "Cooking/Domestic": [
        "Domestic sounds, home sounds",
        "Toothbrush",
        "Glass",
        "Surface contact",
        "Wood",
        "Door",
        "Sounds of things",
    ],
    # Water / Liquid
    "Water/Liquid": [
        "Water",
        "Liquid",
        "Steam",
        "Pour",
        "Splash, splatter",
    ],
    # Music / Instruments
    "Music": [
        "Music",
        "Rock music",
        "Hip hop music",
        "Classical music",
        "Christian music",
        "Music of Asia",
        "Music of Latin America",
        "Keyboard (musical)",
        "Guitar",
        "Violin, fiddle",
        "Bowed string instrument",
        "Plucked string instrument",
        "Brass instrument",
        "Wind instrument, woodwind instrument",
        "Percussion",
        "Mallet percussion",
        "Snare drum",
        "Drum",
        "Drum kit",
        "Bell",
        "Chime",
        "Synthesizer",
        "Musical instrument",
        "Music role",
        "Music genre",
        "Music mood",
        "Sound reproduction",
        "Vocal music",
        "Recording",
    ],
    # Speech / Humans
    "Speech/Human": [
        "Speech",
        "Singing",
        "Human voice",
        "Hands",
        "Crying, sobbing",
        "Breathing",
        "Respiratory sounds",
        "Laughter",
        "Cough",
        "Digestive",
        "Human sounds",
        "Human locomotion",
        "Human group actions",
        "Chant",
        "Shout",
    ],
    # Animals / Pets / Livestock
    "Animals/Pets/Livestock": [
        "Dog",
        "Cat",
        "Bird",
        "Fowl",
        "Chicken, rooster",
        "Pig",
        "Goat",
        "Sheep",
        "Cattle, bovinae",
        "Horse",
        "Owl",
        "Crow",
        "Pigeon, dove",
        "Duck",
        "Goose",
        "Frog",
        "Bee, wasp, etc.",
        "Canidae, dogs, wolves",
        "Rodents, rats, mice",
        "Wild animals",
        "Livestock, farm animals, working animals",
        "Animal",
    ],
    # Vehicles / Transport
    "Vehicles/Transport": [
        "Car",
        "Motor vehicle (road)",
        "Truck",
        "Vehicle",
        "Vehicle horn, car horn, honking",
        "Aircraft engine",
        "Aircraft",
        "Train",
        "Rail transport",
        "Boat, Water vehicle",
        "Non-motorized land vehicle",
        "Bicycle",
        "Engine",
        "Emergency vehicle",
    ],
    # Tools / Appliances
    "Appliances/Tools": [
        "Tools",
        "Power tool",
        "Drill",
        "Mechanisms",
        "Clicking",
        "Doorbell",
        "Clock",
        "Camera",
        "Telephone",
        "Typing",
        "Sound equipment",
        "Light engine (high frequency)",
    ],
    # Hazards / Safety
    "Safety/Hazard": [
        "Fire",
        "Explosion",
        "Fireworks",
        "Gunshot, gunfire",
        "Siren",
        "Alarm",
    ],
    # Environmental / Natural
    "Environmental/Natural": [
        "Rain",
        "Wind",
        "Ocean",
        "Thunderstorm",
        "Acoustic environment",
        "Channel, environment and background",
        "Natural sounds",
    ],
    # Other / Ambient (rare/uncategorized)
    "Other/Ambient": [
        "Miscellaneous sources",
        "Generic impact sounds",
        "Specific impact sounds",
        "Background noise",
        "Source-ambiguous sounds",
        "Onomatopoeia",
        "Brief tone",
        "Other sourceless",
        "Sine wave",
        "Arrow",
    ],
}

# Flat lookup: parent name -> category. Insertion order follows the groups
# above, category by category.
parent_to_category = {
    parent: category
    for category, parents in _category_members.items()
    for parent in parents
}


# Map each parent name to its category. Names missing from the mapping (and
# null parent names) fall into 'Other/Ambient'.
pdf['CATEGORY'] = pdf['PARENT_NAME'].map(parent_to_category).fillna('Other/Ambient')


# Pivot to one row per (house, hour) with one column per category.
# Several parent names share a category, so a (house, hour, category) cell is
# usually backed by more than one COUNT row. Those rows must be summed:
# pivot_table's default aggfunc is 'mean', which would average the per-parent
# counts and under-report the category total.
pivot = pdf.pivot_table(
    index=['HOUSE_NAME', 'TIME_BUCKET'],
    columns='CATEGORY',
    values='COUNT',
    aggfunc='sum',
    fill_value=0
).reset_index()

print(" Pivot table shape:", pivot.shape)
print(pivot.head())


#  Save or analyse
pivot.to_csv("kitchen_hourly_activity.csv", index=False)
print("Saved kitchen_hourly_activity.csv")


In [None]:
# COMPLETE Kitchen Activity Analysis

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# GLOBAL SETTINGS
# Apply the style sheet and palette first. plt.style.use() overwrites every
# rcParam the sheet defines, so custom values set before it are silently reset
# for any overlapping key (the seaborn whitegrid sheet defines axes.linewidth,
# for example). Updating rcParams afterwards makes the overrides below win.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

plt.rcParams.update({
    'font.size': 14,
    'axes.titlesize': 18,
    'axes.labelsize': 16,
    'xtick.labelsize': 13,
    'ytick.labelsize': 13,
    'legend.fontsize': 14,
    'figure.titlesize': 20,
    'figure.dpi': 100,
    'savefig.dpi': 300,
    'axes.linewidth': 1.5,
    'grid.linewidth': 0.8,
    'lines.linewidth': 2.5,
    'lines.markersize': 8
})

def create_activity_heatmaps(df):
    """Draw four summary heatmaps of kitchen activity and save them as a PNG.

    Panels (2x2 grid):
      * mean count per category for each hour of day,
      * mean daily total per category for each household,
      * mean count per category for each day of week,
      * Pearson correlation between categories.

    The figure is written to ``kitchen_activity_heatmaps.png`` and displayed.

    Args:
        df: DataFrame with ``HOUSE_NAME``, ``HOUR``, ``DATE`` and
            ``DAY_OF_WEEK`` columns plus one numeric column per activity
            category.

    Returns:
        ``(hourly_avg, house_daily)`` -- the per-hour category means and the
        per-household mean daily totals that back the first two panels.
    """
    # Every column that is not an identifier/time field is treated as a
    # category. 'Activity_Score' and 'Is_Burst' are derived columns that other
    # analysis functions in this file add to the frame in place; they are
    # excluded so they never show up as categories.
    non_category = {'TIME_BUCKET', 'HOUSE_NAME', 'HOUR', 'DATE', 'DAY_OF_WEEK',
                    'Activity_Score', 'Is_Burst'}
    activity_cols = [name for name in df.columns if name not in non_category]

    fig, axes = plt.subplots(2, 2, figsize=(24, 18))

    # 1.1 Hour vs Category Heatmap
    hourly_avg = df.groupby('HOUR')[activity_cols].mean()

    sns.heatmap(hourly_avg.T, annot=True, fmt='.0f', cmap='YlOrRd',
                ax=axes[0,0],
                cbar_kws={'label': 'Mean Activity Count (Events/Hour)', 'shrink': 0.8},
                annot_kws={'size': 10})

    axes[0,0].set_title('Kitchen Activity Patterns by Hour of Day\n(Hourly Averages Across All Households)',
                       fontsize=18, fontweight='bold', pad=25)
    axes[0,0].set_xlabel('Hour of Day (24-Hour Format)', fontsize=16, fontweight='bold')
    axes[0,0].set_ylabel('Activity Category', fontsize=16, fontweight='bold')

    # Proper hour labels. Heatmap cell i spans [i, i + 1], so the tick for
    # each hour must sit at the cell centre (i + 0.5); integer positions would
    # put every label on the boundary between two cells.
    hour_centres = [i + 0.5 for i in range(len(hourly_avg.index))]
    axes[0,0].set_xticks(hour_centres)
    axes[0,0].set_xticklabels([f'{h:02d}:00' for h in hourly_avg.index],
                             rotation=45, ha='right', fontsize=13)
    axes[0,0].tick_params(axis='y', labelsize=13)

    # 1.2 House vs Category with UNITS: sum per (house, day), then average
    # those daily totals per house.
    house_daily = df.groupby(['HOUSE_NAME', 'DATE'])[activity_cols].sum().groupby('HOUSE_NAME').mean()

    sns.heatmap(house_daily, annot=True, fmt='.0f', cmap='viridis',
                ax=axes[0,1],
                cbar_kws={'label': 'Mean Daily Activity Count (Events/Day)', 'shrink': 0.8},
                annot_kws={'size': 12, 'weight': 'bold'})

    axes[0,1].set_title('Average Daily Kitchen Activity by Household\n(Mean Events per Day)',
                       fontsize=18, fontweight='bold', pad=25)
    axes[0,1].set_xlabel('Activity Category', fontsize=16, fontweight='bold')
    axes[0,1].set_ylabel('Household ID', fontsize=16, fontweight='bold')
    axes[0,1].tick_params(axis='x', rotation=45, labelsize=13)
    axes[0,1].tick_params(axis='y', labelsize=13)

    # 1.3 Day of week patterns, ordered Monday..Sunday (days absent from the
    # data are skipped rather than shown as empty columns).
    dow_patterns = df.groupby('DAY_OF_WEEK')[activity_cols].mean()
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    dow_patterns = dow_patterns.reindex([day for day in day_order if day in dow_patterns.index])

    sns.heatmap(dow_patterns.T, annot=True, fmt='.0f', cmap='plasma',
                ax=axes[1,0],
                cbar_kws={'label': 'Mean Hourly Activity Count (Events/Hour)', 'shrink': 0.8},
                annot_kws={'size': 10})

    axes[1,0].set_title('Kitchen Activity Patterns by Day of Week\n(Hourly Averages by Weekday)',
                       fontsize=18, fontweight='bold', pad=25)
    axes[1,0].set_xlabel('Day of Week', fontsize=16, fontweight='bold')
    axes[1,0].set_ylabel('Activity Category', fontsize=16, fontweight='bold')
    axes[1,0].tick_params(axis='x', labelsize=13)
    axes[1,0].tick_params(axis='y', labelsize=13)

    # 1.4 Correlation matrix
    correlation_matrix = df[activity_cols].corr()

    sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='RdBu_r', center=0,
                ax=axes[1,1],
                cbar_kws={'label': 'Pearson Correlation Coefficient', 'shrink': 0.8},
                annot_kws={'size': 10})

    axes[1,1].set_title('Inter-Category Activity Correlations\n(Pearson Correlation Matrix)',
                       fontsize=18, fontweight='bold', pad=25)
    axes[1,1].set_xlabel('Activity Category', fontsize=16, fontweight='bold')
    axes[1,1].set_ylabel('Activity Category', fontsize=16, fontweight='bold')
    axes[1,1].tick_params(axis='x', rotation=45, labelsize=11)
    axes[1,1].tick_params(axis='y', rotation=45, labelsize=11)

    plt.tight_layout(pad=3.0)
    plt.savefig('kitchen_activity_heatmaps.png', dpi=300, bbox_inches='tight',
                facecolor='white', edgecolor='none')
    plt.show()

    return hourly_avg, house_daily

def create_circadian_analysis(df, min_households=4):
    """Plot the mean 24-hour profile of six key activity categories.

    Each key category that exists in ``df`` gets one panel showing its mean
    count per hour of day, a +/-1 standard deviation band and an annotation at
    the peak hour. The 2x3 figure is saved to ``circadian_patterns.png`` and
    displayed.

    Args:
        df: DataFrame with an ``HOUR`` column and one numeric column per
            activity category.
        min_households: Accepted but not used by the current implementation.

    Returns:
        DataFrame of per-hour means for every activity column.
    """
    excluded = ['TIME_BUCKET', 'HOUSE_NAME', 'HOUR', 'DATE', 'DAY_OF_WEEK']
    activity_cols = [name for name in df.columns if name not in excluded]

    by_hour = df.groupby('HOUR')[activity_cols]
    hourly_patterns = by_hour.mean()
    hourly_std = by_hour.std()

    fig, axes = plt.subplots(2, 3, figsize=(22, 14))

    key_categories = ['Cooking/Domestic', 'Water/Liquid', 'Speech/Human',
                     'Appliances/Tools', 'Music', 'Other/Ambient']

    # Shared x-axis ticks: every third hour, rendered as HH:00.
    tick_hours = range(0, 24, 3)
    tick_text = [f'{h:02d}:00' for h in tick_hours]

    for panel, category in zip(axes.flatten(), key_categories):
        # A category missing from the data leaves its panel empty.
        if category not in hourly_patterns.columns:
            continue

        hours = hourly_patterns.index.values
        mean_curve = hourly_patterns[category]
        spread = hourly_std[category]

        panel.plot(hours, mean_curve,
                   linewidth=4, marker='o', markersize=10,
                   label='Mean Activity', color='darkblue')
        panel.fill_between(hours, mean_curve - spread, mean_curve + spread,
                           alpha=0.3, label='±1 Standard Deviation', color='lightblue')

        panel.set_title(f'{category} - Circadian Activity Pattern\n(24-Hour Cycle)',
                        fontweight='bold', fontsize=16, pad=15)
        panel.set_xlabel('Hour of Day (0-23 Format)', fontsize=14, fontweight='bold')
        panel.set_ylabel('Activity Count\n(Events per Hour)', fontsize=14, fontweight='bold')

        panel.set_xticks(tick_hours)
        panel.set_xticklabels(tick_text, fontsize=12)
        panel.tick_params(axis='y', labelsize=12)

        panel.grid(True, alpha=0.4, linestyle='--')
        panel.legend(fontsize=12, loc='upper right')

        # Call out the hour with the highest mean; the label is offset up and
        # to the right of the peak.
        peak_hour = mean_curve.idxmax()
        peak_value = mean_curve.max()
        callout = f'Daily Peak\nTime: {peak_hour:02d}:00\nValue: {peak_value:.0f} events/hour'

        panel.annotate(callout,
                       xy=(peak_hour, peak_value),
                       xytext=(peak_hour+3, peak_value*1.15),
                       arrowprops=dict(arrowstyle='->', color='red', lw=2.5),
                       fontsize=11, ha='center', fontweight='bold',
                       bbox=dict(boxstyle="round,pad=0.5", facecolor="yellow",
                                 alpha=0.8, edgecolor='red'))

    plt.tight_layout(pad=3.0)
    plt.savefig('circadian_patterns.png', dpi=300, bbox_inches='tight',
                facecolor='white', edgecolor='none')
    plt.show()

    return hourly_patterns

def create_house_radar_charts(df):
    """Draw one radar (polar) chart per household comparing activity profiles.

    Each household's mean count per category is divided by the largest
    household mean for that category, so every spoke runs from 0 to 1. The
    figure is saved to ``house_radar_charts.png`` and displayed.

    The grid has four charts per row and at least two rows; further rows are
    added as needed so that no household is left out.

    Args:
        df: DataFrame with a ``HOUSE_NAME`` column and one numeric column per
            activity category.

    Returns:
        DataFrame of normalised profiles (households x categories). A
        category whose maximum across households is 0 is reported as 0.
    """
    # 'Activity_Score' and 'Is_Burst' are derived columns that other analysis
    # functions in this file add to the frame in place; they are not
    # categories and must not become radar spokes.
    non_category = {'TIME_BUCKET', 'HOUSE_NAME', 'HOUR', 'DATE', 'DAY_OF_WEEK',
                    'Activity_Score', 'Is_Burst'}
    activity_cols = [name for name in df.columns if name not in non_category]

    house_profiles = df.groupby('HOUSE_NAME')[activity_cols].mean()
    # Dividing by a column maximum of 0 gives 0/0 = NaN, which would break the
    # polygon outline; treat such categories as 0.
    house_profiles_norm = house_profiles.div(house_profiles.max()).fillna(0)

    categories = list(house_profiles_norm.columns)
    houses = house_profiles_norm.index.tolist()
    N = len(categories)

    # Size the grid from the number of households (ceil division), keeping the
    # 2x4 layout as the minimum.
    n_cols = 4
    n_rows = max(2, -(-len(houses) // n_cols))
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(24, 6 * n_rows),
                             subplot_kw=dict(projection='polar'))
    axes = axes.flatten()

    colors = plt.cm.Set3(np.linspace(0, 1, len(houses)))

    # Spoke angles and labels are identical for every chart; the first angle
    # is repeated at the end to close the polygon.
    angles = [n / float(N) * 2 * np.pi for n in range(N)]
    angles += angles[:1]
    spoke_labels = [cat.replace('/', '/\n') for cat in categories]

    for ax, house, color in zip(axes, houses, colors):
        values = house_profiles_norm.loc[house].values.tolist()
        values += values[:1]

        ax.plot(angles, values, linewidth=3, linestyle='solid', color=color)
        ax.fill(angles, values, color=color, alpha=0.25)

        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(spoke_labels, fontsize=11)
        ax.set_ylim(0, 1)
        ax.set_title(f'{house} Activity Profile\n(Normalized Scale 0-1)',
                    size=14, fontweight='bold', pad=25)
        ax.grid(True, alpha=0.3)

        ax.set_yticks([0.2, 0.4, 0.6, 0.8, 1.0])
        ax.set_yticklabels(['0.2', '0.4', '0.6', '0.8', '1.0'], fontsize=10)

    # Hide extra subplots
    for ax in axes[len(houses):]:
        ax.set_visible(False)

    plt.tight_layout(pad=3.0)
    plt.savefig('house_radar_charts.png', dpi=300, bbox_inches='tight',
                facecolor='white', edgecolor='none')
    plt.show()

    return house_profiles_norm

def create_activity_timeseries(df):
    """Compute a weighted activity score and plot it three ways.

    A weighted sum of the category columns is stored in ``Activity_Score``
    and plotted as: mean score per hour of day for each household, daily
    total score per household over time, and a per-household box plot of the
    score distribution. The figure is saved to ``activity_timeseries.png``
    and displayed.

    Note:
        ``df`` is modified in place: the ``Activity_Score`` column is added
        (or overwritten) on the frame that was passed in.

    Args:
        df: DataFrame with ``HOUSE_NAME``, ``HOUR`` and ``DATE`` columns plus
            numeric activity category columns. Categories without a column
            simply contribute nothing to the score.

    Returns:
        The same ``df`` object, now carrying ``Activity_Score``.
    """
    # Create weighted activity score
    weights = {
        'Cooking/Domestic': 0.25,
        'Water/Liquid': 0.20,
        'Speech/Human': 0.15,
        'Appliances/Tools': 0.15,
        'Music': 0.10,
        'Animals/Pets/Livestock': 0.05,
        'Vehicles/Transport': 0.03,
        'Environmental/Natural': 0.03,
        'Safety/Hazard': 0.02,
        'Other/Ambient': 0.02
    }

    df['Activity_Score'] = 0
    for category, weight in weights.items():
        if category in df.columns:
            df['Activity_Score'] += df[category] * weight

    fig, axes = plt.subplots(3, 1, figsize=(18, 16))

    # One fixed colour per household, shared by the two line plots so a house
    # looks the same in both and houses do not collide when the default
    # colour cycle is shorter than the number of households.
    houses = df['HOUSE_NAME'].unique()
    palette = plt.cm.tab20(np.linspace(0, 1, len(houses)))   # generate distinct colors
    house_color = dict(zip(houses, palette))

    # Plot 1: HOURLY activity score over time
    hourly_by_house = df.groupby(['HOUSE_NAME', 'HOUR'])['Activity_Score'].mean().reset_index()

    for house in houses:
        house_data = hourly_by_house[hourly_by_house['HOUSE_NAME'] == house]
        axes[0].plot(house_data['HOUR'], house_data['Activity_Score'],
                    label=house, linewidth=3, marker='o', markersize=8,
                    color=house_color.get(house))

    axes[0].set_title('Kitchen Activity Intensity by Hour of Day\n(Mean Weighted Activity Score per Hour)',
                     fontsize=18, fontweight='bold', pad=20)
    axes[0].set_xlabel('Hour of Day (0-23 Format)', fontsize=16, fontweight='bold')
    axes[0].set_ylabel('Weighted Activity Score\n(Dimensionless Index)', fontsize=16, fontweight='bold')

    axes[0].set_xticks(range(0, 24, 2))
    axes[0].set_xticklabels([f'{h:02d}:00' for h in range(0, 24, 2)], fontsize=13)
    axes[0].tick_params(axis='y', labelsize=13)
    axes[0].legend(fontsize=12, loc='upper right')
    axes[0].grid(True, alpha=0.4)

    # Plot 2: DAILY totals
    daily_activity = df.groupby(['HOUSE_NAME', 'DATE'])['Activity_Score'].sum().reset_index()
    daily_activity['DATE'] = pd.to_datetime(daily_activity['DATE'])

    for house in daily_activity['HOUSE_NAME'].unique():
        house_daily = daily_activity[daily_activity['HOUSE_NAME'] == house].sort_values('DATE')
        axes[1].plot(house_daily['DATE'], house_daily['Activity_Score'],
                    label=house, linewidth=3, marker='s', markersize=8, alpha=0.8,
                    color=house_color.get(house))

    axes[1].set_title('Daily Kitchen Activity Totals by Household\n(Sum of Weighted Activity Scores per Day)',
                     fontsize=18, fontweight='bold', pad=20)
    axes[1].set_xlabel('Date', fontsize=16, fontweight='bold')
    axes[1].set_ylabel('Daily Total Activity Score\n(Dimensionless Index)', fontsize=16, fontweight='bold')
    axes[1].tick_params(axis='x', labelsize=12, rotation=45)
    axes[1].tick_params(axis='y', labelsize=13)
    axes[1].legend(fontsize=12)
    axes[1].grid(True, alpha=0.4)

    # Plot 3: Distribution boxplot
    house_labels = list(houses)
    activity_data = [df[df['HOUSE_NAME'] == house]['Activity_Score'].values
                     for house in house_labels]

    # Tick labels are set on the axis afterwards instead of through boxplot's
    # `labels` keyword, which newer Matplotlib releases deprecate in favour of
    # `tick_labels`; this form works on both.
    box_plot = axes[2].boxplot(activity_data, patch_artist=True,
                              boxprops=dict(linewidth=2),
                              whiskerprops=dict(linewidth=2),
                              capprops=dict(linewidth=2),
                              medianprops=dict(linewidth=3, color='red'))
    axes[2].set_xticklabels(house_labels)

    colors = plt.cm.Set3(np.linspace(0, 1, len(house_labels)))
    for patch, color in zip(box_plot['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    axes[2].set_title('Distribution of Activity Scores by Household\n(Statistical Summary)',
                     fontsize=18, fontweight='bold', pad=20)
    axes[2].set_xlabel('Household ID', fontsize=16, fontweight='bold')
    axes[2].set_ylabel('Activity Score Distribution\n(Dimensionless Index)', fontsize=16, fontweight='bold')
    axes[2].tick_params(axis='x', labelsize=13)
    axes[2].tick_params(axis='y', labelsize=13)
    axes[2].grid(True, alpha=0.4)

    plt.tight_layout(pad=3.0)
    plt.savefig('activity_timeseries.png', dpi=300, bbox_inches='tight',
                facecolor='white', edgecolor='none')
    plt.show()

    return df

def create_interactive_dashboard(df):
    """Build three interactive Plotly figures and write each to an HTML file.

    Figures:
      1. Heatmap of mean count per category and hour of day
         (``interactive_heatmap.html``).
      2. Line chart of the hourly mean for four core categories, skipping any
         that are absent from ``df`` (``interactive_timeseries.html``).
      3. 3D surface of mean ``Cooking/Domestic`` count by household and hour
         (``3d_surface.html``).

    Args:
        df: DataFrame with ``HOUSE_NAME`` and ``HOUR`` columns plus numeric
            activity category columns; ``Cooking/Domestic`` is required for
            the surface plot.

    Returns:
        ``(fig1, fig2, fig3)`` -- the heatmap, line chart and surface figures.
    """
    # 'Activity_Score' and 'Is_Burst' are derived columns that other analysis
    # functions in this file add to the frame in place; they are excluded so
    # the heatmap only has genuine category rows.
    non_category = {'TIME_BUCKET', 'HOUSE_NAME', 'HOUR', 'DATE', 'DAY_OF_WEEK',
                    'Activity_Score', 'Is_Burst'}
    activity_cols = [name for name in df.columns if name not in non_category]

    # 1. Interactive Heatmap
    hourly_avg = df.groupby('HOUR')[activity_cols].mean()

    fig1 = go.Figure(data=go.Heatmap(
        z=hourly_avg.values.T,
        x=[f'{h:02d}:00' for h in hourly_avg.index],
        y=list(hourly_avg.columns),
        colorscale='Viridis',
        hovertemplate='Hour: %{x}<br>Category: %{y}<br>Count: %{z:.0f} events/hour<extra></extra>',
        colorbar=dict(title='Events per Hour')
    ))

    fig1.update_layout(
        title='Interactive Kitchen Activity Heatmap<br><sub>Hover for detailed values</sub>',
        xaxis_title='Hour of Day (24-Hour Format)',
        yaxis_title='Activity Category',
        height=700,
        font=dict(size=14)
    )

    fig1.write_html("interactive_heatmap.html")

    # 2. Multi-line time series
    fig2 = go.Figure()

    for category in ['Cooking/Domestic', 'Water/Liquid', 'Speech/Human', 'Appliances/Tools']:
        if category in activity_cols:
            hourly_data = hourly_avg[category]
            fig2.add_trace(go.Scatter(
                x=[f'{h:02d}:00' for h in hourly_data.index],
                y=hourly_data.values,
                mode='lines+markers',
                name=category,
                line=dict(width=4),
                marker=dict(size=8),
                # Doubled braces keep the %{x} / %{y} placeholders literal
                # inside the f-string so Plotly can fill them in.
                hovertemplate=f'{category}<br>Hour: %{{x}}<br>Count: %{{y:.0f}} events/hour<extra></extra>'
            ))

    fig2.update_layout(
        title='Kitchen Activity Patterns Throughout the Day<br><sub>Interactive circadian rhythms</sub>',
        xaxis_title='Hour of Day (24-Hour Format)',
        yaxis_title='Activity Count (Events per Hour)',
        height=600,
        hovermode='x unified',
        font=dict(size=14)
    )

    fig2.write_html("interactive_timeseries.html")

    # 3. 3D Surface Plot: households x hours grid of mean cooking activity;
    # (house, hour) pairs with no data are shown as 0.
    pivot_3d = df.pivot_table(
        index='HOUSE_NAME',
        columns='HOUR',
        values='Cooking/Domestic',
        aggfunc='mean'
    ).fillna(0)

    fig3 = go.Figure(data=[go.Surface(
        z=pivot_3d.values,
        x=[f'{h:02d}:00' for h in pivot_3d.columns],
        y=pivot_3d.index,
        colorscale='Viridis',
        hovertemplate='House: %{y}<br>Hour: %{x}<br>Activity: %{z:.0f} events/hour<extra></extra>'
    )])

    fig3.update_layout(
        title='3D Kitchen Cooking Activity Surface<br><sub>House × Hour × Activity Intensity</sub>',
        scene=dict(
            xaxis_title='Hour of Day (24-Hour Format)',
            yaxis_title='Household ID',
            zaxis_title='Cooking Activity Count (Events/Hour)'
        ),
        height=700,
        font=dict(size=14)
    )

    fig3.write_html("3d_surface.html")

    return fig1, fig2, fig3

def create_statistical_analysis(df):
    """Plot four statistical summaries of kitchen activity and save them.

    Panels (2x2 grid):
      * histogram of ``Cooking/Domestic`` counts with mean/median markers,
      * lower-triangle Pearson correlation matrix between categories,
      * hour of day with the highest mean cooking activity per household,
      * mean per-category variance per household.

    The figure is saved to ``statistical_analysis.png`` and displayed.

    Args:
        df: DataFrame with ``HOUSE_NAME`` and ``HOUR`` columns plus numeric
            activity category columns, including ``Cooking/Domestic``.

    Returns:
        ``(peak_hours, activity_variance)`` -- two Series indexed by
        household: the peak cooking hour and the mean category variance.
    """
    # 'Activity_Score' and 'Is_Burst' are derived columns that other analysis
    # functions in this file add to the frame in place. Leaving them in would
    # add non-category rows to the correlation matrix and distort the mean
    # variance, so they are excluded.
    non_category = {'TIME_BUCKET', 'HOUSE_NAME', 'HOUR', 'DATE', 'DAY_OF_WEEK',
                    'Activity_Score', 'Is_Burst'}
    activity_cols = [name for name in df.columns if name not in non_category]

    fig, axes = plt.subplots(2, 2, figsize=(20, 16))

    # 1. Distribution analysis
    cooking_data = df['Cooking/Domestic'].dropna()

    axes[0,0].hist(cooking_data, bins=30, alpha=0.7, color='skyblue',
                   edgecolor='black', linewidth=1.5)
    axes[0,0].axvline(cooking_data.mean(), color='red', linestyle='--', linewidth=3,
                     label=f'Mean: {cooking_data.mean():.0f} events/hour')
    axes[0,0].axvline(cooking_data.median(), color='green', linestyle='--', linewidth=3,
                     label=f'Median: {cooking_data.median():.0f} events/hour')

    axes[0,0].set_title('Distribution of Cooking/Domestic Activity\n(Frequency Analysis)',
                       fontweight='bold', fontsize=16, pad=15)
    axes[0,0].set_xlabel('Activity Count (Events per Hour)', fontsize=14, fontweight='bold')
    axes[0,0].set_ylabel('Frequency (Number of Observations)', fontsize=14, fontweight='bold')
    axes[0,0].tick_params(labelsize=12)
    axes[0,0].legend(fontsize=13)
    axes[0,0].grid(True, alpha=0.4)

    # Statistics box
    stats_text = f'N = {len(cooking_data)}\nStd Dev = {cooking_data.std():.1f}\nSkewness = {cooking_data.skew():.2f}'
    axes[0,0].text(0.75, 0.85, stats_text, transform=axes[0,0].transAxes,
                   fontsize=12, bbox=dict(boxstyle="round,pad=0.3", facecolor="lightyellow"))

    # 2. Correlation analysis. The mask hides the upper triangle and the
    # diagonal, so each pair of categories appears once.
    correlation_matrix = df[activity_cols].corr()
    mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))

    sns.heatmap(correlation_matrix, mask=mask, annot=True, fmt='.2f',
                cmap='RdBu_r', center=0, ax=axes[0,1],
                square=True, linewidths=1,
                cbar_kws={'label': 'Pearson Correlation Coefficient', 'shrink': 0.8},
                annot_kws={'size': 10})

    axes[0,1].set_title('Inter-Category Correlation Matrix\n(Pearson Correlations)',
                       fontweight='bold', fontsize=16, pad=15)
    axes[0,1].tick_params(axis='x', rotation=45, labelsize=11)
    axes[0,1].tick_params(axis='y', rotation=45, labelsize=11)

    # 3. Peak activity analysis: for each household, the hour whose mean
    # cooking count is highest.
    peak_hours = df.groupby('HOUSE_NAME').apply(
        lambda x: x.groupby('HOUR')['Cooking/Domestic'].mean().idxmax()
    )

    bars = axes[1,0].bar(range(len(peak_hours)), peak_hours.values,
                        color=plt.cm.Set3(np.linspace(0, 1, len(peak_hours))),
                        edgecolor='black', linewidth=1.5)

    axes[1,0].set_xticks(range(len(peak_hours)))
    axes[1,0].set_xticklabels(peak_hours.index, rotation=45, fontsize=12)
    axes[1,0].set_title('Peak Cooking Activity Hour by Household\n(Hour of Maximum Daily Activity)',
                       fontweight='bold', fontsize=16, pad=15)
    axes[1,0].set_xlabel('Household ID', fontsize=14, fontweight='bold')
    axes[1,0].set_ylabel('Peak Activity Hour (24-Hour Format)', fontsize=14, fontweight='bold')
    axes[1,0].tick_params(labelsize=12)
    axes[1,0].grid(True, alpha=0.4)

    # Enhanced bar labels
    for bar, hour in zip(bars, peak_hours.values):
        height = bar.get_height()
        axes[1,0].text(bar.get_x() + bar.get_width()/2., height + 0.3,
                      f'{int(hour):02d}:00', ha='center', va='bottom',
                      fontweight='bold', fontsize=12)

    # 4. Activity variance analysis: variance of each category within a
    # household, averaged across categories.
    activity_variance = df.groupby('HOUSE_NAME')[activity_cols].var().mean(axis=1)

    bars = axes[1,1].bar(range(len(activity_variance)), activity_variance.values,
                        color=plt.cm.viridis(np.linspace(0, 1, len(activity_variance))),
                        edgecolor='black', linewidth=1.5)

    axes[1,1].set_xticks(range(len(activity_variance)))
    axes[1,1].set_xticklabels(activity_variance.index, rotation=45, fontsize=12)
    axes[1,1].set_title('Activity Variability by Household\n(Mean Variance Across Categories)',
                       fontweight='bold', fontsize=16, pad=15)
    axes[1,1].set_xlabel('Household ID', fontsize=14, fontweight='bold')
    axes[1,1].set_ylabel('Average Variance\n(Events²/Hour²)', fontsize=14, fontweight='bold')
    axes[1,1].tick_params(labelsize=12)
    axes[1,1].grid(True, alpha=0.4)

    # Value labels on bars, placed slightly above each bar top.
    for bar, var in zip(bars, activity_variance.values):
        height = bar.get_height()
        axes[1,1].text(bar.get_x() + bar.get_width()/2., height + height*0.02,
                      f'{var:.0f}', ha='center', va='bottom',
                      fontweight='bold', fontsize=11)

    plt.tight_layout(pad=3.0)
    plt.savefig('statistical_analysis.png', dpi=300, bbox_inches='tight',
                facecolor='white', edgecolor='none')
    plt.show()

    return peak_hours, activity_variance

def create_pattern_mining_analysis(df):
    """Create advanced pattern mining visualisations.

    Draws a 2x2 figure based on the hourly 'Cooking/Domestic' counts:
    burst detection over time, burst frequency per household, a
    transition-probability heatmap between binned activity levels, and the
    coefficient of variation per household. The figure is saved to
    'pattern_mining_analysis.png' and then shown.

    Args:
        df: DataFrame with 'HOUSE_NAME', 'TIME_BUCKET' and
            'Cooking/Domestic' columns. It is not modified.

    Returns:
        A tuple ``(burst_counts, transition_probs, routine_consistency)``:
        the number of burst rows per household (households without bursts
        are absent), a 4x4 array of row-normalised transition probabilities
        ordered Low/Medium/High/Very High, and a DataFrame with
        'mean', 'std' and 'CV' columns indexed by household.
    """
    levels = ['Low', 'Medium', 'High', 'Very High']

    # Work on a private copy of the needed columns: the helper columns added
    # below must not leak into the caller's frame, where other code that
    # selects activity columns by exclusion would pick them up.
    data = df[['HOUSE_NAME', 'TIME_BUCKET', 'Cooking/Domestic']].copy()
    cooking = data['Cooking/Domestic']

    # Calculate activity bursts (more than two standard deviations above the mean)
    burst_threshold = cooking.mean() + 2 * cooking.std()
    data['Is_Burst'] = cooking > burst_threshold

    # include_lowest=True keeps hours with exactly zero events in the 'Low'
    # bin; without it they fall outside (0, 500] and become NaN.
    data['Activity_Level'] = pd.cut(cooking,
                                    bins=[0, 500, 1500, 3000, float('inf')],
                                    labels=levels,
                                    include_lowest=True)

    # Filter and time-sort each household once; reused by the burst plot and
    # the transition count. Order of first appearance is kept for the legend.
    per_house = [
        (house, data[data['HOUSE_NAME'] == house].sort_values('TIME_BUCKET'))
        for house in data['HOUSE_NAME'].unique()
    ]

    fig, axes = plt.subplots(2, 2, figsize=(20, 16))

    # 1. Burst detection visualization
    for house, house_data in per_house:
        axes[0,0].plot(house_data['TIME_BUCKET'], house_data['Cooking/Domestic'],
                      label=house, alpha=0.7, linewidth=2)

        burst_data = house_data[house_data['Is_Burst']]
        if not burst_data.empty:
            axes[0,0].scatter(burst_data['TIME_BUCKET'], burst_data['Cooking/Domestic'],
                            color='red', s=80, alpha=0.9, zorder=5)

    axes[0,0].axhline(burst_threshold, color='red', linestyle='--', linewidth=3,
                     label=f'Burst Threshold ({burst_threshold:.0f} events/hour)')
    axes[0,0].set_title('Activity Burst Detection Over Time\n(Red dots = abnormal activity spikes)',
                       fontweight='bold', fontsize=16, pad=15)
    axes[0,0].set_xlabel('Date and Time', fontsize=14, fontweight='bold')
    axes[0,0].set_ylabel('Cooking Activity Count\n(Events per Hour)', fontsize=14, fontweight='bold')
    axes[0,0].tick_params(axis='x', rotation=45, labelsize=12)
    axes[0,0].tick_params(axis='y', labelsize=12)
    axes[0,0].legend(fontsize=12, loc='upper right')
    axes[0,0].grid(True, alpha=0.4)

    # 2. Burst frequency by house
    burst_counts = data[data['Is_Burst']].groupby('HOUSE_NAME').size()

    bars = axes[0,1].bar(range(len(burst_counts)), burst_counts.values,
                        color=plt.cm.Reds(np.linspace(0.3, 1, len(burst_counts))),
                        edgecolor='black', linewidth=1.5)

    axes[0,1].set_xticks(range(len(burst_counts)))
    axes[0,1].set_xticklabels(burst_counts.index, rotation=45, fontsize=12)
    axes[0,1].set_title('Activity Burst Frequency by Household\n(Number of abnormal activity spikes)',
                       fontweight='bold', fontsize=16, pad=15)
    axes[0,1].set_xlabel('Household ID', fontsize=14, fontweight='bold')
    axes[0,1].set_ylabel('Number of Burst Events\n(Count)', fontsize=14, fontweight='bold')
    axes[0,1].tick_params(labelsize=12)
    axes[0,1].grid(True, alpha=0.4)

    # Adding value labels on bars
    for bar, n_bursts in zip(bars, burst_counts.values):
        height = bar.get_height()
        axes[0,1].text(bar.get_x() + bar.get_width()/2., height + 0.1,
                      str(n_bursts), ha='center', va='bottom',
                      fontweight='bold', fontsize=12)

    # 3. Activity sequence patterns
    # Count consecutive (current, next) level pairs within each household;
    # pairs touching an unclassified (NaN) level are skipped.
    transition_counts = Counter()
    for _, house_data in per_house:
        sequence = house_data['Activity_Level'].tolist()
        transition_counts.update(
            (current, next_state)
            for current, next_state in zip(sequence, sequence[1:])
            if pd.notna(current) and pd.notna(next_state)
        )

    # Create transition matrix (rows = current level, columns = next level)
    transition_matrix = np.array(
        [[transition_counts.get((from_state, to_state), 0) for to_state in levels]
         for from_state in levels],
        dtype=float,
    )

    # Normalise to probabilities; rows with no observed transitions stay all
    # zero instead of triggering a divide-by-zero warning.
    row_totals = transition_matrix.sum(axis=1, keepdims=True)
    transition_probs = np.divide(transition_matrix, row_totals,
                                 out=np.zeros_like(transition_matrix),
                                 where=row_totals > 0)

    sns.heatmap(transition_probs, annot=True, fmt='.2f',
                xticklabels=levels, yticklabels=levels,
                cmap='Blues', ax=axes[1,0],
                cbar_kws={'label': 'Transition Probability', 'shrink': 0.8},
                annot_kws={'size': 12, 'weight': 'bold'})

    axes[1,0].set_title('Activity Level Transition Probabilities\n(Markov Chain Analysis)',
                       fontweight='bold', fontsize=16, pad=15)
    axes[1,0].set_xlabel('Next Activity Level', fontsize=14, fontweight='bold')
    axes[1,0].set_ylabel('Current Activity Level', fontsize=14, fontweight='bold')
    axes[1,0].tick_params(labelsize=12)

    # 4. Routine consistency analysis (coefficient of variation = std / mean)
    routine_consistency = data.groupby('HOUSE_NAME')['Cooking/Domestic'].agg(['mean', 'std'])
    routine_consistency['CV'] = routine_consistency['std'] / routine_consistency['mean']

    bars = axes[1,1].bar(range(len(routine_consistency)), routine_consistency['CV'].values,
                        color=plt.cm.viridis(np.linspace(0, 1, len(routine_consistency))),
                        edgecolor='black', linewidth=1.5)

    axes[1,1].set_xticks(range(len(routine_consistency)))
    axes[1,1].set_xticklabels(routine_consistency.index, rotation=45, fontsize=12)
    axes[1,1].set_title('Routine Consistency by Household\n(Lower values = more consistent routines)',
                       fontweight='bold', fontsize=16, pad=15)
    axes[1,1].set_xlabel('Household ID', fontsize=14, fontweight='bold')
    axes[1,1].set_ylabel('Coefficient of Variation\n(Dimensionless)', fontsize=14, fontweight='bold')
    axes[1,1].tick_params(labelsize=12)
    axes[1,1].grid(True, alpha=0.4)

    # Adding value labels
    for bar, cv in zip(bars, routine_consistency['CV'].values):
        height = bar.get_height()
        axes[1,1].text(bar.get_x() + bar.get_width()/2., height + height*0.02,
                      f'{cv:.2f}', ha='center', va='bottom',
                      fontweight='bold', fontsize=11)

    plt.tight_layout(pad=3.0)
    plt.savefig('pattern_mining_analysis.png', dpi=300, bbox_inches='tight',
                facecolor='white', edgecolor='none')
    plt.show()

    return burst_counts, transition_probs, routine_consistency

def run_analysis(csv_path="kitchen_hourly_activity.csv"):
    """Run all visualisation analyses with proper feedback implementation.

    Loads the hourly activity CSV, derives 'HOUR', 'DATE' and 'DAY_OF_WEEK'
    from 'TIME_BUCKET', then runs the seven analysis stages in order. Each
    stage is best-effort: if it raises, the error is printed and the
    remaining stages still run.

    Args:
        csv_path: Path of the CSV to analyse. It must provide the
            'TIME_BUCKET' and 'HOUSE_NAME' columns read during loading.
            Defaults to "kitchen_hourly_activity.csv".

    Returns:
        A dict holding the outputs of the stages that succeeded (keys such
        as 'hourly_avg', 'peak_hours', 'burst_counts'), or None when the
        data could not be loaded.
    """

    # Load and prepare data
    try:
        df = pd.read_csv(csv_path)
        df['TIME_BUCKET'] = pd.to_datetime(df['TIME_BUCKET'])
        df['HOUR'] = df['TIME_BUCKET'].dt.hour
        df['DATE'] = df['TIME_BUCKET'].dt.date
        df['DAY_OF_WEEK'] = df['TIME_BUCKET'].dt.day_name()
        print(f"Data loaded successfully: {len(df)} rows from {len(df['HOUSE_NAME'].unique())} households")
    except Exception as e:
        # Nothing downstream can run without the data, so stop here.
        print(f"Error loading data: {e}")
        return None

    print()

    results = {}

    # 1. Activity Heatmaps
    print("1. Creating activity heatmaps")
    try:
        hourly_avg, house_daily = create_activity_heatmaps(df)
        results.update(hourly_avg=hourly_avg, house_daily=house_daily)
        print("Heatmaps completed")
    except Exception as e:
        print(f"Heatmaps failed: {e}")

    # 2. Circadian Analysis
    print("2. Analysing circadian patterns with proper time axes")
    try:
        results['hourly_patterns'] = create_circadian_analysis(df)
        print("Circadian analysis completed")
    except Exception as e:
        print(f"Circadian analysis failed: {e}")

    # 3. House Radar Charts
    print("3. Creating house comparison radar charts")
    try:
        results['house_profiles'] = create_house_radar_charts(df)
        print("Radar charts completed")
    except Exception as e:
        print(f"Radar charts failed: {e}")

    # 4. Time Series Analysis
    # The frame returned by this stage replaces df for the later stages;
    # on failure the previous df is kept.
    print("4. Creating time series with consistent scales")
    try:
        df = create_activity_timeseries(df)
        print("Time series completed")
    except Exception as e:
        print(f"Time series failed: {e}")

    # 5. Interactive Visualisations
    # The returned figures are not used here, so they are not bound.
    print("5. Creating interactive visualisations")
    try:
        create_interactive_dashboard(df)
        print("Interactive visualisations completed")
    except Exception as e:
        print(f"Interactive visualisations failed: {e}")

    # 6. Statistical Analysis
    print("6. Running improved statistical analysis")
    try:
        peak_hours, activity_variance = create_statistical_analysis(df)
        results.update(peak_hours=peak_hours, activity_variance=activity_variance)
        print("Statistical analysis completed")
    except Exception as e:
        print(f"Statistical analysis failed: {e}")

    # 7. Pattern Mining
    print("7. Performing advanced pattern mining analysis")
    try:
        burst_counts, transition_probs, routine_consistency = create_pattern_mining_analysis(df)
        results.update(burst_counts=burst_counts,
                       transition_probs=transition_probs,
                       routine_consistency=routine_consistency)
        print("Pattern mining completed")
    except Exception as e:
        print(f"Pattern mining failed: {e}")

    return results

# MAIN EXECUTION
# Run the full analysis pipeline only when this file is executed directly.
# `results` receives the dict returned by run_analysis(), or None when the
# input data could not be loaded.
if __name__ == "__main__":
    results = run_analysis()