In [None]:
# Import python packages
import streamlit as st
import pandas as pd

# Obtain the currently active Snowpark session; it is handed to
# extract_living_room_csv() further down to query the source table.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


In [None]:
# This script extracts 83M+ records from vitalise data and generates a CSV

# Importing further required packages
import numpy as np
from snowflake.snowpark.functions import col, lit, to_timestamp, concat_ws, date_trunc, hour, count as sf_count
from snowflake.snowpark.types import *
import warnings
warnings.filterwarnings('ignore')

def create_category_mapping() -> dict:
    """
    Build the lookup from a sound label to its analysis category.

    The same mapping is used for both CLASS_NAME and PARENT_NAME values:
    callers look a label up by exact string match and receive a category
    name such as "Human_Communication" or "Kitchen_Appliances".

    Each label appears exactly once. A dict literal silently keeps only the
    last value of a repeated key, so a duplicated label would make the
    earlier entry dead code.

    Returns:
        dict: label (str) -> analysis category (str).
    """
    mapping = {
        # HUMAN ACTIVITIES & COMMUNICATION
        "Speech": "Human_Communication",
        "Human voice": "Human_Communication",
        "Conversation": "Human_Communication",
        "Narration, monologue": "Human_Communication",
        "Male speech, man speaking": "Human_Communication",
        "Female speech, woman speaking": "Human_Communication",
        "Child speech, kid speaking": "Human_Communication",
        "Babbling": "Human_Communication",
        "Whispering": "Human_Communication",

        # HUMAN VOCAL & SINGING
        "Singing": "Human_Vocal",
        "Male singing": "Human_Vocal",
        "Female singing": "Human_Vocal",
        "Child singing": "Human_Vocal",
        "Humming": "Human_Vocal",
        "Vocal music": "Human_Vocal",
        "Choir": "Human_Vocal",
        "Chant": "Human_Vocal",

        # HUMAN PHYSICAL & MOVEMENT
        "Human locomotion": "Human_Physical",
        "Walk, footsteps": "Human_Physical",
        "Run": "Human_Physical",
        "Hands": "Human_Physical",
        "Clapping": "Human_Physical",
        "Finger snapping": "Human_Physical",
        "Human group actions": "Human_Physical",
        "Crowd": "Human_Physical",
        "Applause": "Human_Physical",

        # HUMAN EMOTIONAL & EXPRESSIONS
        "Laughter": "Human_Emotional",
        "Giggle": "Human_Emotional",
        "Chuckle, chortle": "Human_Emotional",
        "Crying, sobbing": "Human_Emotional",
        "Baby cry, infant cry": "Human_Emotional",
        "Whimper": "Human_Emotional",
        "Wail, moan": "Human_Emotional",

        # HUMAN RESPIRATORY & BODILY
        "Breathing": "Human_Respiratory",
        "Respiratory sounds": "Human_Respiratory",
        "Cough": "Human_Respiratory",
        "Throat clearing": "Human_Respiratory",
        "Sneeze": "Human_Respiratory",
        "Pant": "Human_Respiratory",
        "Gasp": "Human_Respiratory",
        "Sigh": "Human_Respiratory",
        "Digestive": "Human_Bodily",
        "Burping, eructation": "Human_Bodily",
        "Hiccup": "Human_Bodily",
        "Heart sounds, heartbeat": "Human_Bodily",

        # MUSIC & INSTRUMENTS
        "Music": "Music_General",
        "Musical instrument": "Music_Instruments",
        "Piano": "Music_Instruments",
        "Keyboard (musical)": "Music_Instruments",
        "Guitar": "Music_Instruments",
        "Violin, fiddle": "Music_Instruments",
        "Plucked string instrument": "Music_Instruments",
        "Bowed string instrument": "Music_Instruments",
        "Brass instrument": "Music_Instruments",
        "Wind instrument, woodwind instrument": "Music_Instruments",
        "Synthesizer": "Music_Instruments",
        "Organ": "Music_Instruments",
        "Effects unit": "Music_Instruments",
        "Theremin": "Music_Instruments",

        # MUSIC PERCUSSION
        "Drum": "Music_Percussion",
        "Drum kit": "Music_Percussion",
        "Snare drum": "Music_Percussion",
        "Cymbal": "Music_Percussion",
        "Percussion": "Music_Percussion",
        "Mallet percussion": "Music_Percussion",
        "Bell": "Music_Percussion",
        "Chime": "Music_Percussion",
        "Rattle (instrument)": "Music_Percussion",

        # MUSIC GENRES
        "Rock music": "Music_Genre",
        "Classical music": "Music_Genre",
        "Electronic music": "Music_Genre",
        "Hip hop music": "Music_Genre",
        "Country": "Music_Genre",
        "Christian music": "Music_Genre",
        "Music of Asia": "Music_Genre",
        "Music of Latin America": "Music_Genre",
        "Music genre": "Music_Genre",
        "Music mood": "Music_Genre",
        "Music role": "Music_Genre",
        "Musical concepts": "Music_Genre",

        # DOMESTIC & HOUSEHOLD
        "Domestic sounds, home sounds": "Domestic_General",
        "Writing": "Domestic_General",
        "Toothbrush": "Domestic_Personal",
        "Electric toothbrush": "Domestic_Personal",
        "Hair dryer": "Domestic_Personal",
        "Electric shaver": "Domestic_Personal",

        # KITCHEN & COOKING
        "Chopping (food)": "Kitchen_Food_Prep",
        "Frying (food)": "Kitchen_Food_Prep",
        "Cooking": "Kitchen_Food_Prep",
        "Sizzle": "Kitchen_Food_Prep",
        "Boiling": "Kitchen_Food_Prep",
        "Microwave oven": "Kitchen_Appliances",
        "Blender": "Kitchen_Appliances",
        "Dishwasher": "Kitchen_Appliances",
        "Washing machine": "Kitchen_Appliances",
        "Vacuum cleaner": "Kitchen_Appliances",

        # WATER & LIQUIDS
        "Water": "Water_Sounds",
        "Liquid": "Water_Sounds",
        "Pour": "Water_Sounds",
        "Splash, splatter": "Water_Sounds",
        "Stream": "Water_Sounds",
        "Drip": "Water_Sounds",
        "Gurgling": "Water_Sounds",
        "Rain": "Water_Natural",
        "Ocean": "Water_Natural",
        "Thunderstorm": "Water_Natural",
        "Steam": "Water_Natural",

        # DOORS, WINDOWS & MECHANISMS
        "Door": "Home_Doors_Windows",
        "Doorbell": "Home_Doors_Windows",
        "Knock": "Home_Doors_Windows",
        "Sliding door": "Home_Doors_Windows",
        "Cupboard open or close": "Home_Doors_Windows",
        "Drawer open or close": "Home_Doors_Windows",
        "Squeak": "Home_Doors_Windows",
        "Keys jangling": "Home_Doors_Windows",
        "Zipper (clothing)": "Home_Doors_Windows",
        "Mechanisms": "Home_Mechanisms",
        "Clock": "Home_Mechanisms",
        "Tick": "Home_Mechanisms",
        "Alarm": "Home_Mechanisms",
        "Clicking": "Home_Mechanisms",
        "Ratchet, pawl": "Home_Mechanisms",

        # TECHNOLOGY & ELECTRONICS
        "Telephone": "Technology_Communication",
        "Telephone bell ringing": "Technology_Communication",
        "Microphone": "Technology_Communication",
        "Television": "Technology_Media",
        "Radio": "Technology_Media",
        "Sound equipment": "Technology_Media",
        "Recording": "Technology_Media",
        "Sound reproduction": "Technology_Media",
        "Camera": "Technology_Media",
        "Typing": "Technology_Computing",
        "Computer keyboard": "Technology_Computing",

        # TOOLS & EQUIPMENT
        "Tools": "Tools_General",
        "Power tool": "Tools_Power",
        "Drill": "Tools_Power",
        "Scissors": "Tools_Manual",
        "Hammer": "Tools_Manual",

        # MATERIALS & SURFACES
        "Wood": "Materials_Natural",
        "Glass": "Materials_Hard",
        "Metal": "Materials_Hard",
        "Surface contact": "Materials_Contact",
        "Scratch": "Materials_Contact",
        "Rub": "Materials_Contact",
        "Deformable shell": "Materials_Flexible",
        "Sounds of things": "Materials_General",

        # IMPACTS & COLLISIONS
        "Generic impact sounds": "Impacts_General",
        "Specific impact sounds": "Impacts_Specific",
        "Thump, thud": "Impacts_Heavy",
        "Bang": "Impacts_Sharp",
        "Slam": "Impacts_Sharp",
        "Crash": "Impacts_Destructive",
        "Explosion": "Impacts_Destructive",
        "Gunshot, gunfire": "Impacts_Destructive",

        # PETS & DOMESTIC ANIMALS
        "Dog": "Pets_Dogs",
        "Domestic animals, pets": "Pets_General",
        "Canidae, dogs, wolves": "Pets_Dogs",
        "Bark": "Pets_Dogs",
        "Growling": "Pets_Dogs",
        "Whimper (dog)": "Pets_Dogs",
        "Cat": "Pets_Cats",
        "Purr": "Pets_Cats",
        "Meow": "Pets_Cats",
        "Hiss": "Pets_Cats",

        # FARM ANIMALS
        "Livestock, farm animals, working animals": "Farm_Animals",
        "Cattle, bovinae": "Farm_Animals",
        "Pig": "Farm_Animals",
        "Goat": "Farm_Animals",
        "Sheep": "Farm_Animals",
        "Horse": "Farm_Animals",

        # WILDLIFE & BIRDS
        "Bird": "Wildlife_Birds",
        "Bird vocalization, bird call, bird song": "Wildlife_Birds",
        # "Chirp, tweet" is a bird vocalization; it must not be repeated in
        # the tones/signals section below, or that entry would override this one.
        "Chirp, tweet": "Wildlife_Birds",
        "Crow": "Wildlife_Birds",
        "Owl": "Wildlife_Birds",
        "Pigeon, dove": "Wildlife_Birds",
        "Duck": "Wildlife_Birds",
        "Goose": "Wildlife_Birds",
        "Turkey": "Wildlife_Birds",
        "Chicken, rooster": "Wildlife_Birds",
        "Fowl": "Wildlife_Birds",

        # OTHER ANIMALS
        "Wild animals": "Wildlife_Other",
        "Animal": "Wildlife_Other",
        "Frog": "Wildlife_Other",
        "Snake": "Wildlife_Other",
        "Roaring cats (lions, tigers)": "Wildlife_Other",
        "Rodents, rats, mice": "Wildlife_Other",
        "Insect": "Wildlife_Insects",
        "Bee, wasp, etc.": "Wildlife_Insects",
        "Fly, housefly": "Wildlife_Insects",

        # TRANSPORTATION & VEHICLES
        "Car": "Transport_Road",
        "Vehicle": "Transport_Road",
        "Motor vehicle (road)": "Transport_Road",
        "Truck": "Transport_Road",
        "Vehicle horn, car horn, honking": "Transport_Road",
        "Engine": "Transport_Engines",
        "Light engine (high frequency)": "Transport_Engines",
        "Aircraft": "Transport_Air",
        "Aircraft engine": "Transport_Air",
        "Train": "Transport_Rail",
        "Rail transport": "Transport_Rail",
        "Boat, Water vehicle": "Transport_Water",
        "Bicycle": "Transport_Non_Motor",
        "Non-motorized land vehicle": "Transport_Non_Motor",
        "Emergency vehicle": "Transport_Emergency",
        "Siren": "Transport_Emergency",

        # NATURAL SOUNDS & WEATHER
        "Natural sounds": "Nature_General",
        "Wind": "Nature_Weather",
        "Fire": "Nature_Elements",
        "Fireworks": "Nature_Elements",
        "Thunder": "Nature_Weather",

        # ACOUSTIC ENVIRONMENTS
        "Acoustic environment": "Environment_Acoustic",
        "Background noise": "Environment_Background",
        "Channel, environment and background": "Environment_Background",
        "Noise": "Environment_Background",
        "Reverberation": "Environment_Acoustic",
        "Echo": "Environment_Acoustic",

        # TONES & SIGNALS
        "Brief tone": "Signals_Tones",
        "Sine wave": "Signals_Tones",
        "Beep, bleep": "Signals_Electronic",
        "Buzz": "Signals_Electronic",
        "Hum": "Signals_Electronic",
        "Static": "Signals_Electronic",
        "Whistle": "Signals_Manual",
        "Arrow": "Signals_Other",

        # MISCELLANEOUS
        "Source-ambiguous sounds": "Misc_Ambiguous",
        "Miscellaneous sources": "Misc_Other",
        "Other sourceless": "Misc_Other",
        "Onomatopoeia": "Misc_Other",
        "Silence": "Misc_Silence",
    }

    return mapping

def extract_living_room_csv(session, output_filename="living_room_activity.csv"):
    """
    Build the hourly living-room activity matrix and write it to a CSV file.

    Reads the ``vitalise_data_light_01`` table through ``session``, keeps the
    'Living Room' rows whose hour of day lies between 8 and 20 inclusive,
    counts detections per house / hourly bucket / class / parent, maps every
    class to an analysis category and pivots the counts into one column per
    category (missing combinations are filled with 0).

    Parameters:
        session: Snowpark session used to query the table.
        output_filename: Path the resulting CSV is written to.

    Returns:
        (matrix_df, output_filename) on success, where matrix_df is the wide
        pandas DataFrame that was saved; (None, None) if any step raised
        (the error and its traceback are printed).
    """
    banner = "*" * 70
    id_columns = ['HOUSE_NAME', 'TIME_BUCKET']

    print("CREATING LIVING ROOM CSV FILE")
    print(banner)
    print("This will process 83M+ records into an analysis-ready CSV")
    print(banner)

    try:
        # Step 1: pull the needed columns (keeping both CLASS_NAME and
        # PARENT_NAME) and restrict to the living room.
        source_columns = [
            'HOUSE_NAME', 'DATE_COL', 'TIME_COL',
            'CLASS_NAME', 'PARENT_NAME', 'ROOM_NAME',
        ]
        room_df = session.table("vitalise_data_light_01").select(
            *[col(f'"{name}"').alias(name) for name in source_columns]
        )
        room_df = room_df.filter(col('"ROOM_NAME"') == lit('Living Room'))

        print("Raw data filtered for Living Room")

        # Step 2: derive the full timestamp, its hourly bucket and the hour of day.
        timestamp_expr = to_timestamp(
            concat_ws(lit(' '), col('DATE_COL'), col('TIME_COL'))
        )
        room_df = (
            room_df
            .with_column('DATETIME', timestamp_expr)
            .with_column('TIME_BUCKET', date_trunc('hour', col('DATETIME')))
            .with_column('HOUR', hour(col('DATETIME')))
        )

        # Step 3: keep only the recording window (hours 8..20 inclusive).
        print("Step 3: Filtering for records between the recording window")
        after_start = col('HOUR') >= lit(8)
        before_end = col('HOUR') <= lit(20)
        room_df = room_df.filter(after_start & before_end)

        # Step 4: count detections per house, hour bucket, class and parent.
        detections = sf_count(lit('*')).alias('DETECTION_COUNT')
        agg_df = room_df.group_by(
            'HOUSE_NAME', 'TIME_BUCKET', 'CLASS_NAME', 'PARENT_NAME'
        ).agg(detections)

        # Step 5: bring the (much smaller) aggregate into pandas.
        pdf = agg_df.to_pandas()

        print(f"Raw aggregated data shape: {pdf.shape}")
        print(f"Unique houses: {pdf['HOUSE_NAME'].nunique()}")
        print(f"Unique time buckets: {pdf['TIME_BUCKET'].nunique()}")
        print(f"Unique CLASS_NAMEs: {pdf['CLASS_NAME'].nunique()}")
        print(f"Total detection records: {len(pdf):,}")
        print(f"Total detections: {pdf['DETECTION_COUNT'].sum():,}")

        # Step 6: assign an analysis category to every aggregated row.
        category_mapping = create_category_mapping()
        # String forms a missing parent takes after str(...).strip().
        missing_parent_markers = ('nan', '', 'None')
        # Spaces become underscores; commas and parentheses are dropped.
        parent_cleanup = str.maketrans({' ': '_', ',': None, '(': None, ')': None})

        def categorize_sound(row):
            class_name = str(row['CLASS_NAME']).strip()
            parent_name = str(row['PARENT_NAME']).strip()

            # The specific class wins, then its parent.
            if class_name in category_mapping:
                return category_mapping[class_name]
            if parent_name in category_mapping:
                return category_mapping[parent_name]
            # No usable parent: nothing to derive a fallback from.
            if parent_name in missing_parent_markers:
                return "Uncategorized"
            # Unmapped but named parent: synthesise a fallback category.
            return f"Other_{parent_name.translate(parent_cleanup)}"

        pdf['ANALYSIS_CATEGORY'] = pdf.apply(categorize_sound, axis=1)

        # Report the most frequent categories.
        category_counts = (
            pdf.groupby('ANALYSIS_CATEGORY')['DETECTION_COUNT']
            .sum()
            .sort_values(ascending=False)
        )
        print(f"\nCategory distribution (Top 15):")
        for rank, (category, detections_total) in enumerate(category_counts.head(15).items(), 1):
            print(f"   {rank:2}. {category}: {detections_total:,} detections")

        print(f"\nTotal analysis categories: {pdf['ANALYSIS_CATEGORY'].nunique()}")

        # Step 7: collapse to house / hour / category, then pivot to wide form.
        activity_df = (
            pdf.groupby(id_columns + ['ANALYSIS_CATEGORY'])['DETECTION_COUNT']
            .sum()
            .reset_index()
        )

        print(f"Activity aggregation: {activity_df.shape}")

        matrix_df = activity_df.pivot_table(
            index=id_columns,
            columns='ANALYSIS_CATEGORY',
            values='DETECTION_COUNT',
            fill_value=0,
            aggfunc='sum',
        ).reset_index()

        # Drop the 'ANALYSIS_CATEGORY' label left on the column index by the pivot.
        matrix_df.columns.name = None

        n_rows, n_cols = matrix_df.shape
        print(f"Matrix created: {matrix_df.shape}")
        print(f"Final dataset:")
        print(f"   • Houses: {matrix_df['HOUSE_NAME'].nunique()}")
        print(f"   • Time periods: {matrix_df['TIME_BUCKET'].nunique()}")
        print(f"   • Activity categories: {n_cols - 2}")
        print(f"   • Total observations: {len(matrix_df):,}")

        # Step 8: write the matrix to disk.
        matrix_df.to_csv(output_filename, index=False)

        print(f"***CSV CREATION COMPLETED!***")
        print(f"Output file: {output_filename}")
        print(f"File size: {n_rows} rows × {n_cols} columns")

        # Preview of the saved data.
        print(f"Sample of final CSV data:")
        print(matrix_df.head(3))

        # List (up to ten of) the category columns, alphabetically.
        category_columns = [name for name in matrix_df.columns if name not in id_columns]
        print(f"Activity categories in CSV ({len(category_columns)} total):")
        for rank, category in enumerate(sorted(category_columns)[:10], 1):
            print(f"   {rank:2}. {category}")
        remaining = len(category_columns) - 10
        if remaining > 0:
            print(f"   ... and {remaining} more categories")

        return matrix_df, output_filename

    except Exception as e:
        # Notebook-level boundary: report the failure and signal it via (None, None).
        print(f"Error during CSV creation: {e}")
        import traceback
        print(traceback.format_exc())
        return None, None

In [None]:
# Run the extraction with the active Snowpark session. The mapping and
# extraction functions must already be defined in this notebook.
# Both returned values are None if the extraction failed.

matrix_df, csv_file = extract_living_room_csv(session)

In [None]:
""" 
SIMPLIFIED LIVING ROOM SOCIAL ISOLATION ANALYSIS

Different visualization approaches from kitchen analysis focused on social isolation.

Research Questions:
- RQ1: Social isolation detection during daytime
- RQ2: Temporal patterns
- RQ3: Household variations  
- RQ5: Weekend/weekday patterns
- RQ6: Activity networks
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import networkx as nx
from sklearn.preprocessing import StandardScaler
from scipy.stats import entropy
import warnings
warnings.filterwarnings('ignore')

# Use a plot style and colour palette that differ from the kitchen analysis.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably need matplotlib >= 3.6 — confirm.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

def setup_social_categories(df):
    """
    Split the activity columns of ``df`` into social, entertainment and
    domestic groups by substring match on the column name.

    Parameters:
        df: DataFrame whose columns are 'HOUSE_NAME', 'TIME_BUCKET' and one
            column per analysis category.

    Returns:
        (social_categories, entertainment_categories, domestic_categories,
        all_cols) - four lists of column names, each in the DataFrame's
        column order. ``all_cols`` holds every column except the two
        identifier columns.
    """
    identifier_columns = ('HOUSE_NAME', 'TIME_BUCKET')
    all_cols = [name for name in df.columns if name not in identifier_columns]

    def columns_containing(*fragments):
        # A column belongs to the group if any fragment occurs in its name.
        return [
            name for name in all_cols
            if any(fragment in name for fragment in fragments)
        ]

    social_categories = columns_containing(
        'Human_Communication', 'Human_Vocal', 'Human_Emotional', 'Human_Physical'
    )
    entertainment_categories = columns_containing('Music_', 'Technology_Media')
    domestic_categories = columns_containing('Kitchen_', 'Domestic_', 'Home_')

    print(f" Categories identified:")
    print(f" Social: {len(social_categories)}")
    print(f" Entertainment: {len(entertainment_categories)}")
    print(f" Domestic: {len(domestic_categories)}")

    return social_categories, entertainment_categories, domestic_categories, all_cols

def create_social_risk_dashboard(df, social_cats, entertainment_cats, domestic_cats):
    """
    Draw the six-panel social isolation risk dashboard and return the
    per-house metrics it is based on.

    Panels: average social activity with risk labels, share of hours with any
    social activity, social vs entertainment scatter, total activity, risk
    tier pie chart and an intervention priority ranking. The figure is saved
    to 'social_risk_dashboard.png' and shown.

    Parameters:
        df: Wide activity DataFrame with a 'HOUSE_NAME' column and one column
            per analysis category (one row per house and hour bucket).
        social_cats: Column names counted as social activity.
        entertainment_cats: Column names counted as entertainment.
        domestic_cats: Column names counted as domestic activity.

    Returns:
        dict mapping each house to a dict with keys 'social_avg',
        'entertainment_avg', 'domestic_avg', 'total_activity' and
        'social_frequency'.
    """
    tier_colors = ['red', 'orange', 'green']
    tier_bar_labels = ['HIGH RISK', 'MEDIUM RISK', 'LOW RISK']
    tier_pie_labels = ['High Risk', 'Medium Risk', 'Low Risk']

    def risk_tier(level):
        # 0 = high risk (< 50), 1 = medium risk (< 100), 2 = low risk.
        if level < 50:
            return 0
        if level < 100:
            return 1
        return 2

    def group_total(frame, cats):
        # An empty category list contributes nothing.
        return frame[cats].values.sum() if cats else 0

    # Per-house metrics
    house_metrics = {}
    for house in df['HOUSE_NAME'].unique():
        rows = df[df['HOUSE_NAME'] == house]
        n_hours = len(rows)

        social_sum = group_total(rows, social_cats)
        entertainment_sum = group_total(rows, entertainment_cats)
        domestic_sum = group_total(rows, domestic_cats)

        # Fraction of this house's rows with at least one social detection.
        if social_cats and n_hours > 0:
            social_frequency = np.sum(rows[social_cats].sum(axis=1) > 0) / n_hours
        else:
            social_frequency = 0

        house_metrics[house] = {
            'social_avg': social_sum / n_hours if n_hours > 0 else 0,
            'entertainment_avg': entertainment_sum / n_hours if n_hours > 0 else 0,
            'domestic_avg': domestic_sum / n_hours if n_hours > 0 else 0,
            'total_activity': social_sum + entertainment_sum + domestic_sum,
            'social_frequency': social_frequency,
        }

    # Figure layout
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('Living Room Social Isolation Risk Dashboard',
                 fontsize=16, fontweight='bold')
    (ax_level, ax_freq, ax_balance), (ax_total, ax_pie, ax_priority) = axes

    houses = list(house_metrics.keys())
    social_levels = [house_metrics[h]['social_avg'] for h in houses]
    social_tiers = [risk_tier(level) for level in social_levels]

    # Panel 1: average social activity, coloured and labelled by risk tier.
    ax_level.bar(houses, social_levels,
                 color=[tier_colors[t] for t in social_tiers], alpha=0.7)
    ax_level.set_title('Average Social Activity Level', fontweight='bold')
    ax_level.set_ylabel('Social Activity')
    ax_level.tick_params(axis='x', rotation=45)
    for position, (level, tier) in enumerate(zip(social_levels, social_tiers)):
        ax_level.text(position, level + 5, tier_bar_labels[tier],
                      ha='center', fontweight='bold', fontsize=8)

    # Panel 2: percentage of hours with any social activity.
    social_freq = [house_metrics[h]['social_frequency'] * 100 for h in houses]
    colors_freq = []
    for pct in social_freq:
        if pct < 20:
            colors_freq.append('red')
        elif pct < 40:
            colors_freq.append('orange')
        else:
            colors_freq.append('green')

    ax_freq.bar(houses, social_freq, color=colors_freq, alpha=0.7)
    ax_freq.set_title('Social Activity Frequency (%)', fontweight='bold')
    ax_freq.set_ylabel('% Hours with Social Activity')
    ax_freq.tick_params(axis='x', rotation=45)

    # Panel 3: social vs entertainment, one annotated point per house.
    entertainment_levels = [house_metrics[h]['entertainment_avg'] for h in houses]
    ax_balance.scatter(social_levels, entertainment_levels,
                       s=100, alpha=0.7, c=range(len(houses)), cmap='viridis')
    for house, x_val, y_val in zip(houses, social_levels, entertainment_levels):
        ax_balance.annotate(house, (x_val, y_val),
                            xytext=(5, 5), textcoords='offset points', fontsize=8)
    ax_balance.set_title('Social vs Entertainment Balance', fontweight='bold')
    ax_balance.set_xlabel('Social Activity')
    ax_balance.set_ylabel('Entertainment Activity')
    ax_balance.grid(True, alpha=0.3)

    # Panel 4: total activity per house.
    total_activity = [house_metrics[h]['total_activity'] for h in houses]
    ax_total.bar(houses, total_activity, color='skyblue', alpha=0.7)
    ax_total.set_title('Total Daytime Activity', fontweight='bold')
    ax_total.set_ylabel('Total Activity Count')
    ax_total.tick_params(axis='x', rotation=45)

    # Panel 5: how many houses fall into each risk tier.
    risk_counts = {label: 0 for label in tier_pie_labels}
    for tier in social_tiers:
        risk_counts[tier_pie_labels[tier]] += 1

    ax_pie.pie(risk_counts.values(), labels=risk_counts.keys(), autopct='%1.0f%%',
               colors=tier_colors, startangle=90)
    ax_pie.set_title('Social Isolation Risk Distribution', fontweight='bold')

    # Panel 6: priority score is the inverse of social activity, so the
    # least social houses rank first.
    priority_scores = [(1000 / (level + 1)) for level in social_levels]
    ranking = np.argsort(priority_scores)[::-1]  # highest priority first

    priority_houses = [houses[i] for i in ranking]
    priority_values = [priority_scores[i] for i in ranking]

    gradient = ['darkred', 'red', 'orange', 'yellow', 'lightgreen', 'green', 'darkgreen']
    colors_priority = gradient[:len(houses)]

    ax_priority.barh(priority_houses, priority_values, color=colors_priority, alpha=0.7)
    ax_priority.set_title('Intervention Priority Ranking', fontweight='bold')
    ax_priority.set_xlabel('Priority Score (Higher = More Urgent)')

    plt.tight_layout()
    plt.savefig('social_risk_dashboard.png', dpi=300, bbox_inches='tight')
    plt.show()

    return house_metrics

def create_temporal_patterns_analysis(df, social_cats, entertainment_cats):
    """
    Plot temporal patterns of social activity within the daytime window.

    Draws a 2x2 figure: (1) total social activity per hour across all houses
    with the peak hour annotated, (2) social activity per house split into
    morning / afternoon / evening, (3) the peak social hour of each house and
    (4) social versus entertainment activity per hour. The figure is saved to
    'temporal_patterns.png' and shown.

    Every hourly panel uses the same window (hours 8-20 inclusive), so the
    social and entertainment series always cover the same hours and match
    the x-axis ticks.

    Parameters:
        df: Wide activity DataFrame with 'HOUSE_NAME', 'TIME_BUCKET' and one
            column per analysis category. It is not modified.
        social_cats: Column names counted as social activity.
        entertainment_cats: Column names counted as entertainment.

    Returns:
        None.
    """
    # Single definition of the daytime window shared by all hourly panels.
    daytime_hours = range(8, 21)

    # Work on a copy so the caller's frame does not gain an HOUR column.
    df_temp = df.copy()
    df_temp['HOUR'] = pd.to_datetime(df_temp['TIME_BUCKET']).dt.hour

    def hourly_totals(frame, cats):
        # Return (hours, totals) for each daytime hour that has rows in frame.
        hours_present, totals = [], []
        for hr in daytime_hours:
            rows = frame[frame['HOUR'] == hr]
            if not rows.empty:
                hours_present.append(hr)
                totals.append(rows[cats].values.sum())
        return hours_present, totals

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Daytime Temporal Patterns Analysis', fontsize=16, fontweight='bold')

    # 1. Hourly social activity pattern (all houses combined)
    if social_cats:
        hours, hourly_social = hourly_totals(df_temp, social_cats)

        axes[0,0].plot(hours, hourly_social, 'bo-', linewidth=3, markersize=8)
        axes[0,0].set_title('Hourly Social Activity Pattern', fontweight='bold')
        axes[0,0].set_xlabel('Hour of Day')
        axes[0,0].set_ylabel('Total Social Activity')
        axes[0,0].grid(True, alpha=0.3)
        axes[0,0].set_xticks(daytime_hours)

        # Highlight peak hour
        if hourly_social:
            peak_idx = np.argmax(hourly_social)
            peak_hour = hours[peak_idx]
            peak_value = hourly_social[peak_idx]
            axes[0,0].annotate(f'Peak: {peak_hour}:00',
                               xy=(peak_hour, peak_value),
                               xytext=(peak_hour+1, peak_value+peak_value*0.1),
                               arrowprops=dict(arrowstyle='->', color='red', lw=2),
                               fontsize=12, fontweight='bold', color='red')

    # 2. Social activity by house over time periods.
    # Each period is (label, first hour, hour after the last, bar colour).
    periods = [
        ('Morning (8-12)', 8, 12, 'lightblue'),
        ('Afternoon (12-17)', 12, 17, 'orange'),
        ('Evening (17-21)', 17, 21, 'green'),
    ]
    houses = df['HOUSE_NAME'].unique()
    period_data = {label: [] for label, _, _, _ in periods}

    for house in houses:
        house_data = df_temp[df_temp['HOUSE_NAME'] == house]
        for label, start, stop, _ in periods:
            in_period = house_data[(house_data['HOUR'] >= start) & (house_data['HOUR'] < stop)]
            period_total = in_period[social_cats].values.sum() if not in_period.empty and social_cats else 0
            period_data[label].append(period_total)

    # Grouped bars: one cluster per house, one bar per period.
    x = np.arange(len(houses))
    width = 0.25
    for offset, (label, _, _, colour) in zip((-width, 0, width), periods):
        axes[0,1].bar(x + offset, period_data[label], width, label=label, alpha=0.8, color=colour)

    axes[0,1].set_title('Social Activity by Time Period', fontweight='bold')
    axes[0,1].set_xlabel('Household')
    axes[0,1].set_ylabel('Social Activity Count')
    axes[0,1].set_xticks(x)
    axes[0,1].set_xticklabels(houses, rotation=45)
    axes[0,1].legend()
    axes[0,1].grid(True, alpha=0.3)

    # 3. Peak activity hours by house
    peak_hours_by_house = []
    house_names = []

    for house in houses:
        if not social_cats:
            # Without social columns there is no peak to compute.
            break
        house_data = df_temp[df_temp['HOUSE_NAME'] == house]
        house_hours, house_hourly = hourly_totals(house_data, social_cats)

        if house_hourly:
            peak_hours_by_house.append(house_hours[np.argmax(house_hourly)])
            house_names.append(house)

    if peak_hours_by_house:
        axes[1,0].bar(house_names, peak_hours_by_house, color='gold', alpha=0.7)
        axes[1,0].set_title('Peak Social Activity Hour by House', fontweight='bold')
        axes[1,0].set_xlabel('Household')
        axes[1,0].set_ylabel('Peak Hour')
        axes[1,0].set_ylim(7, 22)
        axes[1,0].tick_params(axis='x', rotation=45)

        # Add hour labels on bars
        for i, peak in enumerate(peak_hours_by_house):
            axes[1,0].text(i, peak + 0.2, f'{peak}:00', ha='center', fontweight='bold')

    # 4. Social vs Entertainment over time
    if social_cats and entertainment_cats:
        # Same frame and same hour window as panel 1, so the hours returned
        # here are identical to `hours` and the two series line up.
        entertainment_hours, hourly_entertainment = hourly_totals(df_temp, entertainment_cats)

        axes[1,1].plot(hours, hourly_social, 'bo-', linewidth=2, label='Social', markersize=6)
        axes[1,1].plot(entertainment_hours, hourly_entertainment, 'ro-', linewidth=2, label='Entertainment', markersize=6)
        axes[1,1].set_title('Social vs Entertainment by Hour', fontweight='bold')
        axes[1,1].set_xlabel('Hour of Day')
        axes[1,1].set_ylabel('Activity Count')
        axes[1,1].legend()
        axes[1,1].grid(True, alpha=0.3)
        axes[1,1].set_xticks(daytime_hours)

    plt.tight_layout()
    plt.savefig('temporal_patterns.png', dpi=300, bbox_inches='tight')
    plt.show()

def create_household_comparison_analysis(df, social_cats, entertainment_cats, domestic_cats):
    """
    Compare households on social, entertainment and domestic activity.

    For every distinct HOUSE_NAME the summed counts of each category group,
    the number of rows and the number of non-zero groups are collected into
    a profile. A 2x2 figure (activity levels, per-row intensity scatter,
    diversity score, overall ranking) is drawn, saved as
    'household_comparison.png' and shown.

    Parameters:
        df: DataFrame with a HOUSE_NAME column plus the category columns.
        social_cats, entertainment_cats, domestic_cats: lists of column
            names per group; an empty list contributes a total of 0.

    Returns:
        dict keyed by house name; each value holds 'Social Activity',
        'Entertainment', 'Domestic Activity', 'Total Hours' and
        'Activity Diversity'.
    """

    def group_total(frame, columns):
        # An empty column list means the group is absent -> count it as 0
        return frame[columns].values.sum() if columns else 0

    def diversity_band(score):
        # (bar colour, text label) for a diversity score between 0 and 3
        if score <= 1:
            return 'red', 'Low'
        if score == 2:
            return 'orange', 'Medium'
        return 'green', 'High'

    houses = df['HOUSE_NAME'].unique()

    # Build one profile per household
    house_profiles = {}
    for name in houses:
        rows = df[df['HOUSE_NAME'] == name]
        social_total = group_total(rows, social_cats)
        entertainment_total = group_total(rows, entertainment_cats)
        domestic_total = group_total(rows, domestic_cats)
        group_totals = [social_total, entertainment_total, domestic_total]

        house_profiles[name] = {
            'Social Activity': social_total,
            'Entertainment': entertainment_total,
            'Domestic Activity': domestic_total,
            # Row count; presumably one row per recorded hour — TODO confirm
            'Total Hours': len(rows),
            'Activity Diversity': sum(1 for total in group_totals if total > 0)
        }

    # Profiles in the same order as `houses`, reused by every panel
    ordered_profiles = [house_profiles[name] for name in houses]

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Household Social Activity Comparison', fontsize=16, fontweight='bold')
    ax_levels, ax_intensity = axes[0, 0], axes[0, 1]
    ax_diversity, ax_ranking = axes[1, 0], axes[1, 1]

    # Panel 1: grouped bars, three bars per household
    positions = np.arange(len(houses))
    bar_width = 0.25

    social_vals = [profile['Social Activity'] for profile in ordered_profiles]
    entertainment_vals = [profile['Entertainment'] for profile in ordered_profiles]
    domestic_vals = [profile['Domestic Activity'] for profile in ordered_profiles]

    ax_levels.bar(positions - bar_width, social_vals, bar_width, label='Social', alpha=0.8, color='lightcoral')
    ax_levels.bar(positions, entertainment_vals, bar_width, label='Entertainment', alpha=0.8, color='lightblue')
    ax_levels.bar(positions + bar_width, domestic_vals, bar_width, label='Domestic', alpha=0.8, color='lightgreen')

    ax_levels.set_title('Activity Levels by Household', fontweight='bold')
    ax_levels.set_xlabel('Household')
    ax_levels.set_ylabel('Total Activity Count')
    ax_levels.set_xticks(positions)
    ax_levels.set_xticklabels(houses, rotation=45)
    ax_levels.legend()
    ax_levels.grid(True, alpha=0.3)

    # Panel 2: totals divided by row count, one labelled point per household
    social_intensity = [profile['Social Activity'] / profile['Total Hours'] for profile in ordered_profiles]
    entertainment_intensity = [profile['Entertainment'] / profile['Total Hours'] for profile in ordered_profiles]

    ax_intensity.scatter(social_intensity, entertainment_intensity, s=150, alpha=0.7,
                         c=range(len(houses)), cmap='viridis')

    for name, x_val, y_val in zip(houses, social_intensity, entertainment_intensity):
        ax_intensity.annotate(name, (x_val, y_val),
                              xytext=(5, 5), textcoords='offset points', fontweight='bold')

    ax_intensity.set_title('Social vs Entertainment Intensity', fontweight='bold')
    ax_intensity.set_xlabel('Social Intensity (per hour)')
    ax_intensity.set_ylabel('Entertainment Intensity (per hour)')
    ax_intensity.grid(True, alpha=0.3)

    # Panel 3: number of active category groups, colour-coded by band
    diversity_scores = [profile['Activity Diversity'] for profile in ordered_profiles]
    bands = [diversity_band(score) for score in diversity_scores]

    ax_diversity.bar(houses, diversity_scores, color=[colour for colour, _ in bands], alpha=0.7)
    ax_diversity.set_title('Activity Diversity Score', fontweight='bold')
    ax_diversity.set_xlabel('Household')
    ax_diversity.set_ylabel('Diversity Score (0-3)')
    ax_diversity.tick_params(axis='x', rotation=45)
    ax_diversity.set_ylim(0, 3.5)

    # Band name printed just above each bar
    for position, (score, (_, band_label)) in enumerate(zip(diversity_scores, bands)):
        ax_diversity.text(position, score + 0.1, band_label, ha='center', fontweight='bold')

    # Panel 4: households ordered by combined activity, largest first
    total_activities = [
        profile['Social Activity'] + profile['Entertainment'] + profile['Domestic Activity']
        for profile in ordered_profiles
    ]
    ranking = np.argsort(total_activities)[::-1]
    sorted_houses = [houses[idx] for idx in ranking]
    sorted_activities = [total_activities[idx] for idx in ranking]

    ax_ranking.barh(sorted_houses, sorted_activities, color='skyblue', alpha=0.7)
    ax_ranking.set_title('Overall Activity Ranking', fontweight='bold')
    ax_ranking.set_xlabel('Total Activity Count')

    plt.tight_layout()
    plt.savefig('household_comparison.png', dpi=300, bbox_inches='tight')
    plt.show()

    return house_profiles

def create_simple_network_analysis(df, social_cats, entertainment_cats):
    """
    Draw a small correlation network between activity categories.

    At most three social and three entertainment columns are used. Each
    column becomes a node labelled with the first two words of its name
    (underscores treated as spaces). An edge joins every pair whose
    absolute correlation, as computed by DataFrame.corr(), exceeds 0.3.
    Positive correlations are drawn solid green, negative ones dashed red.
    When at least one edge exists the figure is saved as
    'activity_network.png' and shown; otherwise only a message is printed.

    Parameters:
        df: DataFrame containing the category columns.
        social_cats: list of social category column names.
        entertainment_cats: list of entertainment category column names.

    Returns:
        None.
    """
    from itertools import combinations

    # Cap each group at three categories to keep the graph readable.
    # Slicing is safe on short lists, so the cap applies even when one
    # group has fewer than three entries.
    network_cats = social_cats[:3] + entertainment_cats[:3]

    if len(network_cats) < 2:
        print("Not enough categories for network analysis")
        return

    # Pairwise correlations between the selected columns
    correlation_matrix = df[network_cats].corr()

    # Short display names, computed once and used for both nodes and edges.
    # NOTE(review): categories sharing their first two words collapse into
    # one node — confirm the shortened names are unique for this dataset.
    node_names = [' '.join(cat.replace('_', ' ').split()[:2]) for cat in network_cats]

    G = nx.Graph()
    G.add_nodes_from(node_names)

    # Keep only reasonably strong relationships; NaN correlations fail the
    # comparison and are therefore skipped.
    threshold = 0.3
    for i, j in combinations(range(len(network_cats)), 2):
        corr_val = correlation_matrix.iloc[i, j]
        if abs(corr_val) > threshold:
            G.add_edge(node_names[i], node_names[j], weight=abs(corr_val), correlation=corr_val)

    if len(G.edges()) == 0:
        # Bail out before creating a figure so no empty canvas is left open
        print("No significant correlations found for network visualization")
        return

    plt.figure(figsize=(12, 10))

    pos = nx.spring_layout(G, k=2, iterations=50)

    # Better-connected nodes are drawn larger
    node_sizes = [G.degree(node) * 500 + 300 for node in G.nodes()]
    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='lightblue', alpha=0.8)

    # Split edges by the sign of the stored correlation
    positive_edges = [(u, v) for u, v, d in G.edges(data=True) if d.get('correlation', 0) > 0]
    negative_edges = [(u, v) for u, v, d in G.edges(data=True) if d.get('correlation', 0) < 0]

    if positive_edges:
        nx.draw_networkx_edges(G, pos, edgelist=positive_edges, edge_color='green', width=2, alpha=0.6)
    if negative_edges:
        nx.draw_networkx_edges(G, pos, edgelist=negative_edges, edge_color='red', width=2, alpha=0.6,
                               style='dashed')

    nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold')

    plt.title('Living Room Activity Co-occurrence Network\n(Daytime Recording:)',
              fontsize=14, fontweight='bold')

    # Proxy line handles, since the edge collections carry no legend labels
    import matplotlib.lines as mlines
    green_line = mlines.Line2D([], [], color='green', linewidth=2, label='Positive Correlation')
    red_line = mlines.Line2D([], [], color='red', linewidth=2, linestyle='--', label='Negative Correlation')
    plt.legend(handles=[green_line, red_line], loc='upper left')

    plt.axis('off')
    plt.tight_layout()
    plt.savefig('activity_network.png', dpi=300, bbox_inches='tight')
    plt.show()

    print(f"Network created with {len(G.nodes())} nodes and {len(G.edges())} connections")

def run_simplified_living_room_analysis(csv_file="living_room_activity.csv"):
    """
    Run the full living-room social analysis from an exported CSV.

    Loads the CSV, derives the category groups, runs the four analyses
    (risk dashboard, temporal patterns, household comparison, activity
    network) and prints a social-isolation risk summary based on each
    household's 'social_avg' metric.

    Parameters:
        csv_file: path of the CSV to analyse.

    Returns:
        dict with 'house_metrics', 'house_profiles', 'social_categories'
        and 'entertainment_categories' on success; None when the file is
        missing or any step raises (the error is printed, not re-raised).
    """
    import traceback

    banner = "*" * 60
    print("SIMPLIFIED LIVING ROOM SOCIAL ANALYSIS")
    print(banner)
    print("Focus: Social Isolation & Wellbeing Assessment")
    print(banner)

    try:
        # Load data
        df = pd.read_csv(csv_file)
        print(f"Data loaded: {df.shape}")
        # Parse the timestamps once and reuse them for both ends of the range
        time_buckets = pd.to_datetime(df['TIME_BUCKET'])
        print(f"Date range: {time_buckets.min()} to {time_buckets.max()}")

        # Setup categories (the fourth returned value is not needed here)
        social_cats, entertainment_cats, domestic_cats, _ = setup_social_categories(df)

        # Run analyses
        print("1. SOCIAL RISK DASHBOARD (RQ1, RQ3)")
        print("-" * 50)
        house_metrics = create_social_risk_dashboard(df, social_cats, entertainment_cats, domestic_cats)

        print("2. TEMPORAL PATTERNS ANALYSIS (RQ2)")
        print("-" * 45)
        create_temporal_patterns_analysis(df, social_cats, entertainment_cats)

        print("3. HOUSEHOLD COMPARISON (RQ3)")
        print("-" * 38)
        house_profiles = create_household_comparison_analysis(df, social_cats, entertainment_cats, domestic_cats)

        print("4. ACTIVITY NETWORK (RQ6)")
        print("-" * 32)
        create_simple_network_analysis(df, social_cats, entertainment_cats)

        # Risk assessment summary: bucket households by average social level
        houses = list(house_metrics)
        social_levels = [house_metrics[h]['social_avg'] for h in houses]
        high_risk_count = sum(1 for s in social_levels if s < 50)
        medium_risk_count = sum(1 for s in social_levels if 50 <= s < 100)
        low_risk_count = sum(1 for s in social_levels if s >= 100)

        print("SOCIAL ISOLATION RISK ASSESSMENT:")
        print(f"High Risk: {high_risk_count} households (social activity < 50)")
        print(f"Medium Risk: {medium_risk_count} households (social activity 50-100)")
        # The low-risk bucket includes exactly 100, so the text says ">="
        print(f"Low Risk: {low_risk_count} households (social activity >= 100)")

        # Lowest social level = most at risk, highest = least at risk
        if social_levels:
            min_idx = np.argmin(social_levels)
            max_idx = np.argmax(social_levels)

            print(f"Most at risk: {houses[min_idx]} (activity: {social_levels[min_idx]:.1f})")
            print(f"Least at risk: {houses[max_idx]} (activity: {social_levels[max_idx]:.1f})")

        print("Generated Files:")
        print("   • social_risk_dashboard.png")
        print("   • temporal_patterns.png")
        print("   • household_comparison.png")
        print("   • activity_network.png")

        print("Research Questions Addressed:")
        print("   • RQ1: Social isolation risk assessment completed")
        print("   • RQ2: Temporal patterns analyzed (8 AM - 9 PM)")
        print("   • RQ3: Household variations documented")
        print("   • RQ6: Activity network relationships mapped")

        return {
            'house_metrics': house_metrics,
            'house_profiles': house_profiles,
            'social_categories': social_cats,
            'entertainment_categories': entertainment_cats
        }

    except FileNotFoundError:
        # Report the path that was actually requested, not the default name
        print(f"Error: {csv_file} not found")
        print("Please run the Snowflake extraction first to create the CSV")
        return None
    except Exception as e:
        # Top-level boundary: print the failure and signal it via None
        print(f"Error: {e}")
        traceback.print_exc()
        return None

# Entry point: run the whole analysis and report whether it produced results
if __name__ == "__main__":
    results = run_simplified_living_room_analysis()

    # A non-empty result dict means every step completed; None means failure
    print(
        "SUCCESS! All visualizations created successfully!"
        if results
        else "Analysis failed. Please check the error messages above."
    )

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Read the living-room activity table exported to CSV
df = pd.read_csv("living_room_activity.csv")

# Social columns for this chart: any column whose name mentions
# communication or vocal activity
social_cats = [
    column for column in df.columns
    if any(tag in column for tag in ('Human_Communication', 'Human_Vocal'))
]

def plot_social_activity_level_clean(df, social_cats):
    """
    Plot average social activity level per household
    with gradient coloring (no risk labels).

    For each distinct HOUSE_NAME the summed values of `social_cats` are
    divided by that household's row count. Bars are shaded with the Blues
    colormap after min-max scaling the averages.

    Parameters:
        df: DataFrame with a HOUSE_NAME column plus the social columns.
        social_cats: list of social column names; an empty list gives 0
            for every household.

    Returns:
        None. Prints a message and draws nothing when `df` has no
        households.
    """

    houses = df['HOUSE_NAME'].unique()
    if len(houses) == 0:
        # min/max of an empty sequence would raise, and there is nothing to draw
        print("No households found - nothing to plot")
        return

    social_levels = []
    for house in houses:
        house_data = df[df['HOUSE_NAME'] == house]
        total_hours = len(house_data)
        social_sum = house_data[social_cats].values.sum() if social_cats else 0
        social_levels.append(social_sum / total_hours if total_hours > 0 else 0)
    social_levels = np.asarray(social_levels, dtype=float)

    # Min-max scale for color intensity. When every household has the same
    # level (including the single-household case) the span is zero, so use
    # a mid-tone for all bars instead of dividing by zero.
    lowest = social_levels.min()
    span = social_levels.max() - lowest
    if span > 0:
        norm_levels = (social_levels - lowest) / span
    else:
        norm_levels = np.full(len(social_levels), 0.5)
    colors = plt.cm.Blues(norm_levels)  # Blue gradient

    # Plot
    plt.figure(figsize=(10, 6))
    bars = plt.bar(houses, social_levels, color=colors, alpha=0.8)
    plt.title("Average Social Activity Level (Living Room)", fontweight="bold")
    plt.ylabel("Social Activity (avg per hour)")
    plt.xticks(rotation=45)

    # Add numeric labels on top
    for bar in bars:
        height = bar.get_height()
        if not np.isfinite(height):
            # int() cannot convert NaN/inf; leave such bars unlabelled
            continue
        plt.text(bar.get_x() + bar.get_width()/2, height + (0.01*height),
                 f"{int(height):,}", ha='center', fontsize=9)

    plt.tight_layout()
    plt.show()

# Draw the per-household social activity chart for the data loaded above
plot_social_activity_level_clean(df=df, social_cats=social_cats)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the living-room activity table exported to CSV
df = pd.read_csv("living_room_activity.csv")

# Group the columns by the marker substrings found in their names
social_cats = [
    name for name in df
    if any(tag in name for tag in ('Human_Communication', 'Human_Vocal',
                                   'Human_Emotional', 'Human_Physical'))
]
entertainment_cats = [
    name for name in df
    if any(tag in name for tag in ('Music_', 'Technology_Media'))
]
domestic_cats = [
    name for name in df
    if any(tag in name for tag in ('Kitchen_', 'Domestic_', 'Home_'))
]


def build_house_metrics(df, social_cats, entertainment_cats, domestic_cats):
    """
    Summarise activity per household.

    Parameters:
        df: DataFrame with a HOUSE_NAME column plus the category columns.
        social_cats, entertainment_cats, domestic_cats: column-name lists
            per group; an empty list counts as a total of 0.

    Returns:
        dict keyed by house name. Each value holds:
            social_avg        - social total divided by the row count
            social_freq       - percentage of rows whose social columns
                                sum to more than zero
            entertainment_avg - entertainment total divided by the row count
            total_activity    - social + entertainment + domestic totals
    """

    def group_sum(frame, columns):
        # An empty column list means the group is absent -> 0
        return frame[columns].values.sum() if columns else 0

    metrics = {}
    for house in df['HOUSE_NAME'].unique():
        rows = df[df['HOUSE_NAME'] == house]
        n_rows = len(rows)

        social_sum = group_sum(rows, social_cats)
        ent_sum = group_sum(rows, entertainment_cats)
        dom_sum = group_sum(rows, domestic_cats)

        # Averages fall back to 0 when the household has no rows
        if n_rows:
            social_avg = social_sum / n_rows
        else:
            social_avg = 0

        # Share of rows with any social activity, as a percentage
        if social_cats and n_rows:
            active_rows = rows[social_cats].sum(axis=1) > 0
            social_freq = np.sum(active_rows) / n_rows * 100
        else:
            social_freq = 0

        if n_rows:
            entertainment_avg = ent_sum / n_rows
        else:
            entertainment_avg = 0

        metrics[house] = {
            'social_avg': social_avg,
            'social_freq': social_freq,
            'entertainment_avg': entertainment_avg,
            'total_activity': social_sum + ent_sum + dom_sum,
        }
    return metrics

# plot dashboard 
def plot_social_dashboard_colorful(df, social_cats, entertainment_cats, domestic_cats):
    """
    Draw a six-panel (2x3) dashboard of per-household living-room activity.

    Panels: (A) average social activity per row, (B) percentage of rows with
    any social activity, (C) social vs. entertainment scatter, (D) stacked
    social/entertainment/domestic totals, (E) histogram of the social
    averages, (F) an attention score of 1000 / (social_avg + 1), so
    households with less social activity rank higher.

    The figure is saved as 'social_activity_dashboard_colorful.png' and shown.

    Parameters:
        df: DataFrame with a HOUSE_NAME column plus the category columns.
        social_cats, entertainment_cats, domestic_cats: column-name lists
            per group; an empty list counts as zero activity.

    Returns:
        None. Prints a message and draws nothing when `df` has no
        households.
    """
    houses = df['HOUSE_NAME'].unique()
    if len(houses) == 0:
        # Panel B takes social_freq.max(), which raises on a zero-size array
        print("No households found - nothing to plot")
        return

    sns.set_theme(style="whitegrid")

    # Assign a vivid, consistent color to each house
    pal = sns.color_palette("Set2", n_colors=len(houses))
    house_colors = {house: pal[i] for i, house in enumerate(houses)}
    bar_colors = [house_colors[house] for house in houses]

    M = build_house_metrics(df, social_cats, entertainment_cats, domestic_cats)
    social_avg = np.array([M[house]['social_avg'] for house in houses])
    social_freq = np.array([M[house]['social_freq'] for house in houses])
    ent_avg = np.array([M[house]['entertainment_avg'] for house in houses])

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('Living Room Social Activity Dashboard', fontsize=16, fontweight='bold')

    # A) Average Social Activity (colored per house)
    ax = axes[0, 0]
    bars = ax.bar(houses, social_avg, color=bar_colors, edgecolor="black", linewidth=0.6)
    ax.set_title('Average Social Activity Level')
    ax.set_ylabel('Avg per Hour')
    ax.tick_params(axis='x', rotation=45)
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2, height * 1.01, f'{int(height):,}',
                ha='center', va='bottom', fontsize=9)

    # B) Social Activity Frequency %
    ax = axes[0, 1]
    bars = ax.bar(houses, social_freq, color=bar_colors, edgecolor="black", linewidth=0.6)
    ax.set_title('Social Activity Frequency (%)')
    ax.set_ylabel('% of Recorded Hours with Social Activity')
    # Keep at least a 0-100 axis, with headroom for the labels above the bars
    ax.set_ylim(0, max(100, social_freq.max() * 1.08))
    ax.tick_params(axis='x', rotation=45)
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2, height + 1, f'{height:.0f}%',
                ha='center', va='bottom', fontsize=9)

    # C) Social vs Entertainment (scatter; same colors per house)
    ax = axes[0, 2]
    for i, house in enumerate(houses):
        ax.scatter(social_avg[i], ent_avg[i], s=140, color=house_colors[house],
                   edgecolor="black", linewidth=0.6, alpha=0.9)
        ax.annotate(house, (social_avg[i], ent_avg[i]), xytext=(6, 6),
                    textcoords='offset points', fontsize=9, weight='bold')
    ax.set_title('Social vs. Entertainment Balance')
    ax.set_xlabel('Average Social (per hour)')
    ax.set_ylabel('Average Entertainment (per hour)')

    # D) Total Daytime Activity (stack: social + entertainment + domestic)
    ax = axes[1, 0]
    soc_total = []
    ent_total = []
    dom_total = []
    for house in houses:
        hd = df[df['HOUSE_NAME'] == house]
        soc_total.append(hd[social_cats].values.sum() if social_cats else 0)
        ent_total.append(hd[entertainment_cats].values.sum() if entertainment_cats else 0)
        dom_total.append(hd[domestic_cats].values.sum() if domestic_cats else 0)
    # Every second entry of the "Paired" palette is used for the three layers
    p2 = sns.color_palette("Paired", 6)
    ax.bar(houses, soc_total, color=p2[1], edgecolor="black", linewidth=0.5, label="Social")
    ax.bar(houses, ent_total, bottom=soc_total, color=p2[3], edgecolor="black", linewidth=0.5,
           label="Entertainment")
    ax.bar(houses, dom_total, bottom=(np.array(soc_total) + np.array(ent_total)),
           color=p2[5], edgecolor="black", linewidth=0.5, label="Domestic")
    ax.set_title('Total Daytime Activity (Stacked by Type)')
    ax.set_ylabel('Total Count')
    ax.tick_params(axis='x', rotation=45)
    ax.legend(ncol=3, frameon=True)

    # E) Distribution of Avg Social (hist)
    ax = axes[1, 1]
    sns.histplot(social_avg, bins=6, kde=False, ax=ax, color=sns.color_palette("flare", 8)[4],
                 edgecolor="white")
    ax.set_title('Distribution of Average Social Activity (across houses)')
    ax.set_xlabel('Avg Social per Hour')
    ax.set_ylabel('Number of Houses')

    # F) Attention Ranking (lower social → higher score), colorful bars
    ax = axes[1, 2]
    # The +1.0 keeps the score finite when a household has zero social activity
    priority = 1000 / (social_avg + 1.0)
    order = np.argsort(priority)[::-1]
    ax.barh([houses[i] for i in order], priority[order],
            color=[house_colors[houses[i]] for i in order], edgecolor="black", linewidth=0.6)
    ax.set_title('Attention Ranking (lower social → higher score)')
    ax.set_xlabel('Score')

    # Leave room at the top for the suptitle
    plt.tight_layout(rect=[0, 0, 1, 0.97])
    plt.savefig('social_activity_dashboard_colorful.png', dpi=300, bbox_inches='tight')
    plt.show()

# Render and save the dashboard for the data loaded above
plot_social_dashboard_colorful(
    df=df,
    social_cats=social_cats,
    entertainment_cats=entertainment_cats,
    domestic_cats=domestic_cats,
)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the living-room activity table exported to CSV
df = pd.read_csv("living_room_activity.csv")

# Sort the columns into groups according to marker substrings in their names
social_cats = [
    col_name for col_name in df
    if any(marker in col_name for marker in ('Human_Communication', 'Human_Vocal',
                                             'Human_Emotional', 'Human_Physical'))
]
entertainment_cats = [
    col_name for col_name in df
    if any(marker in col_name for marker in ('Music_', 'Technology_Media'))
]
domestic_cats = [
    col_name for col_name in df
    if any(marker in col_name for marker in ('Kitchen_', 'Domestic_', 'Home_'))
]

# compute house metrics
def build_house_metrics(df, social_cats, entertainment_cats, domestic_cats):
    """
    Compute per-household activity metrics.

    Parameters:
        df: DataFrame with a HOUSE_NAME column plus the category columns.
        social_cats, entertainment_cats, domestic_cats: column-name lists
            per group; an empty list counts as a total of 0.

    Returns:
        dict mapping each house name to a dict with 'social_avg' and
        'entertainment_avg' (group total / row count), 'social_freq'
        (percentage of rows whose social columns sum above zero) and
        'total_activity' (sum of the three group totals).
    """

    def total_of(frame, columns):
        # No columns for a group -> that group contributes 0
        if not columns:
            return 0
        return frame[columns].values.sum()

    result = {}
    for house_name in df['HOUSE_NAME'].unique():
        house_rows = df[df['HOUSE_NAME'] == house_name]
        row_count = len(house_rows)

        social_sum = total_of(house_rows, social_cats)
        ent_sum = total_of(house_rows, entertainment_cats)
        dom_sum = total_of(house_rows, domestic_cats)

        entry = {}
        # Per-row average; 0 when the household has no rows
        entry['social_avg'] = social_sum / row_count if row_count else 0
        # Percentage of rows showing any social activity
        if social_cats and row_count:
            has_social = house_rows[social_cats].sum(axis=1) > 0
            entry['social_freq'] = np.sum(has_social) / row_count * 100
        else:
            entry['social_freq'] = 0
        entry['entertainment_avg'] = ent_sum / row_count if row_count else 0
        entry['total_activity'] = social_sum + ent_sum + dom_sum

        result[house_name] = entry
    return result

# Plot Dashboard
def plot_social_dashboard_colorful(df, social_cats, entertainment_cats, domestic_cats):
    """
    Draw a six-panel (2x3) dashboard of per-household living-room activity.

    Panels: (A) average social activity per row, (B) percentage of rows with
    any social activity, (C) social vs. entertainment scatter, (D) stacked
    social/entertainment/domestic totals, (E) histogram of the social
    averages, (F) an attention score of 1000 / (social_avg + 1), so
    households with less social activity rank higher.

    The figure is saved as 'social_activity_dashboard_colorful.png' and shown.

    Parameters:
        df: DataFrame with a HOUSE_NAME column plus the category columns.
        social_cats, entertainment_cats, domestic_cats: column-name lists
            per group; an empty list counts as zero activity.

    Returns:
        None. Prints a message and draws nothing when `df` has no
        households.
    """
    houses = df['HOUSE_NAME'].unique()
    if len(houses) == 0:
        # Panel B takes social_freq.max(), which raises on a zero-size array
        print("No households found - nothing to plot")
        return

    sns.set_theme(style="whitegrid")

    # Assigning a vivid, consistent color to each house
    pal = sns.color_palette("Set2", n_colors=len(houses))
    house_colors = {house: pal[i] for i, house in enumerate(houses)}
    bar_colors = [house_colors[house] for house in houses]

    M = build_house_metrics(df, social_cats, entertainment_cats, domestic_cats)
    social_avg = np.array([M[house]['social_avg'] for house in houses])
    social_freq = np.array([M[house]['social_freq'] for house in houses])
    ent_avg = np.array([M[house]['entertainment_avg'] for house in houses])

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('Living Room Social Activity Dashboard', fontsize=16, fontweight='bold')

    # A) Average Social Activity (colored per house)
    ax = axes[0, 0]
    bars = ax.bar(houses, social_avg, color=bar_colors, edgecolor="black", linewidth=0.6)
    ax.set_title('Average Social Activity Level')
    ax.set_ylabel('Avg per Hour')
    ax.tick_params(axis='x', rotation=45)
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2, height * 1.01, f'{int(height):,}',
                ha='center', va='bottom', fontsize=9)

    # B) Social Activity Frequency %
    ax = axes[0, 1]
    bars = ax.bar(houses, social_freq, color=bar_colors, edgecolor="black", linewidth=0.6)
    ax.set_title('Social Activity Frequency (%)')
    ax.set_ylabel('% of Recorded Hours with Social Activity')
    # Keep at least a 0-100 axis, with headroom for the labels above the bars
    ax.set_ylim(0, max(100, social_freq.max() * 1.08))
    ax.tick_params(axis='x', rotation=45)
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2, height + 1, f'{height:.0f}%',
                ha='center', va='bottom', fontsize=9)

    # C) Social vs Entertainment (scatter; same colors per house)
    ax = axes[0, 2]
    for i, house in enumerate(houses):
        ax.scatter(social_avg[i], ent_avg[i], s=140, color=house_colors[house],
                   edgecolor="black", linewidth=0.6, alpha=0.9)
        ax.annotate(house, (social_avg[i], ent_avg[i]), xytext=(6, 6),
                    textcoords='offset points', fontsize=9, weight='bold')
    ax.set_title('Social vs. Entertainment Balance')
    ax.set_xlabel('Average Social (per hour)')
    ax.set_ylabel('Average Entertainment (per hour)')

    # D) Total Daytime Activity (stack: social + entertainment + domestic)
    ax = axes[1, 0]
    soc_total = []
    ent_total = []
    dom_total = []
    for house in houses:
        hd = df[df['HOUSE_NAME'] == house]
        soc_total.append(hd[social_cats].values.sum() if social_cats else 0)
        ent_total.append(hd[entertainment_cats].values.sum() if entertainment_cats else 0)
        dom_total.append(hd[domestic_cats].values.sum() if domestic_cats else 0)
    # Every second entry of the "Paired" palette is used for the three layers
    p2 = sns.color_palette("Paired", 6)
    ax.bar(houses, soc_total, color=p2[1], edgecolor="black", linewidth=0.5, label="Social")
    ax.bar(houses, ent_total, bottom=soc_total, color=p2[3], edgecolor="black", linewidth=0.5,
           label="Entertainment")
    ax.bar(houses, dom_total, bottom=(np.array(soc_total) + np.array(ent_total)),
           color=p2[5], edgecolor="black", linewidth=0.5, label="Domestic")
    ax.set_title('Total Daytime Activity (Stacked by Type)')
    ax.set_ylabel('Total Count')
    ax.tick_params(axis='x', rotation=45)
    ax.legend(ncol=3, frameon=True)

    # E) Distribution of Avg Social (hist)
    ax = axes[1, 1]
    sns.histplot(social_avg, bins=6, kde=False, ax=ax, color=sns.color_palette("flare", 8)[4],
                 edgecolor="white")
    ax.set_title('Distribution of Average Social Activity (across houses)')
    ax.set_xlabel('Avg Social per Hour')
    ax.set_ylabel('Number of Houses')

    # F) Attention Ranking (lower social → higher score), colorful bars
    ax = axes[1, 2]
    # The +1.0 keeps the score finite when a household has zero social activity
    priority = 1000 / (social_avg + 1.0)
    order = np.argsort(priority)[::-1]
    ax.barh([houses[i] for i in order], priority[order],
            color=[house_colors[houses[i]] for i in order], edgecolor="black", linewidth=0.6)
    ax.set_title('Attention Ranking (lower social → higher score)')
    ax.set_xlabel('Score')

    # Leave room at the top for the suptitle
    plt.tight_layout(rect=[0, 0, 1, 0.97])
    plt.savefig('social_activity_dashboard_colorful.png', dpi=300, bbox_inches='tight')
    plt.show()

# Render and save the dashboard for the data loaded above
plot_social_dashboard_colorful(
    df=df,
    social_cats=social_cats,
    entertainment_cats=entertainment_cats,
    domestic_cats=domestic_cats,
)


In [None]:
# Flexible Daytime Temporal Patterns Figure (adapts to latest recorded hour)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Purpose of this cell: draw a 2x2 figure of daytime temporal patterns
# (hourly social activity, activity per time period, peak hour per house,
# social vs entertainment by hour) and save it as
# "temporal_patterns_dynamic.png". The plotted hour range runs from 08:00
# to the latest hour present in the CSV.

# 1) Load data and derive the hour of day from each time bucket
csv_file = "living_room_activity.csv"   # change if needed
df = pd.read_csv(csv_file)
df["TIME_BUCKET"] = pd.to_datetime(df["TIME_BUCKET"])
df["HOUR"] = df["TIME_BUCKET"].dt.hour

# 2) Define category groups
# Every column except the identifier/time columns is a candidate category;
# groups are picked by marker substrings in the column name.
all_cols = [c for c in df.columns if c not in ["HOUSE_NAME", "TIME_BUCKET", "HOUR"]]
social_cats = [c for c in all_cols if ("Human_Communication" in c or
                                       "Human_Vocal" in c or
                                       "Human_Emotional" in c or
                                       "Human_Physical" in c)]
entertainment_cats = [c for c in all_cols if ("Music_" in c or "Technology_Media" in c)]

# 3) Choose hour window (dynamic to dataset)
# Rows with HOUR < start_hour are not part of `hours` and so do not appear
# in any panel below.
start_hour = 8
max_hour_in_data = int(df["HOUR"].max())        # latest hour present in the CSV
end_hour = min(max_hour_in_data, 23)            # never go past 23:00
hours = list(range(start_hour, end_hour + 1))


# 4) Figure: one 2x2 grid, panels 4a-4d
fig = plt.figure(figsize=(13, 9))
fig.suptitle("Daytime Temporal Patterns Analysis (Dynamic Hour Range)", fontsize=16, fontweight="bold")

# 4a) Hourly Social Activity Pattern (all houses)
ax1 = plt.subplot(2, 2, 1)

# Total of all social columns per hour; hours with no rows count as 0
hourly_social = []
for h in hours:
    hour_slice = df[df["HOUR"] == h]
    hourly_social.append(hour_slice[social_cats].values.sum() if not hour_slice.empty else 0)

ax1.plot(hours, hourly_social, marker="o", linewidth=3)
ax1.set_title("Hourly Social Activity Pattern")
ax1.set_xlabel("Hour of Day")
ax1.set_ylabel("Total Social Activity")
ax1.grid(alpha=0.3)
ax1.set_xticks(hours)

# Annotate peak (skipped when every hourly total is zero or `hours` is empty)
if any(hourly_social):
    peak_idx = int(np.argmax(hourly_social))
    # Label text sits one hour to the left of the peak (clamped to the first
    # hour) and 10% above it, with an arrow pointing at the peak itself
    ax1.annotate(f"Peak: {hours[peak_idx]:02d}:00",
                 xy=(hours[peak_idx], hourly_social[peak_idx]),
                 xytext=(hours[max(0, peak_idx-1)], hourly_social[peak_idx]*1.1),
                 arrowprops=dict(arrowstyle="->", color="red", lw=2),
                 fontsize=11, color="red", fontweight="bold")

# 4b) Social Activity by Time Period (Morning / Afternoon / Evening)
ax2 = plt.subplot(2, 2, 2)

houses = df["HOUSE_NAME"].unique()
# Each band is (inclusive lower hour, exclusive upper hour).
# NOTE(review): the evening label is fixed at "17–23" while its upper bound
# follows end_hour — confirm the label is acceptable when the data ends
# earlier than 23:00.
period_bands = {
    "Morning (8–12)":  (8, 12),                  # 8 <= HOUR < 12
    "Afternoon (12–17)": (12, 17),               # 12 <= HOUR < 17
    "Evening (17–23)":   (17, end_hour + 1)       # 17 <= HOUR <= end_hour
}

bar_width = 0.25
x = np.arange(len(houses))

# One bar series per period; series i is shifted by (i - 1) bar widths so the
# three bars are centred on each household's tick
for i, (label, (lo, hi)) in enumerate(period_bands.items()):
    vals = []
    for house in houses:
        hs = df[(df["HOUSE_NAME"] == house) & (df["HOUR"] >= lo) & (df["HOUR"] < hi)]
        vals.append(hs[social_cats].values.sum() if not hs.empty else 0)
    ax2.bar(x + i*bar_width - bar_width, vals, bar_width, label=label)

ax2.set_title("Social Activity by Time Period")
ax2.set_xlabel("Household")
ax2.set_ylabel("Social Activity Count")
ax2.set_xticks(x)
ax2.set_xticklabels(houses, rotation=45)
ax2.legend()
ax2.grid(alpha=0.3)

# 4c) Peak Social Activity Hour by House
ax3 = plt.subplot(2, 2, 3)

# For each house, the hour in `hours` with the largest social total;
# NaN when the house has no social activity in the window
peak_hours = []
for house in houses:
    house_df = df[df["HOUSE_NAME"] == house]
    sums_per_hour = []
    for h in hours:
        hs = house_df[house_df["HOUR"] == h]
        sums_per_hour.append(hs[social_cats].values.sum() if not hs.empty else 0)
    if any(sums_per_hour):
        peak_hour = hours[int(np.argmax(sums_per_hour))]
    else:
        peak_hour = np.nan
    peak_hours.append(peak_hour)

ax3.bar(houses, peak_hours, color="gold", alpha=0.8)
ax3.set_title("Peak Social Activity Hour by House")
ax3.set_xlabel("Household")
ax3.set_ylabel("Peak Hour")
ax3.set_ylim(start_hour - 1, end_hour + 2)
ax3.tick_params(axis="x", rotation=45)
# Write the peak hour above each bar; houses without a peak get no label
for i, ph in enumerate(peak_hours):
    if not np.isnan(ph):
        ax3.text(i, ph + 0.2, f"{int(ph):02d}:00", ha="center", fontweight="bold")

# 4d) Social vs Entertainment by Hour (all houses)
ax4 = plt.subplot(2, 2, 4)

# Same per-hour aggregation as 4a, applied to the entertainment columns
hourly_ent = []
for h in hours:
    hour_slice = df[df["HOUR"] == h]
    hourly_ent.append(hour_slice[entertainment_cats].values.sum() if not hour_slice.empty else 0)

ax4.plot(hours, hourly_social, "o-", linewidth=2, label="Social")
ax4.plot(hours, hourly_ent, "o-", linewidth=2, label="Entertainment")
ax4.set_title("Social vs Entertainment by Hour")
ax4.set_xlabel("Hour of Day")
ax4.set_ylabel("Activity Count")
ax4.set_xticks(hours)
ax4.grid(alpha=0.3)
ax4.legend()

# Reserve the top 4% of the canvas for the suptitle, then save and display
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig("temporal_patterns_dynamic.png", dpi=300, bbox_inches="tight")
plt.show()
