# Timestream Data Dashboard

This notebook provides an interactive dashboard for querying and visualizing AWS Timestream data.

## Features:
- Query the `UEReports` table
- Display results in an interactive pandas DataFrame
- Visualize time-series data with Plotly
- Refresh button to reload data
- Write sample data for testing

**Prerequisites:** Run `00_setup.ipynb` first to create the database and table.

## Step 1: Import Required Modules

In [None]:
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output

# Put ../modules (resolved against the current working directory) at the
# front of the import path so the import below can find timestream_client.
sys.path.insert(0, os.path.abspath('../modules'))

# Must come after the sys.path change above; presumably this module lives in
# ../modules -- verify against the repository layout.
from timestream_client import TimestreamClient

print("✅ Modules loaded successfully")

## Step 2: Initialize Timestream Client

In [None]:
# Configuration: the region and database name can be overridden through
# environment variables of the same name; the table name is fixed.
AWS_REGION = os.getenv('AWS_REGION', 'us-east-1')
TIMESTREAM_DATABASE_NAME = os.getenv('TIMESTREAM_DATABASE_NAME', 'SuperAppDB')
TIMESTREAM_TABLE_NAME = 'UEReports'

# Initialize the Timestream client used by every later cell.
timestream = TimestreamClient(
    database_name=TIMESTREAM_DATABASE_NAME,
    table_name=TIMESTREAM_TABLE_NAME,
    region_name=AWS_REGION
)

# Echo the effective settings so a wrong environment is easy to spot.
# (The first message has no placeholders, so it is a plain string.)
print("✅ Timestream client initialized")
print(f"   Region: {AWS_REGION}")
print(f"   Database: {TIMESTREAM_DATABASE_NAME}")
print(f"   Table: {TIMESTREAM_TABLE_NAME}")

## Step 3: Create Interactive Dashboard Widgets

In [None]:
# Create the dashboard widgets. They are only constructed here; a later cell
# displays them and attaches the click handlers.

# Button that triggers a reload of the query results.
refresh_btn = widgets.Button(
    description='Refresh Data',
    button_style='primary',
    icon='refresh',
    layout=widgets.Layout(width='150px')
)

# Button that writes a few test records and then reloads.
add_sample_btn = widgets.Button(
    description='Add Sample Data',
    button_style='success',
    icon='plus',
    layout=widgets.Layout(width='150px')
)

# One-line status message; the handlers overwrite .value with small HTML
# snippets (loading / success / error).
status_output = widgets.HTML(
    value='<p style="color: #666; font-size: 12px;">Ready to query Timestream</p>'
)

# Output area for the query result table or an error message.
data_output = widgets.Output(
    layout=widgets.Layout(
        width='100%',
        border='1px solid #ddd',
        padding='15px'
    )
)

# Output area for the charts.
viz_output = widgets.Output(
    layout=widgets.Layout(
        width='100%',
        padding='15px'
    )
)

# Most recent query result; None until load_data() has succeeded once.
# Read by the optional CSV export cell at the end of the notebook.
current_df = None

print("✅ Widgets created")

## Step 4: Define Event Handlers

In [None]:
def load_data():
    """Query Timestream and refresh the table and chart panels.

    Calls ``timestream.query()`` with no arguments, stores the result in the
    module-level ``current_df``, renders it into ``data_output`` and passes
    any non-empty result to ``create_visualizations`` inside ``viz_output``.
    Progress and outcome are reported through ``status_output``.

    Exceptions are caught and reported in ``data_output`` instead of being
    propagated, so this is safe to call from a button callback. If the query
    itself raises, ``current_df`` keeps its previous value.
    """
    global current_df

    status_output.value = '<p style="color: #1976d2; font-size: 12px;">⏳ Loading data...</p>'

    try:
        # Query Timestream
        df = timestream.query()
        current_df = df
        record_count = len(df)

        # Display data
        with data_output:
            clear_output()
            if record_count > 0:
                print(f"📊 Found {record_count} record(s)")
                print(f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                display(df)
            else:
                print("⚠️  No data available")
                print("\nThe UEReports table is empty.")
                print("Click 'Add Sample Data' to insert test records.")

        # Create visualizations whenever data exists. create_visualizations
        # picks line charts or the bar-chart fallback itself, so no check
        # for a 'time' column is made here (it would make the fallback
        # unreachable).
        with viz_output:
            clear_output()
            if record_count > 0:
                create_visualizations(df)

        status_output.value = f'<p style="color: #2e7d32; font-size: 12px;">✅ Loaded {record_count} record(s)</p>'

    except Exception as e:
        with data_output:
            clear_output()
            print(f"❌ Error: {e}")
            print("\nPlease check:")
            print("- Your SageMaker execution role has Timestream permissions")
            print("- The database and table were created (run 00_setup.ipynb)")

        # Remove charts left over from an earlier successful load so stale
        # plots are not shown next to the error message.
        with viz_output:
            clear_output()

        status_output.value = '<p style="color: #d32f2f; font-size: 12px;">❌ Error occurred</p>'

def create_visualizations(df):
    """Render up to three Plotly charts for the numeric columns of *df*.

    If *df* has a ``time`` column, each chart is a line plot of one numeric
    column against ``time``; otherwise each chart is a bar plot of that
    column. Only ``float64``/``int64`` columns are considered, and only the
    first three of them. When there are none, a short message is printed
    instead.

    The ``time`` column is parsed with ``pd.to_datetime`` on a copy, so the
    caller's DataFrame is never modified. If parsing fails, the original
    values are plotted unchanged.

    Args:
        df: pandas DataFrame holding the query result.

    Returns:
        None. Output is produced via ``print`` and ``display``.
    """
    print("📈 Data Visualizations\n")

    has_time = 'time' in df.columns
    plot_df = df
    if has_time:
        try:
            # assign() returns a new frame, leaving the caller's data intact.
            plot_df = df.assign(time=pd.to_datetime(df['time']))
        except (ValueError, TypeError, OverflowError):
            # Best effort: keep the raw values when they cannot be parsed.
            pass

    # Find numeric columns for plotting (after conversion, so a numeric
    # epoch 'time' column is not charted against itself).
    numeric_cols = plot_df.select_dtypes(include=['float64', 'int64']).columns.tolist()

    if not numeric_cols:
        print("No numeric columns found for visualization")
        return

    for col in numeric_cols[:3]:  # Limit to first 3 numeric columns
        if has_time:
            # Time series plot
            fig = px.line(
                plot_df,
                x='time',
                y=col,
                title=f'{col} over time',
                markers=True
            )
        else:
            # Bar chart if no time column
            fig = px.bar(
                plot_df,
                y=col,
                title=f'Distribution of {col}'
            )
        fig.update_layout(height=400)
        display(fig)

def on_refresh_click(b):
    """Handle a click on the refresh button by reloading the data.

    The button is disabled while ``load_data()`` runs so repeated clicks do
    not start overlapping reloads.

    Args:
        b: The clicked button as passed by the ``on_click`` callback; unused.
    """
    refresh_btn.disabled = True
    try:
        load_data()
    finally:
        # Always re-enable, matching on_add_sample_click: an exception that
        # escapes load_data() must not leave the button stuck disabled.
        refresh_btn.disabled = False

def on_add_sample_click(b):
    """Handle a click on the add-sample button.

    Builds three ``cpu_usage`` test records (devices ``device-001`` to
    ``device-003``), hands them to ``timestream.write_records`` and then
    reloads the dashboard with ``load_data()``. Failures are reported in
    ``data_output`` and ``status_output`` rather than raised. The button is
    disabled for the duration and always re-enabled afterwards.

    Args:
        b: The clicked button as passed by the ``on_click`` callback; unused.
    """
    def make_record(device_number):
        # One DOUBLE measurement; the value grows by 5.5 per device.
        return {
            'Dimensions': [
                {'Name': 'device_id', 'Value': f'device-00{device_number}'},
                {'Name': 'location', 'Value': 'datacenter-1'},
            ],
            'MeasureName': 'cpu_usage',
            'MeasureValue': str(70 + (device_number * 5.5)),
            'MeasureValueType': 'DOUBLE',
        }

    add_sample_btn.disabled = True
    status_output.value = '<p style="color: #1976d2; font-size: 12px;">⏳ Adding sample data...</p>'

    try:
        sample_records = [make_record(device_number) for device_number in range(1, 4)]

        # The return value of the write is not needed here.
        timestream.write_records(sample_records)

        status_output.value = '<p style="color: #2e7d32; font-size: 12px;">✅ Sample data added</p>'

        # Show the freshly written rows straight away.
        load_data()
    except Exception as err:
        with data_output:
            clear_output()
            print(f"❌ Error adding sample data: {str(err)}")

        status_output.value = '<p style="color: #d32f2f; font-size: 12px;">❌ Error occurred</p>'
    finally:
        add_sample_btn.disabled = False

# Attach event handlers: register each callback defined above with its
# button's on_click hook. Nothing runs until a button is actually clicked.
refresh_btn.on_click(on_refresh_click)
add_sample_btn.on_click(on_add_sample_click)

print("✅ Event handlers configured")

## Step 5: Display Dashboard

Run this cell to display the interactive dashboard:

In [None]:
# Header: title, the database/table being shown, and a divider.
display(
    Markdown("## 📊 Timestream Data Dashboard"),
    Markdown(f"_Database: {TIMESTREAM_DATABASE_NAME} | Table: {TIMESTREAM_TABLE_NAME}_"),
    Markdown("---"),
)

# Control buttons side by side, followed by the status line.
button_box = widgets.HBox(
    [refresh_btn, add_sample_btn],
    layout=widgets.Layout(margin='10px 0'),
)
display(button_box, status_output)

# Query results section.
display(Markdown("### 📋 Query Results"), data_output)

# Visualization section.
display(Markdown("### 📈 Visualizations"), viz_output)

# Populate both output areas once, now that they are on screen.
load_data()

# Usage tips below the dashboard.
display(Markdown("""
---
### 💡 Tips:
- Click **Refresh Data** to reload the latest records
- Click **Add Sample Data** to insert test records
- Scroll through the DataFrame to explore all columns
- Visualizations appear automatically when data is available
"""))

## Custom Queries

Run custom SQL queries against your Timestream database:

In [None]:
# # Example: Query last 24 hours of data
# query = f"""
# SELECT *
# FROM {TIMESTREAM_DATABASE_NAME}.{TIMESTREAM_TABLE_NAME}
# WHERE time > ago(24h)
# ORDER BY time DESC
# LIMIT 100
# """

# df = timestream.query(query)
# display(df)

In [None]:
# # Example: Aggregate query (average CPU usage by device)
# query = f"""
# SELECT
#     device_id,
#     AVG(CAST(measure_value::double AS DOUBLE)) as avg_cpu_usage,
#     COUNT(*) as record_count
# FROM {TIMESTREAM_DATABASE_NAME}.{TIMESTREAM_TABLE_NAME}
# WHERE measure_name = 'cpu_usage'
# GROUP BY device_id
# """

# df = timestream.query(query)
# display(df)

## Export Data

Export your data to CSV for further analysis:

In [None]:
# # Uncomment to export current data to CSV
# if current_df is not None and len(current_df) > 0:
#     filename = f'timestream_data_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
#     current_df.to_csv(filename, index=False)
#     print(f"✅ Data exported to {filename}")
# else:
#     print("⚠️  No data to export. Load data first.")