## Energy and Weather Visualizations

This Jupyter Notebook creates interactive visualizations for energy consumption and weather data across five U.S. cities (New York, Chicago, Houston, Phoenix, Seattle) over the last 90 days. The visualizations include:

- **Geographic Overview**: Interactive U.S. map showing current temperature, today's energy usage, and percentage change from yesterday, with color-coded markers (red for high, green for low energy usage).
- **Time Series Analysis**: Dual-axis line chart showing temperature and energy consumption, with a dropdown to select a city or all cities, and shaded weekend regions.
- **Correlation Analysis**: Scatter plot of temperature vs. energy consumption, color-coded by city, with regression lines, R-squared, and correlation coefficients.
- **Usage Patterns Heatmap**: Heatmap of average energy usage by temperature range and day of week, with a dropdown for city selection and text annotations.

### Requirements
- Python libraries: `plotly`, `pandas`, `numpy`, `scipy`
- classes: `Config`, `DataProcessor`, `Analyzer`
- Processed data file: `data/processed/latest_historical.csv`


Run each cell to generate the visualizations. Ensure the data file and classes are accessible.

In [8]:
# Import required libraries
# Note: These should already be installed via pyproject.toml
import sys
import os

# Add the project root directory to the Python path
project_root = os.path.abspath(".")  # Current directory since we're in project root
if project_root not in sys.path:
    sys.path.append(project_root)

# Add src directory to path for imports
src_path = os.path.join(project_root, "src")
if src_path not in sys.path:
    sys.path.append(src_path)

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy.stats import linregress

# Optional: Install additional packages if needed for visualization
try:
    import plotly.express as px
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
    print("✅ Plotly imported successfully")
except ImportError:
    print("⚠️  Plotly not installed. Run: uv pip install plotly")
    # You can still run without plotly, just skip visualization parts

# Import your custom classes
try:
    from config import Config
    from data_processor import DataProcessor
    from analysis import Analyzer
    print("✅ Custom modules imported successfully")
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("Make sure you're running from the project root directory")
    print("Current working directory:", os.getcwd())
    print("Available files:", os.listdir("."))

# Initialize configuration and analyzer
try:
    config = Config.load()
    analyzer = Analyzer(config)
    processor = DataProcessor(config)
    print("✅ Configuration and analyzer initialized")
except Exception as e:
    print(f"❌ Configuration error: {e}")
    print("Make sure config.yaml exists and is properly formatted")

# Load processed data
def load_data():
    """Load processed data with error handling"""
    try:
        df = analyzer.load_data()
        if df.empty:
            print("⚠️  No data available. Please run the data pipeline to generate processed data.")
            return None
        print(f"✅ Data loaded successfully: {len(df)} rows")
        return df
    except Exception as e:
        print(f"❌ Data loading error: {e}")
        return None

# Test data loading
df = load_data()
if df is None:
    print("❌ Data loading failed. Cannot continue with analysis.")
    # Don't raise exception, just warn user
else:
    # Define date range and cities
    try:
        start_date, end_date = analyzer.get_available_date_range()
        filtered_df = df[(df['date'].dt.date >= start_date) & (df['date'].dt.date <= end_date)]
        cities = ['All Cities'] + sorted(filtered_df['city'].unique().tolist())
        
        print(f"✅ Analysis setup complete")
        print(f"📅 Date range: {start_date} to {end_date}")
        print(f"🏙️  Cities available: {len(cities)-1}")
        print(f"📊 Filtered data: {len(filtered_df)} rows")
        
    except Exception as e:
        print(f"❌ Analysis setup error: {e}")

# Quick data check
if 'df' in locals() and df is not None:
    print("\n" + "="*50)
    print("DATA OVERVIEW")
    print("="*50)
    print(f"Shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")
    print(f"Date range: {df['date'].min()} to {df['date'].max()}")
    print(f"Cities: {df['city'].nunique()}")
    print("\nFirst few rows:")
    print(df.head())
else:
    print("\n❌ No data available for analysis")

✅ Plotly imported successfully
❌ Import error: cannot import name 'Config' from 'config' (unknown location)
Make sure you're running from the project root directory
Current working directory: C:\Users\HP\Desktop\BYU-pathway\pioneeracademy\project1-energy-analysis\notebooks
Available files: ['.ipynb_checkpoints', 'exploration.ipynb']
❌ Configuration error: name 'Config' is not defined
Make sure config.yaml exists and is properly formatted
❌ Data loading error: name 'analyzer' is not defined
❌ Data loading failed. Cannot continue with analysis.

❌ No data available for analysis


In [9]:
# Import required librariesa

%pip install pandas numpy plotly scipy

import sys
import os

# Add the project root directory to the Python path
project_root = os.path.abspath("..")  # Adjust if needed
if project_root not in sys.path:
    sys.path.append(project_root)

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import linregress
from datetime import datetime, timedelta

# Import your custom classes (adjust import paths as needed)
from config import Config
from src.data_processor import DataProcessor
from src.analysis import Analyzer

# Initialize configuration and analyzer
config = Config.load()
analyzer = Analyzer(config)
processor = DataProcessor(config)

# Load processed data
def load_data():
    df = analyzer.load_data()
    if df.empty:
        print("No data available. Please run the data pipeline to generate processed data.")
        return None
    return df

df = load_data()
if df is None:
    raise Exception("Data loading failed.")

# Define date range and cities
start_date, end_date = analyzer.get_available_date_range()
filtered_df = df[(df['date'].dt.date >= start_date) & (df['date'].dt.date <= end_date)]
cities = ['All Cities'] + sorted(filtered_df['city'].unique().tolist())

Note: you may need to restart the kernel to use updated packages.


ImportError: cannot import name 'Config' from 'config' (unknown location)

In [None]:
# Simple Geographic Overview Table
def create_geo_table(df):
    """Simple function to display geographic data in a table"""
    
    # Get unique cities and their data
    cities_data = []
    
    for city in df['city'].unique():
        city_df = df[df['city'] == city]
        latest_data = city_df.iloc[-1]  # Get most recent data
        
        cities_data.append({
            'City': city,
            'Temperature (°F)': f"{latest_data['temperature_avg']:.1f}",
            'Energy Usage (MWh)': f"{latest_data['energy_demand']:.2f}",
            'Date': latest_data['date'].strftime('%Y-%m-%d')
        })
    
    # Create and display table
    table_df = pd.DataFrame(cities_data)
    print("Geographic Overview - Energy Usage and Temperature")
    print("=" * 50)
    print(table_df.to_string(index=False))
    
    return table_df

# Even simpler version - just the essentials
def simple_geo_table(df):
    """Minimal table display"""
    result = df.groupby('city').agg({
        'temperature_avg': 'last',
        'energy_demand': 'last',
        'date': 'last'
    }).round(2)
    
    print("City Data Summary:")
    print(result)
    return result

# Usage:
# create_geo_table(filtered_df)
# or
# simple_geo_table(filtered_df)

In [None]:
# Visualization 2: Time Series Analysis
def create_time_series(df, cities):
    # Create subplot
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Prepare data for each city
    buttons = []
    for i, city in enumerate(cities):
        if city == 'All Cities':
            plot_df = df.groupby('date').agg({
                'temperature_avg': 'mean',
                'energy_demand': 'mean',
                'is_weekend': 'first'
            }).reset_index()
        else:
            plot_df = df[df['city'] == city].copy()

        if plot_df.empty:
            print(f"No data available for {city}.")
            continue

        # Add traces
        temp_trace = go.Scatter(
            x=plot_df['date'],
            y=plot_df['temperature_avg'],
            name='Temperature (°F)',
            line=dict(color='blue'),
            visible=(i == 0)  # Only first city visible initially
        )
        energy_trace = go.Scatter(
            x=plot_df['date'],
            y=plot_df['energy_demand'],
            name='Energy Demand (MWh)',
            line=dict(color='red', dash='dot'),
            visible=(i == 0),
            yaxis='y2'
        )
        fig.add_traces([temp_trace, energy_trace])

        # Create button for this city
        visibility = [False] * len(cities) * 2
        visibility[i * 2:i * 2 + 2] = [True, True]
        buttons.append(dict(
            label=city,
            method='update',
            args=[{'visible': visibility}, {'title': f'Temperature and Energy Demand Over Time - {city}'}]
        ))

    # Highlight weekends (using first city's data for simplicity)
    plot_df = df.groupby('date').agg({'is_weekend': 'first'}).reset_index()
    weekend_ranges = []
    current_start = None
    for _, row in plot_df.iterrows():
        if row['is_weekend'] and current_start is None:
            current_start = row['date']
        elif not row['is_weekend'] and current_start is not None:
            weekend_ranges.append((current_start, row['date'] - timedelta(days=1)))
            current_start = None
    if current_start is not None:
        weekend_ranges.append((current_start, plot_df['date'].max()))

    for start, end in weekend_ranges:
        fig.add_vrect(
            x0=start, x1=end,
            fillcolor="gray", opacity=0.2,
            layer="below", line_width=0
        )

    # Update layout with dropdown
    fig.update_layout(
        title=f"Temperature and Energy Demand Over Time - {cities[0]}",
        xaxis_title="Date",
        yaxis_title="Temperature (°F)",
        yaxis2_title="Energy Demand (MWh)",
        legend=dict(x=0, y=1.1, orientation='h'),
        margin=dict(t=100),
        updatemenus=[dict(
            buttons=buttons,
            direction="down",
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.2,
            yanchor="top"
        )]
    )
    fig.update_yaxes(title_text="Temperature (°F)", secondary_y=False)
    fig.update_yaxes(title_text="Energy Demand (MWh)", secondary_y=True)
    fig.show()

create_time_series(filtered_df, cities)

In [None]:
# Visualization 3: Correlation Analysis
def create_correlation_plot(df, config, analyzer):
    if df.empty:
        print("No data available for correlation analysis.")
        return

    # Calculate correlations and regression
    correlations = analyzer.calculate_correlations(df, None)
    regression_stats = analyzer.calculate_regression(df, None)

    # Create scatter plot
    fig = px.scatter(
        df,
        x='temperature_avg',
        y='energy_demand',
        color='city',
        color_discrete_map=config.city_colors,
        hover_data=['date'],
        title="Temperature vs Energy Demand Correlation"
    )

    # Add regression lines
    for city in df['city'].unique():
        city_df = df[df['city'] == city].dropna(subset=['temperature_avg', 'energy_demand'])
        if len(city_df) >= 2:
            stats = regression_stats.get(city, {})
            if not np.isnan(stats.get('slope')):
                x_range = [city_df['temperature_avg'].min(), city_df['temperature_avg'].max()]
                y_pred = [stats['slope'] * x + stats['intercept'] for x in x_range]
                fig.add_trace(
                    go.Scatter(
                        x=x_range,
                        y=y_pred,
                        mode='lines',
                        name=f'{city} Regression',
                        line=dict(color=config.city_colors.get(city, '#000000'), dash='dash')
                    )
                )

    # Add regression stats as annotations
    annotations = []
    y_pos = 0.95
    for city in regression_stats:
        stats = regression_stats[city]
        if not np.isnan(stats['slope']):
            annotation = dict(
                x=0.05,
                y=y_pos,
                xref="paper",
                yref="paper",
                text=f"{city}: y = {stats['slope']:.2f}x + {stats['intercept']:.2f}, R² = {stats['r_squared']:.3f}, Corr = {correlations.get(city, np.nan):.3f}",
                showarrow=False,
                font=dict(size=10)
            )
            annotations.append(annotation)
            y_pos -= 0.05

    fig.update_layout(
        xaxis_title="Temperature (°F)",
        yaxis_title="Energy Demand (MWh)",
        showlegend=True,
        annotations=annotations
    )
    fig.show()

create_correlation_plot(filtered_df, config, analyzer)

In [None]:
# Visualization 4: Usage Patterns Heatmap
def create_heatmap(df, cities):
    # Define temperature bins
    bins = [-float('inf'), 50, 60, 70, 80, 90, float('inf')]
    labels = ['<50°F', '50-60°F', '60-70°F', '70-80°F', '80-90°F', '>90°F']

    # Create figure
    fig = go.Figure()

    # Prepare data for each city
    buttons = []
    for i, city in enumerate(cities):
        if city == 'All Cities':
            plot_df = df.copy()
        else:
            plot_df = df[df['city'] == city].copy()

        if plot_df.empty:
            print(f"No data available for {city}.")
            continue

        plot_df['temp_range'] = pd.cut(plot_df['temperature_avg'], bins=bins, labels=labels, include_lowest=True)
        heatmap_data = plot_df.pivot_table(
            values='energy_demand',
            index='temp_range',
            columns='day_of_week',
            aggfunc='mean'
        )

        # Reorder days of week
        day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        heatmap_data = heatmap_data.reindex(columns=day_order)

        # Add heatmap trace
        heatmap = go.Heatmap(
            z=heatmap_data.values,
            x=day_order,
            y=labels,
            colorscale='RdBu_r',
            text=heatmap_data.values.round(2),
            texttemplate='%{text:.2f}',
            colorbar=dict(title='Energy Demand (MWh)'),
            visible=(i == 0)
        )
        fig.add_trace(heatmap)

        # Create button
        visibility = [False] * len(cities)
        visibility[i] = True
        buttons.append(dict(
            label=city,
            method='update',
            args=[{'visible': visibility}, {'title': f'Average Energy Usage by Temperature Range and Day of Week - {city}'}]
        ))

    # Update layout with dropdown
    fig.update_layout(
        title=f"Average Energy Usage by Temperature Range and Day of Week - {cities[0]}",
        xaxis_title="Day of Week",
        yaxis_title="Temperature Range",
        updatemenus=[dict(
            buttons=buttons,
            direction="down",
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.2,
            yanchor="top"
        )]
    )
    fig.show()

create_heatmap(filtered_df, cities)

## Notes

- **Running the Notebook**: Save as `energy_visualizations.ipynb` and run in a Jupyter environment (e.g., Jupyter Notebook, JupyterLab, or VS Code). Ensure dependencies (`plotly`, `pandas`, `numpy`, `scipy`) are installed and the data file exists.
- **Data Dependency**: Assumes `data/processed/latest_historical.csv` is available from your pipeline.
- **Interactivity**: Uses Plotly's dropdown menus for city selection in the time series and heatmap visualizations, replacing Streamlit's interactive components.
- **Output**: Each visualization is displayed inline in the notebook using `fig.show()`.

If you encounter issues or need modifications (e.g., different styling, additional features), let me know!