# Battery Energy Analysis - Hourly Charge Discrepancy Investigation

This notebook analyzes sensor data to understand and explain the hourly energy increase discrepancy observed in the battery charging system. We'll examine the data patterns and investigate potential causes for the unusual energy behavior.

## Import Required Libraries

Import libraries such as pandas, matplotlib, and numpy for data analysis and visualization.

In [None]:
# Import Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta
from typing import Tuple, List, Optional
import seaborn as sns

# Set up plotting style: start from matplotlib's built-in 'default' style
# sheet, then make seaborn's "husl" palette the active color palette.
plt.style.use('default')
sns.set_palette("husl")

## Load Sensor Data

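Load the data from the source sensor into a pandas DataFrame for analysis. If the data file cannot be found, synthetic sample data is generated instead so the rest of the notebook can still run.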

In [None]:
def load_sensor_data(file_path: str) -> pd.DataFrame:
    """
    Load sensor data from a CSV file and prepare it for analysis.

    The time column is parsed to datetime and is always exposed under the
    name ``timestamp``. A file that calls the column ``time`` (and has no
    ``timestamp`` column) gets it renamed, because the analysis code in this
    notebook looks the column up as ``timestamp``.

    Args:
        file_path: Path to the sensor data file

    Returns:
        DataFrame with sensor data. If the file does not exist, a message is
        printed and the result of create_sample_data() is returned instead.
    """
    try:
        # Only the read lives inside the try block, so the sample-data
        # fallback is triggered by a missing file and nothing else.
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"File {file_path} not found. Creating sample data based on observed patterns.")
        return create_sample_data()

    # Normalize the alternative column name before parsing.
    if 'timestamp' not in df.columns and 'time' in df.columns:
        df = df.rename(columns={'time': 'timestamp'})

    if 'timestamp' in df.columns:
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    return df

def create_sample_data(seed: Optional[int] = None) -> pd.DataFrame:
    """
    Create sample data based on the patterns observed in the screenshots.

    One reading per minute is generated from 11:00 to 12:30 on 2025-06-21.
    Readings in the 11 o'clock hour rise by 2.5 per minute from 60; later
    readings rise by 15 per minute. Gaussian noise (mean 0, std 5) is added
    and the result is clipped at 0.

    Args:
        seed: Optional seed for the noise. With the default ``None`` the
            noise is drawn from NumPy's global random state, exactly as
            before; passing an integer makes the output reproducible.

    Returns:
        DataFrame with columns ``timestamp``, ``energy_kwh`` and
        ``sensor_id``
    """
    # Create timestamps for the observed period, one per minute
    start_time = datetime(2025, 6, 21, 11, 0, 0)
    end_time = datetime(2025, 6, 21, 12, 30, 0)
    timestamps = pd.date_range(start=start_time, end=end_time, freq='1min')

    minute_index = np.arange(len(timestamps))

    # Gradual increase during 11:00-12:00, sharp increase after 12:00.
    gradual = 60 + (minute_index * 2.5)
    sharp = 60 + ((minute_index - 60) * 15)
    # NOTE(review): at 12:00 (index 60) the base value restarts at 60, below
    # the ~207.5 reached at 11:59 - confirm this drop is intended.
    base_values = np.where(timestamps.hour == 11, gradual, sharp)

    # A dedicated generator is used only when a seed is requested, so unseeded
    # calls keep drawing from the global NumPy random state.
    rng = np.random if seed is None else np.random.default_rng(seed)
    noise = rng.normal(0, 5, size=len(timestamps))

    # Energy readings are never negative.
    energy_values = np.maximum(0, base_values + noise)

    return pd.DataFrame({
        'timestamp': timestamps,
        'energy_kwh': energy_values,
        'sensor_id': 'computed_batt_charge_energy_today'
    })

# Load the data. load_sensor_data() falls back to generated sample data when
# 'sensor_data.csv' does not exist, so this cell also works without the file.
sensor_data = load_sensor_data('sensor_data.csv')
# Quick sanity summary: row count, available columns and the covered time span.
print(f"Loaded {len(sensor_data)} data points")
print(f"Data columns: {sensor_data.columns.tolist()}")
# Requires a 'timestamp' column in the loaded data.
print(f"Date range: {sensor_data['timestamp'].min()} to {sensor_data['timestamp'].max()}")

## Analyze Hourly Energy Increase

Calculate the hourly energy increase from the difference in energy between consecutive readings, together with the time elapsed between them.

In [None]:
def calculate_energy_differences(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate energy differences and rates of change.

    The input is copied and sorted by ``timestamp``; each row is then
    compared with the row before it. The first row has no predecessor, so
    its difference and rate columns are NaN. Rows whose timestamp equals the
    previous one (zero elapsed time) also get NaN rates instead of +/-inf.

    Args:
        df: DataFrame with ``timestamp`` (datetime) and ``energy_kwh`` columns

    Returns:
        Sorted copy of ``df`` with the additional columns
        ``time_diff_minutes``, ``energy_diff_kwh``,
        ``energy_rate_kwh_per_min``, ``energy_rate_kwh_per_hour`` and ``hour``
    """
    # Stable sort keeps rows with identical timestamps in their input order.
    df_analysis = df.copy().sort_values('timestamp', kind='mergesort')

    # Time elapsed since the previous reading (in minutes)
    elapsed_minutes = df_analysis['timestamp'].diff().dt.total_seconds() / 60
    df_analysis['time_diff_minutes'] = elapsed_minutes

    # Energy gained since the previous reading
    df_analysis['energy_diff_kwh'] = df_analysis['energy_kwh'].diff()

    # A rate is undefined when no time has passed. Mask those intervals so the
    # division yields NaN rather than an infinite rate that would distort
    # later statistics and plots.
    usable_minutes = elapsed_minutes.where(elapsed_minutes > 0)

    # Energy rate (kWh per minute)
    df_analysis['energy_rate_kwh_per_min'] = (
        df_analysis['energy_diff_kwh'] / usable_minutes
    )

    # Same rate expressed per hour
    df_analysis['energy_rate_kwh_per_hour'] = df_analysis['energy_rate_kwh_per_min'] * 60

    # Hour of day, used for grouping
    df_analysis['hour'] = df_analysis['timestamp'].dt.hour

    return df_analysis

def analyze_hourly_patterns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarize the energy columns for each hour of the day.

    Args:
        df: DataFrame with ``hour``, ``energy_kwh``, ``energy_diff_kwh``
            and ``energy_rate_kwh_per_hour`` columns

    Returns:
        DataFrame indexed by hour with one ``<column>_<statistic>`` column
        per requested statistic, rounded to two decimals
    """
    # Statistics to compute per source column.
    aggregation_spec = {
        'energy_kwh': ['min', 'max', 'mean'],
        'energy_diff_kwh': ['sum', 'mean', 'std'],
        'energy_rate_kwh_per_hour': ['mean', 'max', 'std'],
    }

    per_hour = df.groupby('hour').agg(aggregation_spec)
    per_hour = per_hour.round(2)

    # Collapse the two-level (column, statistic) header into single names.
    per_hour.columns = [
        f"{metric}_{statistic}".strip()
        for metric, statistic in per_hour.columns
    ]

    return per_hour

# Perform analysis: add per-reading differences/rates, then summarize by hour
analysis_data = calculate_energy_differences(sensor_data)
hourly_patterns = analyze_hourly_patterns(analysis_data)

print("Hourly Energy Patterns:")
print(hourly_patterns)

# Calculate total energy increase as the spread between the largest and the
# smallest reading (not last reading minus first reading)
total_increase = analysis_data['energy_kwh'].max() - analysis_data['energy_kwh'].min()
print(f"\nTotal energy increase: {total_increase:.2f} kWh")

# Identify peak rate periods: rows whose hourly rate lies above the
# 90th percentile of all computed rates
peak_rate_threshold = analysis_data['energy_rate_kwh_per_hour'].quantile(0.9)
peak_periods = analysis_data[analysis_data['energy_rate_kwh_per_hour'] > peak_rate_threshold]
print(f"\nPeak charging periods (>{peak_rate_threshold:.1f} kWh/hour):")
# Only the first rows (DataFrame.head default) are shown
print(peak_periods[['timestamp', 'energy_kwh', 'energy_rate_kwh_per_hour']].head())

## Visualize Energy Data

Create plots to visualize the energy data over time and highlight the hourly increase.

In [None]:
def create_energy_visualizations(df: pd.DataFrame) -> None:
    """
    Create comprehensive visualizations of energy data.

    Draws a 2x2 figure: energy over time, charging rate over time, summed
    energy increase per hour, and a histogram of the charging rates. The
    figure is shown with ``plt.show()``; nothing is returned.

    Args:
        df: DataFrame with ``timestamp``, ``energy_kwh``,
            ``energy_diff_kwh``, ``energy_rate_kwh_per_hour`` and ``hour``
            columns
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    fig.suptitle('Battery Energy Analysis - Hourly Charge Patterns', fontsize=16)

    # Plot 1: Energy over time
    axes[0, 0].plot(df['timestamp'], df['energy_kwh'], linewidth=2, color='blue')
    axes[0, 0].set_title('Energy Accumulation Over Time')
    axes[0, 0].set_xlabel('Time')
    axes[0, 0].set_ylabel('Energy (kWh)')
    axes[0, 0].grid(True, alpha=0.3)
    axes[0, 0].tick_params(axis='x', rotation=45)

    # Plot 2: Energy rate over time
    axes[0, 1].plot(df['timestamp'], df['energy_rate_kwh_per_hour'],
                    linewidth=2, color='red', alpha=0.7)
    axes[0, 1].set_title('Energy Charging Rate Over Time')
    axes[0, 1].set_xlabel('Time')
    axes[0, 1].set_ylabel('Charging Rate (kWh/hour)')
    axes[0, 1].grid(True, alpha=0.3)
    axes[0, 1].tick_params(axis='x', rotation=45)

    # Plot 3: Hourly energy differences
    hourly_diffs = df.groupby('hour')['energy_diff_kwh'].sum()
    axes[1, 0].bar(hourly_diffs.index, hourly_diffs.values,
                   color='green', alpha=0.7)
    axes[1, 0].set_title('Total Energy Increase by Hour')
    axes[1, 0].set_xlabel('Hour of Day')
    axes[1, 0].set_ylabel('Energy Increase (kWh)')
    axes[1, 0].grid(True, alpha=0.3)

    # Plot 4: Distribution of charging rates.
    # Histogram binning needs a finite value range, and dropna() alone keeps
    # +/-inf, so infinite rates are discarded together with the NaNs.
    finite_rates = (
        df['energy_rate_kwh_per_hour']
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )
    axes[1, 1].hist(finite_rates,
                    bins=30, color='orange', alpha=0.7, edgecolor='black')
    axes[1, 1].set_title('Distribution of Charging Rates')
    axes[1, 1].set_xlabel('Charging Rate (kWh/hour)')
    axes[1, 1].set_ylabel('Frequency')
    axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

def create_detailed_timeline(df: pd.DataFrame) -> None:
    """
    Create a detailed timeline view similar to the original charts.

    The upper panel shows the energy readings as a filled line and, when at
    least one finite charging rate exists, annotates the reading with the
    highest rate. The lower panel shows the charging rate over time. The
    figure is shown with ``plt.show()``; nothing is returned.

    Args:
        df: DataFrame with ``timestamp``, ``energy_kwh`` and
            ``energy_rate_kwh_per_hour`` columns
    """
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

    # Detailed energy timeline
    ax1.plot(df['timestamp'], df['energy_kwh'], linewidth=3, color='#1f77b4')
    ax1.fill_between(df['timestamp'], df['energy_kwh'], alpha=0.3, color='#1f77b4')
    ax1.set_title('Detailed Energy Accumulation Timeline', fontsize=14)
    ax1.set_ylabel('Energy (kWh)', fontsize=12)
    ax1.grid(True, alpha=0.3)
    ax1.tick_params(axis='x', rotation=45)

    # Add annotation for the peak charging rate.
    # The peak is looked up among finite rates only: calling idxmax() on an
    # empty or all-NaN column is not safe across pandas versions, and an
    # infinite rate would otherwise be reported as the peak.
    finite_rates = (
        df['energy_rate_kwh_per_hour']
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )
    if not finite_rates.empty:
        max_rate_idx = finite_rates.idxmax()
        max_rate_time = df.loc[max_rate_idx, 'timestamp']
        max_rate_energy = df.loc[max_rate_idx, 'energy_kwh']
        ax1.annotate(f'Peak Rate: {finite_rates.loc[max_rate_idx]:.1f} kWh/h',
                    xy=(max_rate_time, max_rate_energy),
                    xytext=(10, 10), textcoords='offset points',
                    bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.7),
                    arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

    # Energy rate changes
    ax2.plot(df['timestamp'], df['energy_rate_kwh_per_hour'],
             linewidth=2, color='red', marker='o', markersize=3)
    ax2.set_title('Energy Charging Rate Changes', fontsize=14)
    ax2.set_xlabel('Time', fontsize=12)
    ax2.set_ylabel('Rate (kWh/hour)', fontsize=12)
    ax2.grid(True, alpha=0.3)
    ax2.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

# Create visualizations
create_energy_visualizations(analysis_data)
create_detailed_timeline(analysis_data)

## Explain Energy Discrepancy

Investigate potential reasons for the discrepancy, such as sensor errors, data aggregation issues, or incorrect calculations.