# Economical way to graph a simple moving average

In [None]:
import math
from collections import deque
from datetime import datetime

import pandas as pd

class StreamingMovingAverage:
    """
    Streaming simple moving average over the most recent prices.

    Prices and their timestamps are kept in two bounded deques of length
    ``window_size``; a running sum is maintained alongside so that a new
    data point normally updates the average without re-summing the window.
    """

    def __init__(self, window_size):
        """
        Initialize the streaming moving average calculator.

        Parameters:
        -----------
        window_size : int
            Number of data points to include in the moving average.
            Must be at least 1.

        Raises:
        -------
        ValueError
            If window_size is smaller than 1.
        """
        # A zero-length window can never hold a data point: the first
        # add_data_point() call would index an empty buffer. Fail early.
        # None is not rejected here; it is handed to deque unchanged
        # (deque treats maxlen=None as an unbounded buffer).
        if window_size is not None and window_size < 1:
            raise ValueError(
                f"window_size must be a positive integer, got {window_size!r}"
            )

        self.window_size = window_size
        self.buffer = deque(maxlen=window_size)  # Most recent prices
        self.sum = 0.0  # Running sum of the prices currently in the buffer
        self.timestamps = deque(maxlen=window_size)  # Timestamps matching buffer

    def add_data_point(self, timestamp, price):
        """
        Add a new data point and calculate the updated moving average.

        Parameters:
        -----------
        timestamp : datetime or str
            Timestamp of the data point. Strings are converted with
            ``pd.to_datetime``; any other value is stored as given.
        price : float
            Price value

        Returns:
        --------
        dict
            Keys: 'timestamp', 'price', 'moving_average', 'data_points',
            'is_full_window', 'window_start', 'window_end'.
        """
        # Convert timestamp to datetime if it's a string
        if isinstance(timestamp, str):
            timestamp = pd.to_datetime(timestamp)

        # A full deque drops its oldest entry on append, so the value that
        # is about to be evicted has to be read before appending.
        is_evicting = len(self.buffer) == self.window_size
        evicted_price = self.buffer[0] if is_evicting else None

        self.buffer.append(price)
        self.timestamps.append(timestamp)

        if is_evicting and not math.isfinite(evicted_price):
            # Subtracting NaN/inf can never restore a finite running sum,
            # so one bad price would otherwise poison every later average.
            # Rebuild the sum from the values that are still in the window.
            self.sum = sum(self.buffer, 0.0)
        else:
            if is_evicting:
                self.sum -= evicted_price
            self.sum += price

        # Calculate current moving average
        current_ma = self.sum / len(self.buffer)

        return {
            'timestamp': timestamp,
            'price': price,
            'moving_average': current_ma,
            'data_points': len(self.buffer),
            'is_full_window': len(self.buffer) == self.window_size,
            'window_start': self.timestamps[0] if self.timestamps else None,
            'window_end': self.timestamps[-1] if self.timestamps else None
        }

    def get_current_ma(self):
        """
        Get the current moving average without adding new data.

        Returns:
        --------
        float or None
            Running sum divided by the number of buffered prices, or None
            when no data point has been added yet.
        """
        if not self.buffer:
            return None
        return self.sum / len(self.buffer)

    def get_buffer_info(self):
        """
        Get information about the current buffer state.

        Returns:
        --------
        dict
            Keys: 'window_size', 'current_count', 'is_full', 'current_sum',
            'current_ma', 'oldest_timestamp', 'newest_timestamp' and
            'price_range' (a (min, max) tuple). The last four are None
            while the buffer is empty.
        """
        return {
            'window_size': self.window_size,
            'current_count': len(self.buffer),
            'is_full': len(self.buffer) == self.window_size,
            'current_sum': self.sum,
            'current_ma': self.get_current_ma(),
            'oldest_timestamp': self.timestamps[0] if self.timestamps else None,
            'newest_timestamp': self.timestamps[-1] if self.timestamps else None,
            'price_range': (min(self.buffer), max(self.buffer)) if self.buffer else None
        }

    def reset(self):
        """Discard all buffered prices and timestamps and zero the running sum."""
        self.buffer.clear()
        self.timestamps.clear()
        self.sum = 0.0

# Convenience function for simple usage
def create_streaming_ma(window_size):
    """
    Build a fresh StreamingMovingAverage for the given window length.

    Parameters:
    -----------
    window_size : int
        How many data points the moving average window spans

    Returns:
    --------
    StreamingMovingAverage
        A newly constructed calculator with an empty buffer
    """
    calculator = StreamingMovingAverage(window_size)
    return calculator


# Enhanced Streaming Moving Averages

In [None]:
# Comprehensive time-based streaming MA demonstration.
#
# Feeds every row of the notebook-level DataFrame `df` (columns 'timestamp'
# and 'price') through one TimeBasedStreamingMA calculator per
# (time window, MA type) pair, collects the per-row results in `time_df`,
# prints several summaries and stores a copy in `df_time_based`.
# `df` and `TimeBasedStreamingMA` must already be defined by earlier cells.
print("üöÄ TIME-BASED STREAMING MA WITH REAL DATASET")
print("=" * 50)

if 'df' in locals() and df is not None and len(df) > 0:
    total_rows = len(df)
    # MA variants created for every time window, in processing order.
    ma_types = ['SMA', 'EMA', 'DEMA', 'TEMA']

    print(f"üìä Processing DataFrame with {len(df):,} data points")
    print(f"   ‚Ä¢ Time Range: {df['timestamp'].min()} to {df['timestamp'].max()}")
    print(f"   ‚Ä¢ Price Range: {df['price'].min():.5f} to {df['price'].max():.5f}")
    # The interval is estimated from the first two rows, so it can only be
    # reported when the frame has at least two rows.
    if total_rows > 1:
        print(f"   ‚Ä¢ Data Frequency: ~{(df['timestamp'].iloc[1] - df['timestamp'].iloc[0]).total_seconds():.0f} seconds")
    
    # Create time-based moving averages with different time windows
    time_windows = ['1min', '5min', '15min', '30min']
    print(f"\nüïê Creating time-based MAs with multiple time windows:")
    
    # One calculator per (time window, MA type) pair
    time_calculators = {}
    for window in time_windows:
        time_calculators[window] = {
            ma_type: TimeBasedStreamingMA(window, ma_type=ma_type)
            for ma_type in ma_types
        }
        print(f"   ‚úÖ {window} window: SMA, EMA (Œ±={time_calculators[window]['EMA'].alpha:.4f}), DEMA, TEMA")
    
    # Process the DataFrame through all time-based MAs
    print(f"\n‚ö° Processing {len(df)} data points through time-based MAs...")
    
    time_based_results = []
    
    # Process each row in the DataFrame. Progress is tracked with a
    # positional counter rather than the index label, so it stays correct
    # when `df` does not carry a default 0-based integer index.
    for row_number, (idx, row) in enumerate(df.iterrows(), start=1):
        timestamp = row['timestamp']
        price = row['price']
        
        result_dict = {
            'timestamp': timestamp,
            'price': price
        }
        
        # Process through each time window and MA type
        for window in time_windows:
            for ma_type in ma_types:
                calc = time_calculators[window][ma_type]
                result = calc.add_data_point(timestamp, price)
                
                # Store results with descriptive column names
                col_name = f'{ma_type}_{window}'
                result_dict[col_name] = result['moving_average']
                result_dict[f'{col_name}_points'] = result['data_points_count']
                result_dict[f'{col_name}_full'] = result['is_full_window']
        
        time_based_results.append(result_dict)
        
        # Show progress every 500 rows and on the final row
        if row_number % 500 == 0 or row_number == total_rows:
            print(f"   Processed: {row_number:,}/{total_rows:,} ({100*row_number/total_rows:.1f}%)")
    
    # Convert results to DataFrame
    time_df = pd.DataFrame(time_based_results)
    
    print(f"‚úÖ Processing complete!")
    print(f"\nüìà TIME-BASED STREAMING MA RESULTS:")
    print(f"   ‚Ä¢ Total processed: {len(time_df):,} data points")
    print(f"   ‚Ä¢ Time windows: {time_windows}")
    print(f"   ‚Ä¢ MA types per window: SMA, EMA, DEMA, TEMA")
    print(f"   ‚Ä¢ Total columns: {len(time_df.columns)}")
    
    # Show when each window becomes full (first row whose *_full flag is set)
    print(f"\nüéØ FULL WINDOW TIMING:")
    for window in time_windows:
        for ma_type in ma_types:
            full_col = f'{ma_type}_{window}_full'
            if full_col in time_df.columns and time_df[full_col].any():
                full_idx = time_df[time_df[full_col]].index[0]
                full_time = time_df.loc[full_idx, 'timestamp']
                elapsed = (full_time - time_df['timestamp'].iloc[0]).total_seconds() / 60
                print(f"   ‚Ä¢ {ma_type}_{window}: Full at {full_time.strftime('%H:%M:%S')} ({elapsed:.1f}min elapsed)")
    
    # Show latest values comparison
    print(f"\nüìä LATEST VALUES COMPARISON:")
    latest = time_df.iloc[-1]
    print(f"   ‚Ä¢ Timestamp: {latest['timestamp']}")
    print(f"   ‚Ä¢ Current Price: {latest['price']:.5f}")
    print()
    
    for window in time_windows:
        print(f"   {window} window:")
        for ma_type in ma_types:
            col_name = f'{ma_type}_{window}'
            if col_name in latest:
                ma_value = latest[col_name]
                # Absolute and relative gap between the MA and the last price
                diff = ma_value - latest['price']
                diff_pct = (diff / latest['price']) * 100
                points = latest.get(f'{col_name}_points', 'N/A')
                print(f"     ‚Ä¢ {ma_type}: {ma_value:.5f} ({diff:+.5f}, {diff_pct:+.3f}%) [{points} pts]")
        print()
    
    # Calculate responsiveness metrics
    print(f"\nüìä TIME-BASED MA RESPONSIVENESS ANALYSIS:")
    print("=" * 55)
    
    # "Responsiveness" is the correlation between row-to-row price changes
    # and row-to-row MA changes, expressed as a percentage.
    time_df['price_change'] = time_df['price'].diff()
    
    for window in time_windows:
        print(f"\n{window} Time Window:")
        for ma_type in ma_types:
            col_name = f'{ma_type}_{window}'
            if col_name in time_df.columns:
                ma_change = time_df[col_name].diff()
                correlation = time_df['price_change'].corr(ma_change)
                responsiveness = correlation * 100
                volatility = time_df[col_name].std()
                
                print(f"   ‚Ä¢ {ma_type}: {responsiveness:6.1f}% responsiveness, {volatility:.6f} volatility")
    
    # Show recent data comparison (only the two shortest windows)
    print(f"\nüìã RECENT TIME-BASED MA DATA (Last 5 rows):")
    recent_cols = ['timestamp', 'price'] + [f'{ma}_{window}' for window in ['1min', '5min'] for ma in ma_types]
    available_cols = [col for col in recent_cols if col in time_df.columns]
    print(time_df[available_cols].tail(5).to_string(index=False, float_format='%.5f'))
    
    # Store time-based results (includes the 'price_change' column added above)
    df_time_based = time_df.copy()
    
    print(f"\nüíæ Results stored in 'df_time_based' variable")
    print(f"   ‚Ä¢ Contains: {list(df_time_based.columns)[:10]}... ({len(df_time_based.columns)} total)")
    
    # Show final calculator states
    print(f"\nüîç TIME-BASED CALCULATOR FINAL STATES:")
    for window in time_windows:
        print(f"\n   {window} window calculators:")
        for ma_type in ma_types:
            calc = time_calculators[window][ma_type]
            info = calc.get_time_window_info()
            print(f"     ‚Ä¢ {ma_type}: {info['data_points_count']:,} points, "
                  f"{info['window_utilization']:.1f}% utilized, "
                  f"MA: {info['current_ma']:.5f}")
    
    print(f"\nüéØ TIME-BASED MA ADVANTAGES:")
    print("   ‚úÖ Uses actual time windows instead of fixed row counts")
    print("   ‚úÖ Automatically handles irregular data intervals")
    print("   ‚úÖ Time-weighted EMA calculations for better accuracy")
    print("   ‚úÖ Consistent behavior regardless of data frequency")
    print("   ‚úÖ More intuitive window specifications (e.g., '5min', '1hour')")
    print("   ‚úÖ Better for real-world trading applications")

else:
    print("‚ùå No DataFrame 'df' found or DataFrame is empty")
    print("   Please run the data generation cells first")