In [77]:
import pandas as pd
import numpy as np
%pip install matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
import os

1578.70s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


Note: you may need to restart the kernel to use updated packages.


In [78]:
# Configuration
DATA_FILE = '/home/lr/Documents/FUTURUES_PROJECT/DATA/cleaned_futures_data.csv'
OUTPUT_DIR = '/home/lr/Documents/FUTURUES_PROJECT/RESULTS'
SESSION_TYPE = 'RTH'  # Regular Trading Hours
PRICE_PRECISION = 0.25  # Price increment for volume profile bins

In [79]:
# Create output directory if it doesn't exist
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

def load_data(filepath):
    """Load and preprocess the futures data."""
    print(f"Loading data from {filepath}...")
    
    # Load data from CSV
    df = pd.read_csv(filepath)
    
    # Convert timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    
    # Extract date for session grouping
    df['date'] = df['timestamp'].dt.date
    
    print(f"Data loaded: {len(df)} rows")
    print(f"Date range: {df['date'].min()} to {df['date'].max()}")
    
    return df


In [80]:
def calculate_volume_profile(session_df, price_precision=PRICE_PRECISION):
    """Calculate volume profile for a single session."""
    # Find min and max prices
    min_price = min(session_df['low'].min(), session_df['open'].min(), session_df['close'].min())
    max_price = max(session_df['high'].max(), session_df['open'].max(), session_df['close'].max())
    
    # Round to nearest tick
    min_price = np.floor(min_price / price_precision) * price_precision
    max_price = np.ceil(max_price / price_precision) * price_precision
    
    # Create price bins
    price_bins = np.arange(min_price, max_price + price_precision, price_precision)
    
    # Create empty volume profile
    volume_profile = pd.DataFrame({
        'price_level': price_bins,
        'volume': 0
    })
    
        # Distribute volume across price levels within each bar
    for _, row in session_df.iterrows():
        # Calculate price range for the bar
        bar_min = min(row['low'], row['open'], row['close'])
        bar_max = max(row['high'], row['open'], row['close'])
        
        # Find bins that fall within this bar's range
        mask = (volume_profile['price_level'] >= bar_min) & (volume_profile['price_level'] <= bar_max)
        
        # Count how many bins are in this range
        bins_count = mask.sum()
        
        if bins_count > 0:
            # Distribute volume equally across the bins
            volume_per_bin = row['volume'] / bins_count
            volume_profile.loc[mask, 'volume'] += volume_per_bin
    
    return volume_profile

In [81]:
def find_vpoc(volume_profile):
    """Find the Volume Point of Control (VPOC)."""
    vpoc_idx = volume_profile['volume'].argmax()
    vpoc_price = volume_profile.iloc[vpoc_idx]['price_level']
    
    return vpoc_price

def find_value_area(volume_profile, value_area_pct=0.7):
    """Calculate the Value Area (typically 70% of volume)."""
    # Sort by volume in descending order
    sorted_profile = volume_profile.sort_values('volume', ascending=False)
    
    # Calculate cumulative volume
    total_volume = sorted_profile['volume'].sum()
    target_volume = total_volume * value_area_pct
    
    cum_volume = 0
    value_area_prices = []
    
    # Add price levels until we reach the target volume
    for _, row in sorted_profile.iterrows():
        value_area_prices.append(row['price_level'])
        cum_volume += row['volume']
        
        if cum_volume >= target_volume:
            break
    
    # Return min and max of the value area
    return min(value_area_prices), max(value_area_prices)

In [82]:
def plot_volume_profile(volume_profile, vpoc, val, vah, session_title, output_file=None):
    """Plot volume profile with VPOC and value area."""
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Plot volume profile (horizontal bars)
    ax.barh(volume_profile['price_level'], volume_profile['volume'], 
           height=volume_profile['price_level'].diff().iloc[1],
           color='steelblue', alpha=0.7)
    
    # Highlight VPOC
    vpoc_idx = (volume_profile['price_level'] - vpoc).abs().argmin()
    ax.barh(volume_profile.iloc[vpoc_idx]['price_level'], 
           volume_profile.iloc[vpoc_idx]['volume'], 
           height=volume_profile['price_level'].diff().iloc[1],
           color='crimson', alpha=0.8)
    
    # Highlight value area
    val_mask = (volume_profile['price_level'] >= val) & (volume_profile['price_level'] <= vah)
    ax.barh(volume_profile.loc[val_mask, 'price_level'], 
           volume_profile.loc[val_mask, 'volume'], 
           height=volume_profile['price_level'].diff().iloc[1],
           color='cornflowerblue', alpha=0.5)
    
    # Add annotations
    ax.axhline(y=vpoc, color='crimson', linestyle='--', alpha=0.7, linewidth=1.5)
    ax.axhline(y=val, color='navy', linestyle=':', alpha=0.7, linewidth=1.5)
    ax.axhline(y=vah, color='navy', linestyle=':', alpha=0.7, linewidth=1.5)
    
    # Add text labels
    max_volume = volume_profile['volume'].max() * 1.05
    ax.text(max_volume, vpoc, f' VPOC: {vpoc:.2f}', verticalalignment='center', 
           color='crimson', fontweight='bold')
    ax.text(max_volume, val, f' VAL: {val:.2f}', verticalalignment='center', 
           color='navy', fontweight='bold')
    ax.text(max_volume, vah, f' VAH: {vah:.2f}', verticalalignment='center', 
           color='navy', fontweight='bold')
    
    # Set labels and title
    ax.set_xlabel('Volume')
    ax.set_ylabel('Price')
    ax.set_title(f'Volume Profile - {session_title}', fontsize=14)
    
    # Add grid
    ax.grid(True, linestyle='--', alpha=0.6)
    
    # Invert y-axis for natural price direction (higher prices at top)
    ax.invert_yaxis()
    
    plt.tight_layout()
    
    if output_file:
        plt.savefig(output_file, dpi=300)
        plt.close(fig)
        return None
    else:
        return fig, ax

In [83]:
def plot_vpoc_migrations(dates, vpocs, session_type, output_file=None):
    """Plot VPOC migrations over time."""
    fig, ax = plt.subplots(figsize=(12, 6))
    
    # Convert dates to datetime if they're strings
    if isinstance(dates[0], str):
        date_objects = [pd.to_datetime(date) for date in dates]
    else:
        date_objects = dates
    
    # Plot VPOC line
    ax.plot(date_objects, vpocs, '-o', linewidth=2, markersize=5, color='mediumblue')
    
    # Add markers for VPOC changes
    for i in range(1, len(dates)):
        prev_vpoc = vpocs[i-1]
        curr_vpoc = vpocs[i]
        
        # Determine color based on direction (green for up, red for down)
        if curr_vpoc > prev_vpoc:
            color = 'green'
            direction = 'up'
        elif curr_vpoc < prev_vpoc:
            color = 'red'
            direction = 'down'
        else:
            color = 'gray'
            direction = 'none'
        
        # Draw arrow for significant migrations
        if abs(curr_vpoc - prev_vpoc) > 0.5:  # Only show significant migrations
            ax.annotate('', 
                       xy=(date_objects[i], curr_vpoc), 
                       xytext=(date_objects[i-1], prev_vpoc),
                       arrowprops=dict(arrowstyle='->',
                                     color=color,
                                     lw=1.5,
                                     alpha=0.7))
    
    # Format x-axis for dates
    plt.gcf().autofmt_xdate()
    
    # Set labels and title
    ax.set_xlabel('Date')
    ax.set_ylabel('VPOC Price')
    ax.set_title(f'VPOC Migrations - {session_type} Sessions', fontsize=14)
    
    # Add grid
    ax.grid(True, linestyle='--', alpha=0.6)
    
    plt.tight_layout()
    
    if output_file:
        plt.savefig(output_file, dpi=300)
        plt.close(fig)
        return None
    else:
        return fig, ax

In [84]:
def detect_vpoc_migration(previous_vpoc, current_vpoc, min_migration=1.0):
    """Detect if VPOC has migrated significantly."""
    diff = current_vpoc - previous_vpoc
    
    if abs(diff) >= min_migration:
        direction = 'up' if diff > 0 else 'down'
        return True, direction
    else:
        return False, 'none'

In [85]:
def find_migration_trends(dates, vpocs, min_consecutive=3, min_migration=1.0):
    """Find trends in VPOC migrations."""
    if len(dates) < min_consecutive + 1:
        return []
    
    trends = []
    current_direction = None
    consecutive_count = 0
    start_idx = 0
    
    for i in range(1, len(dates)):
        prev_vpoc = vpocs[i-1]
        curr_vpoc = vpocs[i]
        
        migrated, direction = detect_vpoc_migration(prev_vpoc, curr_vpoc, min_migration)
        
        if migrated and direction != 'none':
            if current_direction is None:
                # Start new trend
                current_direction = direction
                consecutive_count = 1
                start_idx = i - 1
            elif direction == current_direction:
                # Continue trend
                consecutive_count += 1
            else:
                # Direction changed, check if previous trend is significant
                if consecutive_count >= min_consecutive:
                    trend_start = dates[start_idx]
                    trend_end = dates[i-1]
                    
                    trends.append({
                        'start_date': trend_start,
                        'end_date': trend_end,
                        'direction': current_direction,
                        'consecutive_count': consecutive_count,
                        'vpoc_start': vpocs[start_idx],
                        'vpoc_end': vpocs[i-1],
                        'vpoc_change': vpocs[i-1] - vpocs[start_idx]
                    })
                
                # Start new trend
                current_direction = direction
                consecutive_count = 1
                start_idx = i - 1
        else:
            # No migration, check if previous trend is significant
            if current_direction is not None and consecutive_count >= min_consecutive:
                trend_start = dates[start_idx]
                trend_end = dates[i-1]
                
                trends.append({
                    'start_date': trend_start,
                    'end_date': trend_end,
                    'direction': current_direction,
                    'consecutive_count': consecutive_count,
                    'vpoc_start': vpocs[start_idx],
                    'vpoc_end': vpocs[i-1],
                    'vpoc_change': vpocs[i-1] - vpocs[start_idx]
                })
            
            # Reset trend
            current_direction = None
            consecutive_count = 0
    
    # Check for trend at the end of the data
    if current_direction is not None and consecutive_count >= min_consecutive:
        trend_start = dates[start_idx]
        trend_end = dates[-1]
        
        trends.append({
            'start_date': trend_start,
            'end_date': trend_end,
            'direction': current_direction,
            'consecutive_count': consecutive_count,
            'vpoc_start': vpocs[start_idx],
            'vpoc_end': vpocs[-1],
            'vpoc_change': vpocs[-1] - vpocs[start_idx]
        })
    
    return trends

In [86]:
def main():
    print(f"Simple VPOC Calculator")
    print(f"=====================")
    
    # Load data
    df = load_data(DATA_FILE)
    
    # Filter to selected session type
    session_df = df[df['session'] == SESSION_TYPE]
    print(f"Filtered to {SESSION_TYPE} sessions: {len(session_df)} rows")
    
    # Group by date
    grouped = session_df.groupby('date')
    print(f"Found {len(grouped)} unique {SESSION_TYPE} sessions")
    
    # Process each session
    print(f"\nCalculating volume profiles and VPOCs...")
    
    results = []
    dates = []
    vpocs = []
    
    for date, group in grouped:
        # Calculate volume profile
        volume_profile = calculate_volume_profile(group)
        
        # Find VPOC
        vpoc = find_vpoc(volume_profile)
        
        # Calculate value area
        val, vah = find_value_area(volume_profile)
        
        # Store results
        results.append({
            'date': date,
            'vpoc': vpoc,
            'value_area_low': val,
            'value_area_high': vah,
            'value_area_width': vah - val,
            'volume_profile': volume_profile,
            'total_volume': group['volume'].sum()
        })
        
        dates.append(date)
        vpocs.append(vpoc)
    
    # Save VPOC data to CSV
    vpoc_data = [{
        'date': r['date'],
        'vpoc': r['vpoc'],
        'value_area_low': r['value_area_low'],
        'value_area_high': r['value_area_high'],
        'value_area_width': r['value_area_width'],
        'total_volume': r['total_volume']
    } for r in results]
    
    vpoc_df = pd.DataFrame(vpoc_data)
    vpoc_output_file = os.path.join(OUTPUT_DIR, f'{SESSION_TYPE}_vpoc_data.csv')
    vpoc_df.to_csv(vpoc_output_file, index=False)
    print(f"Saved VPOC data to {vpoc_output_file}")
    
    # Plot VPOC migrations
    print(f"\nPlotting VPOC migrations...")
    migration_chart_file = os.path.join(OUTPUT_DIR, f'{SESSION_TYPE}_vpoc_migrations.png')
    plot_vpoc_migrations(dates, vpocs, SESSION_TYPE, migration_chart_file)
    print(f"Saved VPOC migration chart to {migration_chart_file}")
    
    # Find migration trends
    trends = find_migration_trends(dates, vpocs, min_consecutive=3, min_migration=1.0)
    print(f"Found {len(trends)} significant VPOC migration trends")
    
    # Save trends to CSV
    if trends:
        trends_df = pd.DataFrame(trends)
        trends_output_file = os.path.join(OUTPUT_DIR, f'{SESSION_TYPE}_vpoc_trends.csv')
        trends_df.to_csv(trends_output_file, index=False)
        print(f"Saved VPOC trend data to {trends_output_file}")
    
    # Generate sample volume profile visualizations (first 3 sessions)
    print(f"\nGenerating sample volume profile visualizations...")
    sample_results = results[:3]  # First 3 sessions
    
    for result in sample_results:
        date = result['date']
        profile_file = os.path.join(OUTPUT_DIR, f'{SESSION_TYPE}_profile_{date}.png')
        
        plot_volume_profile(
            result['volume_profile'],
            result['vpoc'],
            result['value_area_low'],
            result['value_area_high'],
            f"Session {date}",
            profile_file
        )
        print(f"Saved volume profile for {date} to {profile_file}")
    
    print(f"\nVPOC calculation and analysis complete!")
    print(f"Results saved to {OUTPUT_DIR} directory")
    
    # Print summary of trends
    if trends:
        print("\nSummary of VPOC Migration Trends:")
        print("=================================")
        for i, trend in enumerate(trends, 1):
            print(f"Trend #{i}:")
            print(f"  Direction: {trend['direction'].upper()}")
            print(f"  Duration: {trend['consecutive_count']} sessions")
            print(f"  Date Range: {trend['start_date']} to {trend['end_date']}")
            print(f"  VPOC Change: {trend['vpoc_change']:.2f} points")
            print()

if __name__ == "__main__":
    main()


Simple VPOC Calculator
Loading data from /home/lr/Documents/FUTURUES_PROJECT/DATA/cleaned_futures_data.csv...
Data loaded: 92207 rows
Date range: 2021-12-05 to 2022-03-10
Filtered to RTH sessions: 26125 rows
Found 69 unique RTH sessions

Calculating volume profiles and VPOCs...


  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
 28.71428571]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
 59.71428571]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask, 'volume'] += volume_per_bin
  volume_profile.loc[mask,

Saved VPOC data to /home/lr/Documents/FUTURUES_PROJECT/RESULTS/RTH_vpoc_data.csv

Plotting VPOC migrations...
Saved VPOC migration chart to /home/lr/Documents/FUTURUES_PROJECT/RESULTS/RTH_vpoc_migrations.png
Found 9 significant VPOC migration trends
Saved VPOC trend data to /home/lr/Documents/FUTURUES_PROJECT/RESULTS/RTH_vpoc_trends.csv

Generating sample volume profile visualizations...
Saved volume profile for 2021-12-06 to /home/lr/Documents/FUTURUES_PROJECT/RESULTS/RTH_profile_2021-12-06.png
Saved volume profile for 2021-12-07 to /home/lr/Documents/FUTURUES_PROJECT/RESULTS/RTH_profile_2021-12-07.png
Saved volume profile for 2021-12-08 to /home/lr/Documents/FUTURUES_PROJECT/RESULTS/RTH_profile_2021-12-08.png

VPOC calculation and analysis complete!
Results saved to /home/lr/Documents/FUTURUES_PROJECT/RESULTS directory

Summary of VPOC Migration Trends:
Trend #1:
  Direction: DOWN
  Duration: 3 sessions
  Date Range: 2021-12-10 to 2021-12-15
  VPOC Change: -78.00 points

Trend #2:
  