<a href="https://colab.research.google.com/github/san-258/8-Days-prediction/blob/SPY/8_day_SPY_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================================================
# SPY 8-Day VPA Predictor - Google Colab Version
# Volume Price Analysis system for predicting SPY movement 8 days ahead
# Run any day to get next prediction with advanced VPA analysis
# ============================================================================

# STEP 1: Install required packages
!pip install yfinance scikit-learn pandas numpy matplotlib seaborn scipy -q

# STEP 2: Import libraries
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
import warnings
import json
import sys
from typing import Tuple, Dict, Optional
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.signal import find_peaks

# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# pandas / scikit-learn — presumably done to keep notebook output short.
warnings.filterwarnings('ignore')

# Machine Learning
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import TimeSeriesSplit

# Plot appearance: matplotlib's 'default' style sheet plus seaborn's "husl"
# colour palette for any subsequently drawn figures.
plt.style.use('default')
sns.set_palette("husl")

# Banner printed once when the cell is executed (a 60-character rule closes it).
print("📊 SPY VPA Predictor for Google Colab")
print("🎯 Advanced Volume Price Analysis System")
print("=" * 60)

class SPY_VPA_Colab:
    def __init__(self, target_days=8):
        """Create a predictor with its horizon, scaler, settings and empty result slots.

        Args:
            target_days: Forecast horizon; used as a row offset when the
                forward-return target is built and as the default for the
                target-date calculation.
        """
        # Thresholds are multiples of the 20-bar average volume; the flow
        # periods are the look-back lengths for the institutional-flow columns.
        vpa_settings = {
            "volume_spike_threshold": 1.5,
            "climax_volume_threshold": 2.0,
            "low_volume_threshold": 0.7,
            "institutional_flow_periods": [10, 20, 50],
        }

        # Base ensemble weights, looked up by model type when predictions are blended.
        ensemble_weights = {
            "vpa_model": 0.4,
            "volume_flow": 0.3,
            "price_action": 0.3,
        }
        prediction_settings = {
            "confidence_threshold": 60,
            "ensemble_weights": ensemble_weights,
        }

        self.target_days = target_days
        self.scaler = RobustScaler()
        self.config = {
            "vpa_settings": vpa_settings,
            "prediction": prediction_settings,
        }

        # Filled in later by the download / analysis / prediction steps.
        self.spy_data = None
        self.vpa_data = None
        self.prediction_result = None

    def calculate_target_date(self, days_ahead: int = None) -> str:
        """Calculate target date N days from today (trading days only)"""
        if days_ahead is None:
            days_ahead = self.target_days

        current_date = datetime.now().date()
        target_date = current_date + timedelta(days=days_ahead)

        # Ensure target is a trading day (Monday-Friday)
        while target_date.weekday() >= 5:  # Saturday=5, Sunday=6
            target_date += timedelta(days=1)

        return target_date.strftime('%Y-%m-%d')

    def download_spy_data(self, period='10y') -> Optional[pd.DataFrame]:
        """Fetch daily SPY bars through yfinance and cache them on the instance.

        MultiIndex column headers are flattened to their first level, missing
        OHLCV columns are mapped from look-alike names where possible, and
        rows containing NaN are dropped.  The cleaned frame is stored in
        ``self.spy_data``.

        Args:
            period: History length passed straight to ``yf.download``.

        Returns:
            The cleaned DataFrame, or ``None`` if any step raised (the error
            is printed rather than propagated).
        """
        try:
            print("📊 Downloading SPY data...")

            # Progress bar is switched off for notebook output
            frame = yf.download('SPY', period=period, interval='1d', progress=False)

            # Keep only the first header level when columns come back as a MultiIndex
            if isinstance(frame.columns, pd.MultiIndex):
                frame.columns = frame.columns.get_level_values(0)

            expected = ['Open', 'High', 'Low', 'Close', 'Volume']
            absent = [name for name in expected if name not in frame.columns]

            if absent:
                print(f"⚠️ Missing columns: {absent}")
                # Adopt the first existing column whose name contains the
                # wanted one (case-insensitive), e.g. a prefixed variant.
                for wanted in absent:
                    lookalike = next(
                        (have for have in frame.columns if wanted.lower() in have.lower()),
                        None,
                    )
                    if lookalike is not None:
                        frame = frame.rename(columns={lookalike: wanted})

            frame = frame.dropna()
            self.spy_data = frame

            print(f"✅ Downloaded {len(frame)} days of SPY data")
            print(f"📅 Data range: {frame.index[0].date()} to {frame.index[-1].date()}")
            print(f"💰 Current SPY price: ${frame['Close'].iloc[-1]:.2f}")
            print(f"📊 Latest volume: {frame['Volume'].iloc[-1]:,.0f}")

            return frame

        except Exception as e:
            print(f"❌ Error downloading SPY data: {e}")
            return None

    def analyze_volume_price_action(self, data: pd.DataFrame) -> pd.DataFrame:
        """Derive the volume/price indicator columns used by the models.

        Works on a copy of ``data`` (which must provide Open, High, Low, Close
        and Volume columns), appends the indicator columns computed below,
        drops every row that contains a NaN, stores the result in
        ``self.vpa_data`` and returns it.

        If anything raises, the error is printed and the *unmodified input*
        ``data`` is returned instead; ``self.vpa_data`` is not assigned in
        that case.
        """
        try:
            print("🔍 Performing Volume Price Analysis...")

            df = data.copy()

            # ===== BASIC CALCULATIONS =====
            df['Daily_Return'] = df['Close'].pct_change()
            df['Price_Range'] = df['High'] - df['Low']
            # True range: the largest of high-low, |high - previous close|
            # and |low - previous close|.
            df['True_Range'] = np.maximum(
                df['High'] - df['Low'],
                np.maximum(
                    abs(df['High'] - df['Close'].shift(1)),
                    abs(df['Low'] - df['Close'].shift(1))
                )
            )

            # ===== VOLUME ANALYSIS =====
            # Volume relative to its own rolling mean over several window lengths.
            volume_periods = [5, 10, 20, 50]
            for period in volume_periods:
                df[f'Volume_MA_{period}'] = df['Volume'].rolling(period).mean()
                df[f'Volume_Ratio_{period}'] = df['Volume'] / df[f'Volume_MA_{period}']

            # VPA Pattern Recognition: boolean flags from the 20-bar volume ratio
            # compared against the configured thresholds.
            df['Volume_Spike'] = df['Volume_Ratio_20'] > self.config['vpa_settings']['volume_spike_threshold']
            df['Climax_Volume'] = df['Volume_Ratio_20'] > self.config['vpa_settings']['climax_volume_threshold']
            df['Low_Volume'] = df['Volume_Ratio_20'] < self.config['vpa_settings']['low_volume_threshold']

            # ===== EFFORT vs RESULT ANALYSIS =====
            # Rolling 20-bar percentile rank of the price range ("result")
            # divided by that of the volume ("effort"); the 0.01 keeps the
            # denominator away from zero.
            volume_percentile = df['Volume'].rolling(20).rank(pct=True)
            price_move_percentile = df['Price_Range'].rolling(20).rank(pct=True)
            df['Effort_vs_Result'] = price_move_percentile / (volume_percentile + 0.01)

            # ===== VOLUME WEIGHTED AVERAGE PRICE (VWAP) =====
            # Rolling 20-bar VWAP built from the typical price (H+L+C)/3.
            typical_price = (df['High'] + df['Low'] + df['Close']) / 3
            df['VWAP'] = (typical_price * df['Volume']).rolling(20).sum() / df['Volume'].rolling(20).sum()
            df['Distance_from_VWAP'] = (df['Close'] - df['VWAP']) / df['VWAP']

            # ===== INSTITUTIONAL FLOW ANALYSIS =====
            # Heuristic: a bar counts as "institutional" when its volume exceeds
            # the rolling 50-bar 80th percentile; buy if it closed above the
            # open, sell if it closed below.
            df['Large_Volume_Threshold'] = df['Volume'].rolling(50).quantile(0.8)
            df['Institutional_Buy'] = (df['Volume'] > df['Large_Volume_Threshold']) & (df['Close'] > df['Open'])
            df['Institutional_Sell'] = (df['Volume'] > df['Large_Volume_Threshold']) & (df['Close'] < df['Open'])
            # Running count of buy bars minus sell bars
            df['Cumulative_Inst_Flow'] = (df['Institutional_Buy'].astype(int) - df['Institutional_Sell'].astype(int)).cumsum()

            # Net change of that running count over each configured look-back
            for period in self.config['vpa_settings']['institutional_flow_periods']:
                df[f'Inst_Flow_{period}d'] = df['Cumulative_Inst_Flow'].diff(period)

            # ===== MONEY FLOW ANALYSIS =====
            # Raw money flow is attributed to the positive or negative side
            # depending on whether the typical price rose or fell vs. the
            # previous bar (unchanged bars contribute to neither).
            df['Raw_Money_Flow'] = typical_price * df['Volume']
            df['Positive_MF'] = np.where(typical_price > typical_price.shift(1), df['Raw_Money_Flow'], 0)
            df['Negative_MF'] = np.where(typical_price < typical_price.shift(1), df['Raw_Money_Flow'], 0)

            # Money Flow Index; the +1 in the denominator avoids a division by
            # zero when a window contains no negative flow.
            for period in [14, 21]:
                positive_mf = df['Positive_MF'].rolling(period).sum()
                negative_mf = df['Negative_MF'].rolling(period).sum()
                mf_ratio = positive_mf / (negative_mf + 1)
                df[f'MFI_{period}'] = 100 - (100 / (1 + mf_ratio))

            # Accumulation/Distribution Line: close-location value times
            # volume, accumulated; 0.001 guards against a zero high-low range.
            clv = ((df['Close'] - df['Low']) - (df['High'] - df['Close'])) / (df['High'] - df['Low'] + 0.001)
            df['AD_Line'] = (clv * df['Volume']).cumsum()
            df['AD_Line_Change'] = df['AD_Line'].diff(10)

            # ===== VOLUME PROFILE APPROXIMATION =====
            # Point of Control approximation: the close of the highest-volume
            # bar inside each 50-row window.  The window is handed to the
            # lambda as a Series (rolling.apply default raw=False), so its
            # index can be used to look up the matching volumes in df.
            df['POC'] = df['Close'].rolling(50).apply(
                lambda x: x.iloc[np.argmax(df.loc[x.index, 'Volume'].values)] if len(x) > 0 else x.iloc[-1]
            )
            df['Distance_from_POC'] = (df['Close'] - df['POC']) / df['POC']

            # ===== VPA COMPOSITE SCORE =====
            # Must run after the volume, effort, MFI and flow columns exist,
            # because the helper reads them from df.
            df['VPA_Score'] = self._calculate_vpa_composite_score(df)

            # ===== TECHNICAL INDICATORS =====
            # RSI from simple rolling means of gains and losses
            for period in [14, 21]:
                delta = df['Close'].diff()
                gain = (delta.where(delta > 0, 0)).rolling(period).mean()
                loss = (-delta.where(delta < 0, 0)).rolling(period).mean()
                rs = gain / loss
                df[f'RSI_{period}'] = 100 - (100 / (1 + rs))

            # Price momentum: percentage change over the look-back
            for period in [5, 10, 20]:
                df[f'Price_Momentum_{period}'] = df['Close'].pct_change(period)

            # Rows with any NaN (e.g. the rolling-window warm-up) are removed.
            self.vpa_data = df.dropna()
            print(f"✅ VPA analysis completed - {len(self.vpa_data)} days analyzed")

            return self.vpa_data

        except Exception as e:
            print(f"❌ Error in VPA analysis: {e}")
            # Falls back to the raw input, which lacks the indicator columns.
            return data

    def _calculate_vpa_composite_score(self, df: pd.DataFrame) -> pd.Series:
        """Calculate VPA composite score"""
        try:
            scores = []

            # Volume strength component
            if 'Volume_Ratio_20' in df.columns:
                vol_score = (df['Volume_Ratio_20'] - 1) / df['Volume_Ratio_20'].std()
                scores.append(vol_score * 0.3)

            # Effort vs Result component
            if 'Effort_vs_Result' in df.columns:
                effort_score = (df['Effort_vs_Result'] - df['Effort_vs_Result'].mean()) / df['Effort_vs_Result'].std()
                scores.append(effort_score * 0.2)

            # Money Flow component
            if 'MFI_14' in df.columns:
                mfi_score = (df['MFI_14'] - 50) / 30
                scores.append(mfi_score * 0.2)

            # Institutional flow component
            if 'Inst_Flow_20d' in df.columns:
                inst_score = df['Inst_Flow_20d'] / (df['Inst_Flow_20d'].std() + 1)
                scores.append(inst_score * 0.3)

            if scores:
                composite = sum(scores) / len(scores)
            else:
                composite = pd.Series(0, index=df.index)

            return composite.fillna(0)

        except Exception as e:
            print(f"Warning: Error calculating VPA score: {e}")
            return pd.Series(0, index=df.index)

    def prepare_vpa_features(self) -> Tuple[pd.DataFrame, list]:
        """Prepare features for VPA-based prediction"""
        try:
            print("🛠️ Preparing VPA features for modeling...")

            if self.vpa_data is None:
                raise ValueError("VPA data not available. Run analyze_volume_price_action first.")

            # Define feature categories
            volume_features = [
                'Volume_Ratio_5', 'Volume_Ratio_10', 'Volume_Ratio_20', 'Volume_Ratio_50'
            ]

            vpa_pattern_features = [
                'Volume_Spike', 'Climax_Volume', 'Low_Volume'
            ]

            price_action_features = [
                'Distance_from_VWAP', 'Distance_from_POC', 'Daily_Return'
            ]

            momentum_features = [
                'RSI_14', 'RSI_21', 'Price_Momentum_5', 'Price_Momentum_10', 'Price_Momentum_20'
            ]

            money_flow_features = [
                'MFI_14', 'MFI_21', 'AD_Line_Change'
            ]

            institutional_features = [
                'Inst_Flow_10d', 'Inst_Flow_20d', 'Inst_Flow_50d'
            ]

            composite_features = [
                'VPA_Score', 'Effort_vs_Result'
            ]

            # Combine all features
            all_potential_features = (volume_features + vpa_pattern_features + price_action_features +
                                    momentum_features + money_flow_features + institutional_features +
                                    composite_features)

            # Convert boolean features to integers
            feature_df = self.vpa_data.copy()
            for col in vpa_pattern_features:
                if col in feature_df.columns:
                    feature_df[col] = feature_df[col].astype(int)

            # Filter available features
            available_features = []
            for feature in all_potential_features:
                if feature in feature_df.columns and not feature_df[feature].isna().all():
                    available_features.append(feature)

            print(f"✅ Prepared {len(available_features)} VPA features")
            print("📊 Feature categories:")
            print(f"   • Volume: {len([f for f in available_features if 'Volume' in f])}")
            print(f"   • VPA Patterns: {len([f for f in available_features if f in vpa_pattern_features])}")
            print(f"   • Price Action: {len([f for f in available_features if f in price_action_features])}")
            print(f"   • Momentum: {len([f for f in available_features if f in momentum_features])}")
            print(f"   • Money Flow: {len([f for f in available_features if f in money_flow_features])}")
            print(f"   • Institutional: {len([f for f in available_features if f in institutional_features])}")

            return feature_df, available_features

        except Exception as e:
            print(f"❌ Error preparing VPA features: {e}")
            return self.vpa_data, []

    def train_vpa_models(self, feature_df: pd.DataFrame, feature_columns: list) -> Dict:
        """Train VPA-optimized ensemble models"""
        try:
            print("🤖 Training VPA ensemble models...")

            # Create target variable (N-day forward return)
            feature_df['Target_Return'] = feature_df['Close'].pct_change(self.target_days).shift(-self.target_days)

            # Prepare clean dataset
            model_data = feature_df[feature_columns + ['Target_Return', 'Close']].dropna()

            if len(model_data) < 50:
                raise ValueError(f"Insufficient data: {len(model_data)} samples (need at least 50)")

            # Features and target
            X = model_data[feature_columns].iloc[:-self.target_days]
            y = model_data['Target_Return'].iloc[:-self.target_days]

            # Current features for prediction
            current_features = model_data[feature_columns].iloc[-1:].values
            current_price = model_data['Close'].iloc[-1]

            print(f"📊 Training data: {len(X)} samples, {len(feature_columns)} features")

            # Initialize models
            models = {
                'VPA_Random_Forest': RandomForestRegressor(
                    n_estimators=150,
                    max_depth=10,
                    min_samples_split=5,
                    min_samples_leaf=3,
                    max_features='sqrt',
                    random_state=42,
                    n_jobs=-1
                ),
                'VPA_Gradient_Boost': GradientBoostingRegressor(
                    n_estimators=120,
                    max_depth=8,
                    learning_rate=0.08,
                    subsample=0.8,
                    random_state=42
                ),
                'VPA_Ridge': Ridge(alpha=2.0, random_state=42)
            }

            # Time series cross-validation
            tscv = TimeSeriesSplit(n_splits=3)
            results = {}

            for model_name, model in models.items():
                print(f"  Training {model_name}...")

                cv_scores = []
                cv_direction_acc = []

                # Cross-validation
                for train_idx, val_idx in tscv.split(X):
                    X_train_cv, X_val_cv = X.iloc[train_idx], X.iloc[val_idx]
                    y_train_cv, y_val_cv = y.iloc[train_idx], y.iloc[val_idx]

                    # Scale features for Ridge
                    if 'Ridge' in model_name:
                        X_train_scaled = self.scaler.fit_transform(X_train_cv)
                        X_val_scaled = self.scaler.transform(X_val_cv)

                        model.fit(X_train_scaled, y_train_cv)
                        y_pred_cv = model.predict(X_val_scaled)
                    else:
                        model.fit(X_train_cv, y_train_cv)
                        y_pred_cv = model.predict(X_val_cv)

                    mse = mean_squared_error(y_val_cv, y_pred_cv)
                    direction_acc = np.mean((y_pred_cv > 0) == (y_val_cv > 0))

                    cv_scores.append(mse)
                    cv_direction_acc.append(direction_acc)

                # Train final model
                if 'Ridge' in model_name:
                    X_scaled = self.scaler.fit_transform(X)
                    current_features_scaled = self.scaler.transform(current_features)

                    model.fit(X_scaled, y)
                    prediction = model.predict(current_features_scaled)[0]
                    feature_importance = None
                else:
                    model.fit(X, y)
                    prediction = model.predict(current_features)[0]
                    feature_importance = dict(zip(feature_columns, model.feature_importances_))

                results[model_name] = {
                    'model': model,
                    'cv_mse': np.mean(cv_scores),
                    'cv_direction_acc': np.mean(cv_direction_acc),
                    'cv_direction_std': np.std(cv_direction_acc),
                    'prediction': prediction,
                    'feature_importance': feature_importance,
                    'current_price': current_price
                }

                print(f"    Direction Accuracy: {np.mean(cv_direction_acc):.1%} ± {np.std(cv_direction_acc):.1%}")

            print("✅ Model training completed")
            return results

        except Exception as e:
            print(f"❌ Error training VPA models: {e}")
            return {}

    def create_ensemble_prediction(self, model_results: Dict) -> Dict:
        """Create weighted ensemble prediction"""
        try:
            if not model_results:
                return None

            print("🎯 Creating ensemble prediction...")

            # Ensemble weights
            weights = self.config['prediction']['ensemble_weights']

            weighted_prediction = 0
            total_weight = 0
            confidence_scores = []

            for model_name, result in model_results.items():
                # Assign weights based on model type
                if 'Random_Forest' in model_name:
                    weight = weights['vpa_model']
                elif 'Gradient' in model_name:
                    weight = weights['volume_flow']
                else:
                    weight = weights['price_action']

                # Adjust by performance
                performance_weight = weight * result['cv_direction_acc']

                weighted_prediction += result['prediction'] * performance_weight
                total_weight += performance_weight
                confidence_scores.append(result['cv_direction_acc'] * 100)

            final_prediction = weighted_prediction / total_weight if total_weight > 0 else 0

            # Calculate target price
            current_price = list(model_results.values())[0]['current_price']
            target_price = current_price * (1 + final_prediction)

            # Confidence calculation
            avg_confidence = np.mean(confidence_scores)
            prediction_std = np.std([r['prediction'] for r in model_results.values()])
            agreement_factor = max(0.5, 1 - prediction_std * 10)

            ensemble_confidence = min(95, max(50, avg_confidence * agreement_factor))

            # Direction
            direction = 'Bullish' if final_prediction > 0.005 else 'Bearish' if final_prediction < -0.005 else 'Neutral'

            prediction_result = {
                'prediction_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                'target_date': self.calculate_target_date(),
                'days_ahead': self.target_days,
                'current_price': current_price,
                'predicted_return': final_prediction,
                'target_price': target_price,
                'price_change': target_price - current_price,
                'direction': direction,
                'confidence': ensemble_confidence,
                'model_agreement': agreement_factor,
                'individual_predictions': {name: result['prediction'] for name, result in model_results.items()},
                'model_confidences': dict(zip(model_results.keys(), confidence_scores))
            }

            self.prediction_result = prediction_result
            return prediction_result

        except Exception as e:
            print(f"❌ Error creating ensemble prediction: {e}")
            return None

    def display_prediction_results(self, prediction: Dict, model_results: Dict):
        """Display comprehensive prediction results in Colab"""
        try:
            print("\n" + "="*80)
            print("📊 SPY VPA PREDICTION RESULTS")
            print("="*80)

            # Main prediction summary
            print(f"\n🎯 PREDICTION SUMMARY")
            print(f"{'─'*50}")
            print(f"Prediction Date: {prediction['prediction_date']}")
            print(f"Target Date: {prediction['target_date']} ({prediction['days_ahead']} days ahead)")
            print(f"Current SPY Price: ${prediction['current_price']:.2f}")
            print(f"Target Price: ${prediction['target_price']:.2f}")
            print(f"Expected Return: {prediction['predicted_return']:.2%}")
            print(f"Price Change: ${prediction['price_change']:+.2f}")
            print(f"Direction: {prediction['direction']}")
            print(f"VPA Confidence: {prediction['confidence']:.1f}%")
            print(f"Model Agreement: {prediction['model_agreement']:.1%}")

            # Signal strength assessment
            print(f"\n⚡ SIGNAL ASSESSMENT")
            print(f"{'─'*50}")
            confidence = prediction['confidence']
            if confidence > 75:
                print("🟢 HIGH CONFIDENCE VPA SIGNAL")
                print("• Strong volume-price patterns detected")
                print("• Multiple VPA indicators align")
                print("• High probability directional signal")
            elif confidence > 65:
                print("🟡 MODERATE CONFIDENCE VPA SIGNAL")
                print("• Some VPA patterns support prediction")
                print("• Mixed volume-price signals")
                print("• Moderate directional probability")
            else:
                print("🔴 LOW CONFIDENCE VPA SIGNAL")
                print("• Conflicting volume-price patterns")
                print("• Unclear institutional sentiment")
                print("• High uncertainty - proceed with caution")

            # Direction-specific analysis
            direction = prediction['direction']
            pred_return = prediction['predicted_return']

            print(f"\n📈 VPA DIRECTIONAL ANALYSIS")
            print(f"{'─'*50}")

            if direction == 'Bullish':
                print("🐂 BULLISH VPA SIGNALS:")
                if abs(pred_return) > 0.02:
                    print("• Strong institutional buying pressure detected")
                    print("• Volume supports significant upward move")
                    print("• Money flow indicates accumulation")
                else:
                    print("• Moderate bullish volume patterns")
                    print("• Some institutional interest detected")
                    print("• Price likely to test higher levels")

                print("\n💡 BULLISH TRADING STRATEGY:")
                print("• Look for volume confirmation on breakouts")
                print("• Monitor for continued institutional flow")
                print("• Watch volume at resistance levels")

            elif direction == 'Bearish':
                print("🐻 BEARISH VPA SIGNALS:")
                if abs(pred_return) > 0.02:
                    print("• Strong institutional selling pressure detected")
                    print("• Volume supports significant downward move")
                    print("• Money flow indicates distribution")
                else:
                    print("• Moderate bearish volume patterns")
                    print("• Some institutional selling detected")
                    print("• Price likely to test lower levels")

                print("\n💡 BEARISH TRADING STRATEGY:")
                print("• Watch for volume confirmation on breakdowns")
                print("• Monitor for climax selling volume")
                print("• Look for support tests with volume")

            else:
                print("⚖️ NEUTRAL VPA SIGNALS:")
                print("• Balanced volume-price relationship")
                print("• No clear institutional bias")
                print("• Range-bound trading expected")

                print("\n💡 NEUTRAL TRADING STRATEGY:")
                print("• Trade range with volume confirmation")
                print("• Wait for clear VPA breakout signals")
                print("• Monitor accumulation/distribution patterns")

            # Model breakdown
            print(f"\n🤖 MODEL ENSEMBLE BREAKDOWN")
            print(f"{'─'*50}")
            for model_name, confidence_score in prediction['model_confidences'].items():
                individual_pred = prediction['individual_predictions'][model_name]
                print(f"{model_name:<25} | Return: {individual_pred:+6.2%} | Confidence: {confidence_score:5.1f}%")

            # Risk assessment
            print(f"\n⚠️ RISK ASSESSMENT")
            print(f"{'─'*50}")

            if confidence >= 75:
                risk_level = "LOW-MODERATE"
                print(f"Risk Level: {risk_level}")
                print("• High confidence VPA signal")
                print("• Good risk/reward setup")
            elif confidence >= 60:
                risk_level = "MODERATE"
                print(f"Risk Level: {risk_level}")
                print("• Moderate confidence signal")
                print("• Standard position sizing recommended")
            else:
                risk_level = "HIGH"
                print(f"Risk Level: {risk_level}")
                print("• Low confidence signal")
                print("• Reduced position sizing advised")

            if abs(pred_return) > 0.03:
                print("• Large predicted move - higher volatility expected")

            if prediction['model_agreement'] < 0.7:
                print("• Models show disagreement - use extra caution")

            # Key levels to watch
            print(f"\n🔍 KEY LEVELS TO MONITOR")
            print(f"{'─'*50}")
            print(f"Current Price: ${prediction['current_price']:.2f}")
            print(f"Target Price: ${prediction['target_price']:.2f}")
            midpoint = (prediction['current_price'] + prediction['target_price']) / 2
            print(f"Key Midpoint: ${midpoint:.2f}")

            # VPA monitoring guidelines
            print(f"\n📊 VPA MONITORING GUIDELINES")
            print(f"{'─'*50}")
            print("• Watch for volume spikes at key price levels")
            print("• Monitor institutional flow indicators")
            print("• Track effort vs result patterns")
            print("• Look for money flow divergences")
            print("• Observe volume profile changes")

            print("\n" + "="*80)

        except Exception as e:
            print(f"❌ Error displaying results: {e}")

    def plot_vpa_analysis(self, model_results: Dict = None):
        """Draw a 2x2 dashboard from the last 60 rows of ``self.vpa_data``.

        Panels: price with VWAP and volume bars, VPA score with
        effort-vs-result, institutional flow, and the Money Flow Index.
        Nothing is drawn (a notice is printed) when no analysis data is
        available.  ``model_results`` is accepted but not used in the body.
        Any exception is caught and printed.
        """
        try:
            if self.vpa_data is None or len(self.vpa_data) == 0:
                print("⚠️ No VPA data available for plotting")
                return

            # Create subplot figure
            fig, axes = plt.subplots(2, 2, figsize=(16, 12))
            fig.suptitle('SPY VPA Analysis Dashboard', fontsize=16, fontweight='bold')

            # Get recent data for plotting
            recent_data = self.vpa_data.tail(60)  # Last 60 rows
            dates = recent_data.index

            # Plot 1: Price with Volume Overlay (volume on a secondary y-axis)
            ax1 = axes[0, 0]
            ax1_vol = ax1.twinx()

            ax1.plot(dates, recent_data['Close'], 'b-', linewidth=2, label='SPY Close')
            ax1.plot(dates, recent_data['VWAP'], 'orange', linewidth=1, alpha=0.8, label='VWAP')

            # Green bar when the close is above the open, red otherwise
            volume_colors = ['green' if close > open_price else 'red' for close, open_price in zip(recent_data['Close'], recent_data['Open'])]

            ax1_vol.bar(dates, recent_data['Volume'], color=volume_colors, alpha=0.6, label='Volume')
            ax1_vol.plot(dates, recent_data['Volume_MA_20'], 'purple', linewidth=1, alpha=0.8, label='20-Day Vol MA')


            ax1.set_ylabel('Price ($)', color='b')
            ax1_vol.set_ylabel('Volume', color='gray')
            ax1.tick_params(axis='y', labelcolor='b')
            ax1_vol.tick_params(axis='y', labelcolor='gray')
            ax1.set_title('Price and Volume with VWAP')
            fig.autofmt_xdate()
            ax1.legend(loc='upper left')
            ax1_vol.legend(loc='upper right')
            ax1.grid(True, linestyle='--', alpha=0.6)


            # Plot 2: VPA Composite Score and Effort vs Result (secondary y-axis)
            ax2 = axes[0, 1]
            ax2.plot(dates, recent_data['VPA_Score'], 'g-', linewidth=1.5, label='VPA Composite Score')
            ax2_evar = ax2.twinx()
            ax2_evar.plot(dates, recent_data['Effort_vs_Result'], 'brown', linewidth=1, alpha=0.7, label='Effort vs Result')

            ax2.axhline(0, color='gray', linestyle='--', linewidth=0.8) # Neutral line for VPA Score
            ax2.set_ylabel('VPA Score', color='g')
            ax2_evar.set_ylabel('Effort vs Result', color='brown')
            ax2.tick_params(axis='y', labelcolor='g')
            ax2_evar.tick_params(axis='y', labelcolor='brown')
            ax2.set_title('VPA Score and Effort vs Result')
            fig.autofmt_xdate()
            ax2.legend(loc='upper left')
            ax2_evar.legend(loc='upper right')
            ax2.grid(True, linestyle='--', alpha=0.6)

            # Plot 3: Institutional Flow — cumulative line plus one dashed line
            # per configured look-back period that exists in the data
            ax3 = axes[1, 0]
            ax3.plot(dates, recent_data['Cumulative_Inst_Flow'], 'purple', linewidth=1.5, label='Cumulative Inst Flow')
            for period in self.config['vpa_settings']['institutional_flow_periods']:
                if f'Inst_Flow_{period}d' in recent_data.columns:
                     ax3.plot(dates, recent_data[f'Inst_Flow_{period}d'], linestyle='--', alpha=0.7, label=f'{period}-Day Inst Flow')

            ax3.axhline(0, color='gray', linestyle='--', linewidth=0.8) # Neutral line
            ax3.set_ylabel('Institutional Flow')
            ax3.set_title('Institutional Money Flow')
            fig.autofmt_xdate()
            ax3.legend()
            ax3.grid(True, linestyle='--', alpha=0.6)

            # Plot 4: Money Flow Index (MFI) with 20 / 50 / 80 reference lines
            ax4 = axes[1, 1]
            if 'MFI_14' in recent_data.columns:
                 ax4.plot(dates, recent_data['MFI_14'], 'teal', linewidth=1.5, label='MFI (14)')
            if 'MFI_21' in recent_data.columns:
                 ax4.plot(dates, recent_data['MFI_21'], 'cyan', linewidth=1, alpha=0.8, label='MFI (21)')

            ax4.axhline(20, color='green', linestyle='--', linewidth=0.8, label='Oversold (20)')
            ax4.axhline(80, color='red', linestyle='--', linewidth=0.8, label='Overbought (80)')
            ax4.axhline(50, color='gray', linestyle='--', linewidth=0.8, label='Neutral (50)')
            ax4.set_ylabel('MFI Value')
            ax4.set_title('Money Flow Index (MFI)')
            fig.autofmt_xdate()
            ax4.legend()
            ax4.grid(True, linestyle='--', alpha=0.6)

            plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust layout to prevent title overlap
            plt.show()

        except Exception as e:
            print(f"❌ Error generating VPA plots: {e}")


    def run_complete_vpa_analysis(self):
        """Run the full VPA analysis and prediction"""
        print("🚀 Starting SPY VPA Prediction and Analysis...")

        # 1. Download data
        data = self.download_spy_data()
        if data is None or data.empty:
            print("🚫 Failed to download data. Exiting.")
            return

        # 2. Analyze Volume Price Action
        self.vpa_data = self.analyze_volume_price_action(data)
        if self.vpa_data.empty:
            print("🚫 VPA analysis failed or returned empty data. Exiting.")
            return

        # 3. Prepare features
        feature_df, feature_columns = self.prepare_vpa_features()
        if not feature_columns:
            print("🚫 Failed to prepare VPA features. Exiting.")
            return

        # 4. Train models
        model_results = self.train_vpa_models(feature_df, feature_columns)
        if not model_results:
            print("🚫 Failed to train VPA models. Exiting.")
            return

        # 5. Create ensemble prediction
        prediction = self.create_ensemble_prediction(model_results)
        if prediction is None:
            print("🚫 Failed to create ensemble prediction. Exiting.")
            return

        self.prediction_result = prediction

        # 6. Display results
        self.display_prediction_results(prediction, model_results)

        # 7. Plot analysis
        self.plot_vpa_analysis(model_results)

        print("✅ SPY VPA Analysis and Prediction complete.")
        return prediction


# Example Usage (can be run in a separate cell after defining the class)
# predictor = SPY_VPA_Colab(target_days=8)
# result = predictor.run_complete_vpa_analysis()

📊 SPY VPA Predictor for Google Colab
🎯 Advanced Volume Price Analysis System


In [None]:
# Example Usage: Run the complete VPA analysis.
# target_days=6 overrides the constructor default of 8.
predictor = SPY_VPA_Colab(target_days=6)
# `result` is the prediction dict, or None if any pipeline stage failed.
result = predictor.run_complete_vpa_analysis()

# You can then inspect the 'result' variable to see the prediction details
# print(result)

🚀 Starting SPY VPA Prediction and Analysis...
📊 Downloading SPY data...
✅ Downloaded 2514 days of SPY data
📅 Data range: 2015-08-31 to 2025-08-28
💰 Current SPY price: $648.92
📊 Latest volume: 57,419,428
🔍 Performing Volume Price Analysis...
✅ VPA analysis completed - 2464 days analyzed
🛠️ Preparing VPA features for modeling...
✅ Prepared 23 VPA features
📊 Feature categories:
   • Volume: 7
   • VPA Patterns: 3
   • Price Action: 3
   • Momentum: 5
   • Money Flow: 3
   • Institutional: 3
🤖 Training VPA ensemble models...
📊 Training data: 2452 samples, 23 features
  Training VPA_Random_Forest...
    Direction Accuracy: 57.6% ± 4.2%
  Training VPA_Gradient_Boost...
    Direction Accuracy: 50.6% ± 3.9%
  Training VPA_Ridge...
    Direction Accuracy: 48.9% ± 3.5%
✅ Model training completed
🎯 Creating ensemble prediction...

📊 SPY VPA PREDICTION RESULTS

🎯 PREDICTION SUMMARY
──────────────────────────────────────────────────
Prediction Date: 2025-08-29 01:27:36
Target Date: 2025-09-04 (6 da