In [20]:
import pandas as pd
import requests
from datetime import datetime, timedelta
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

In [21]:
API_KEY = "vFDjkUVRfPnedLrbRjm75BZ9CJHz3dfv"
TICKER = "AAPL"
START_DATE = "2025-10-01"
END_DATE = "2025-11-01"

In [22]:
def pull_polygon_data(ticker, start, end, api_key):
    url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/minute/{start}/{end}?apiKey={api_key}"
    response = requests.get(url)
    data = response.json()
    
    if 'results' not in data or len(data['results']) < 2:
        raise ValueError("Not enough data returned from Polygon API")
    
    df = pd.DataFrame(data['results'])
    df['timestamp'] = pd.to_datetime(df['t'], unit='ms')
    df = df.rename(columns={'o':'open','h':'high','l':'low','c':'close','v':'volume'})
    df = df[['timestamp','open','high','low','close','volume']]
    return df

In [23]:
def calculate_features(df):
    df = df.copy()
    
    # Momentum & volatility
    df['momentum_1min'] = df['close'].pct_change()
    df['volatility_1min'] = df['momentum_1min'] ** 2
    
    # Price direction
    df['price_direction'] = (df['close'] > df['open']).astype(int)
    
    # VWAP and deviation
    df['vwap'] = (df['close'] * df['volume']).cumsum() / df['volume'].cumsum()
    df['vwap_dev'] = (df['close'] - df['vwap']) / df['vwap']
    
    # Time features
    df['hour'] = df['timestamp'].dt.hour
    df['minute'] = df['timestamp'].dt.minute
    
    # Target: next-minute movement
    df['next_return'] = df['close'].shift(-1) / df['close'] - 1
    df['target'] = (df['next_return'] > 0).astype(int)
    
    df = df.dropna()
    return df

In [24]:
df = pull_polygon_data(TICKER, START_DATE, END_DATE, API_KEY)
df = calculate_features(df)

features = ['momentum_1min', 'volatility_1min', 'price_direction', 'vwap_dev', 'hour', 'minute']
X = df[features]
y = df['target']

# Chronological train/test split
split_index = int(len(X)*0.8)
X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Save model
joblib.dump(model, "trained_stock_model.pkl")

Accuracy: 0.526


['trained_stock_model.pkl']

In [25]:
def get_recent_minute_decision(ticker, api_key, model, prob_threshold=0.55):
    today = datetime.utcnow().date()
    start = today - timedelta(days=1)
    
    # Pull data
    df = pull_polygon_data(ticker, start, today, api_key)
    
    # Only last 2 minutes needed for momentum, volatility, price direction
    last_two = df.iloc[-2:]
    
    momentum_1min = (last_two['close'].iloc[1] - last_two['close'].iloc[0]) / last_two['close'].iloc[0]
    volatility_1min = momentum_1min ** 2
    price_direction = int(last_two['close'].iloc[1] > last_two['open'].iloc[1])
    
    # VWAP deviation using cumulative VWAP
    vwap = (df['close'] * df['volume']).cumsum() / df['volume'].cumsum()
    vwap_dev = (last_two['close'].iloc[1] - vwap.iloc[-1]) / vwap.iloc[-1]
    
    hour = last_two['timestamp'].iloc[1].hour
    minute = last_two['timestamp'].iloc[1].minute
    
    feature_row = pd.DataFrame([{
        'momentum_1min': momentum_1min,
        'volatility_1min': volatility_1min,
        'price_direction': price_direction,
        'vwap_dev': vwap_dev,
        'hour': hour,
        'minute': minute
    }])
    
    # Model prediction
    pred_proba = model.predict_proba(feature_row)[0]
    
    # Decision logic with HOLD for uncertain predictions
    if pred_proba[1] > prob_threshold:
        decision = "BUY"
    elif pred_proba[0] > prob_threshold:
        decision = "SELL"
    else:
        decision = "HOLD"
    
    return decision, last_two.iloc[1], feature_row, pred_proba

In [26]:
model = joblib.load("trained_stock_model.pkl")

decision, last_bar, features, pred_proba = get_recent_minute_decision(TICKER, API_KEY, model)
print("Decision:", decision)
print("Last bar:\n", last_bar)
print("Features:\n", features)
print("Predicted probabilities (SELL, BUY):", pred_proba)

  today = datetime.utcnow().date()


Decision: SELL
Last bar:
 timestamp    2025-11-06 00:58:00
open                      269.77
high                      269.86
low                       269.77
close                     269.85
volume                    1243.0
Name: 709, dtype: object
Features:
    momentum_1min  volatility_1min  price_direction  vwap_dev  hour  minute
0       0.001299         0.000002                1   0.00077     0      58
Predicted probabilities (SELL, BUY): [0.63 0.37]
