In [6]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error # Add this at the top!

# =====================
# CONFIG
# =====================
TICKERS = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", "ZS", "OKTA", "DDOG"]
START_DATE = "2018-01-01"

def create_features(df):
    df = df.copy()
    # Handle multi-index columns immediately if they exist
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    
    close = df["Close"]
    
    # 1. Volatility - ATR approx
    df['High_Low'] = df['High'] - df['Low']
    df['ATR'] = df['High_Low'].rolling(14).mean()
    
    # 2. Log Returns 
    df["Log_Ret"] = np.log(close / close.shift(1))
    
    # 3. Momentum (ROC)
    df["Momentum_10"] = close.pct_change(10)
    
    # 4. Standard Indicators
    df["SMA_20"] = close.rolling(20).mean()
    
    # RSI Calculation
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df["RSI_14"] = 100 - (100 / (1 + rs))
    
    df["Target_Ret"] = df["Log_Ret"].shift(-1)
    df.dropna(inplace=True)
    return df

def generate_signal(row):
    pred_return = row["Avg_Pred_Ret"]
    threshold = 0.005 # 0.5%
    
    if pred_return > threshold and row["RSI_14"] < 70:
        return "BUY"
    elif pred_return < -threshold and row["RSI_14"] > 30:
        return "SELL"
    else:
        return "HOLD"

# =====================
# ENGINE FUNCTION
# =====================


def run_prediction(ticker_to_predict):
    final_data = []
    
    for ticker in TICKERS:
        print(f"Processing {ticker}...")
        df_raw = yf.download(ticker, start=START_DATE, progress=False)
        
        if df_raw.empty:
            continue
            
        df = create_features(df_raw)
        features = ["Log_Ret", "Momentum_10", "RSI_14", "ATR", "SMA_20"]
        X = df[features]
        y = df["Target_Ret"] 

        split = int(len(df) * 0.8)
        X_train, X_test = X.iloc[:split], X.iloc[split:]
        y_train, y_test = y.iloc[:split], y.iloc[split:]

        # 1. Initialize Models
        rf = RandomForestRegressor(n_estimators=100, max_depth=5, random_state=42)
        xgb = XGBRegressor(n_estimators=100, learning_rate=0.05, max_depth=4)

        # 2. Train Models
        rf.fit(X_train, y_train)
        xgb.fit(X_train, y_train)

        # 3. EVALUATE IMMEDIATELY (This fixes your error)
        mae = mean_absolute_error(y_test, rf.predict(X_test))
        print(f"✅ {ticker} Trained. Test MAE: {mae:.6f}")

        # 4. Generate Predictions
        df["Avg_Pred_Ret"] = (rf.predict(X) + xgb.predict(X)) / 2
        df["Signal"] = df.apply(generate_signal, axis=1)
        df["Ticker"] = ticker
        
        # Strategy Returns
        df["Strategy_Ret"] = np.where(df["Signal"].shift(1) == "BUY", df["Log_Ret"], 0)
        df["Strategy_Ret"] = np.where(df["Signal"].shift(1) == "SELL", -df["Log_Ret"], df["Strategy_Ret"])
        df["Cumulative_Ret"] = df["Strategy_Ret"].cumsum()
        
        final_data.append(df)

    full_report = pd.concat(final_data)
    full_report.to_csv("optimized_trading_signals.csv")
    import pickle

    # 1. Prepare a dictionary of the results
    output_for_ensemble = {
        "ticker": ticker,
        "pred_return": df["Avg_Pred_Ret"].iloc[-1],
        "rsi": df["RSI_14"].iloc[-1],
        "atr": df["ATR"].iloc[-1],
        "raw_data": df.tail(5) # Sending last 5 days for context
    }

    # 2. Save to a pickle file
    file_name = f"{ticker}_model_output.pkl"
    with open(file_name, 'wb') as f:
        pickle.dump(output_for_ensemble, f)

    print(f"✅ Data for {ticker} pickled for the Ensemble file.")
    return full_report[full_report["Ticker"] == ticker].iloc[-1]
if __name__ == "__main__":
    # 1. Run the engine
    if __name__ == "__main__":
    # This loop runs the engine for EVERY ticker in your list
    # This creates all the .pkl files your API needs
        for t in TICKERS:
            try:
                run_prediction(t)
            except Exception as e:
                print(f"Error processing {t}: {e}")

        print("\n✅ All pickle files generated! Your API is now ready.")

        # 2. ADD THIS: Read the CSV we just created to show the full summary
        summary_df = pd.read_csv("optimized_trading_signals.csv")
        
        # 3. Get only the LATEST row for every ticker
        last_signals = summary_df.groupby('Ticker').tail(1)
        
        # 4. Print the "Live Dashboard" to your screen
        print("\n" + "="*50)
        print("      CURRENT MARKET SIGNALS SUMMARY")
        print("="*50)
        print(last_signals[['Ticker', 'Close', 'Signal', 'RSI_14', 'Avg_Pred_Ret']])
        print("="*50)

Processing AAPL...
✅ AAPL Trained. Test MAE: 0.013080
Processing MSFT...
✅ MSFT Trained. Test MAE: 0.009913
Processing GOOGL...
✅ GOOGL Trained. Test MAE: 0.014204
Processing AMZN...
✅ AMZN Trained. Test MAE: 0.014911
Processing TSLA...
✅ TSLA Trained. Test MAE: 0.029509
Processing META...
✅ META Trained. Test MAE: 0.017848
Processing ZS...
✅ ZS Trained. Test MAE: 0.017973
Processing OKTA...
✅ OKTA Trained. Test MAE: 0.017343
Processing DDOG...
✅ DDOG Trained. Test MAE: 0.019836
✅ Data for DDOG pickled for the Ensemble file.
Processing AAPL...
✅ AAPL Trained. Test MAE: 0.013088
Processing MSFT...
✅ MSFT Trained. Test MAE: 0.009909
Processing GOOGL...
✅ GOOGL Trained. Test MAE: 0.014201
Processing AMZN...
✅ AMZN Trained. Test MAE: 0.014911
Processing TSLA...
✅ TSLA Trained. Test MAE: 0.029509
Processing META...
✅ META Trained. Test MAE: 0.017069
Processing ZS...
✅ ZS Trained. Test MAE: 0.017973
Processing OKTA...
✅ OKTA Trained. Test MAE: 0.017343
Processing DDOG...
✅ DDOG Trained. Test

KeyboardInterrupt: 

In [None]:


def run_prediction(ticker_to_predict):
    final_data = []
    
    for ticker in TICKERS:
        print(f"Processing {ticker}...")
        df_raw = yf.download(ticker, start=START_DATE, progress=False)
        
        if df_raw.empty:
            continue
            
        df = create_features(df_raw)
        features = ["Log_Ret", "Momentum_10", "RSI_14", "ATR", "SMA_20"]
        X = df[features]
        y = df["Target_Ret"] 

        split = int(len(df) * 0.8)
        X_train, X_test = X.iloc[:split], X.iloc[split:]
        y_train, y_test = y.iloc[:split], y.iloc[split:]

        # 1. Initialize Models
        rf = RandomForestRegressor(n_estimators=100, max_depth=5, random_state=42)
        xgb = XGBRegressor(n_estimators=100, learning_rate=0.05, max_depth=4)

        # 2. Train Models
        rf.fit(X_train, y_train)
        xgb.fit(X_train, y_train)

        # 3. EVALUATE IMMEDIATELY (This fixes your error)
        mae = mean_absolute_error(y_test, rf.predict(X_test))
        print(f"✅ {ticker} Trained. Test MAE: {mae:.6f}")

        # 4. Generate Predictions
        df["Avg_Pred_Ret"] = (rf.predict(X) + xgb.predict(X)) / 2
        df["Signal"] = df.apply(generate_signal, axis=1)
        df["Ticker"] = ticker
        
        # Strategy Returns
        df["Strategy_Ret"] = np.where(df["Signal"].shift(1) == "BUY", df["Log_Ret"], 0)
        df["Strategy_Ret"] = np.where(df["Signal"].shift(1) == "SELL", -df["Log_Ret"], df["Strategy_Ret"])
        df["Cumulative_Ret"] = df["Strategy_Ret"].cumsum()
        
        final_data.append(df)

    full_report = pd.concat(final_data)
    full_report.to_csv("optimized_trading_signals.csv")
    import pickle

    # Inside run_prediction...
    # 1. Prepare a dictionary of the results
    output_for_ensemble = {
        "ticker": ticker,
        "pred_return": df["Avg_Pred_Ret"].iloc[-1],
        "rsi": df["RSI_14"].iloc[-1],
        "atr": df["ATR"].iloc[-1],
        "raw_data": df.tail(5) # Sending last 5 days for context
    }

    # 2. Save to a pickle file
    file_name = f"{ticker}_model_output.pkl"
    with open(file_name, 'wb') as f:
        pickle.dump(output_for_ensemble, f)

    print(f"✅ Data for {ticker} pickled for the Ensemble file.")
    return full_report[full_report["Ticker"] == ticker].iloc[-1]
if __name__ == "__main__":
    # 1. Run the engine
    ticker_to_view = "GOOGL" 
    result = run_prediction(ticker) 

    # 2. ADD THIS: Read the CSV we just created to show the full summary
    summary_df = pd.read_csv("optimized_trading_signals.csv")
    
    # 3. Get only the LATEST row for every ticker
    last_signals = summary_df.groupby('Ticker').tail(1)
    
    # 4. Print the "Live Dashboard" to your screen
    print("\n" + "="*50)
    print("      CURRENT MARKET SIGNALS SUMMARY")
    print("="*50)
    print(last_signals[['Ticker', 'Close', 'Signal', 'RSI_14', 'Avg_Pred_Ret']])
    print("="*50)

Processing AAPL...
✅ AAPL Trained. Test MAE: 0.013097
Processing MSFT...
✅ MSFT Trained. Test MAE: 0.009918
Processing GOOGL...
✅ GOOGL Trained. Test MAE: 0.014208
Processing AMZN...
✅ AMZN Trained. Test MAE: 0.014911
Processing TSLA...
✅ TSLA Trained. Test MAE: 0.029509
Processing META...
✅ META Trained. Test MAE: 0.017388
Processing ZS...
✅ ZS Trained. Test MAE: 0.017973
Processing OKTA...
✅ OKTA Trained. Test MAE: 0.017343
Processing DDOG...
✅ DDOG Trained. Test MAE: 0.019836
✅ Data for DDOG pickled for the Ensemble file.

      CURRENT MARKET SIGNALS SUMMARY
      Ticker       Close Signal     RSI_14  Avg_Pred_Ret
1991    AAPL  271.859985   SELL  31.277093     -0.006222
3983    MSFT  483.619995   HOLD  57.341837     -0.001016
5975   GOOGL  313.000000   HOLD  41.645432     -0.002330
7967    AMZN  230.820007   HOLD  48.034414     -0.001145
9959    TSLA  449.720001   SELL  49.359208     -0.008003
11951   META  660.090027   SELL  56.278618     -0.008854
13892     ZS  224.919998   HOLD 

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error # Add this at the top!

# =====================
# CONFIG
# =====================
TICKERS = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", "ZS", "OKTA", "DDOG"]
START_DATE = "2018-01-01"

def create_features(df):
    df = df.copy()
    # Handle multi-index columns immediately if they exist
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    
    close = df["Close"]
    
    # 1. Volatility - ATR approx
    df['High_Low'] = df['High'] - df['Low']
    df['ATR'] = df['High_Low'].rolling(14).mean()
    
    # 2. Log Returns 
    df["Log_Ret"] = np.log(close / close.shift(1))
    
    # 3. Momentum (ROC)
    df["Momentum_10"] = close.pct_change(10)
    
    # 4. Standard Indicators
    df["SMA_20"] = close.rolling(20).mean()
    
    # RSI Calculation
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df["RSI_14"] = 100 - (100 / (1 + rs))
    
    df["Target_Ret"] = df["Log_Ret"].shift(-1)
    df.dropna(inplace=True)
    return df

def generate_signal(row):
    pred_return = row["Avg_Pred_Ret"]
    threshold = 0.005 # 0.5%
    
    if pred_return > threshold and row["RSI_14"] < 70:
        return "BUY"
    elif pred_return < -threshold and row["RSI_14"] > 30:
        return "SELL"
    else:
        return "HOLD"

# =====================
# ENGINE FUNCTION
# =====================


def run_prediction(ticker_to_predict):
    final_data = []
    
    for ticker in TICKERS:
        print(f"Processing {ticker}...")
        df_raw = yf.download(ticker, start=START_DATE, progress=False)
        
        if df_raw.empty:
            continue
            
        df = create_features(df_raw)
        features = ["Log_Ret", "Momentum_10", "RSI_14", "ATR", "SMA_20"]
        X = df[features]
        y = df["Target_Ret"] 

        split = int(len(df) * 0.8)
        X_train, X_test = X.iloc[:split], X.iloc[split:]
        y_train, y_test = y.iloc[:split], y.iloc[split:]

        # 1. Initialize Models
        rf = RandomForestRegressor(n_estimators=100, max_depth=5, random_state=42)
        xgb = XGBRegressor(n_estimators=100, learning_rate=0.05, max_depth=4)

        # 2. Train Models
        rf.fit(X_train, y_train)
        xgb.fit(X_train, y_train)

        # 3. EVALUATE IMMEDIATELY (This fixes your error)
        mae = mean_absolute_error(y_test, rf.predict(X_test))
        print(f"✅ {ticker} Trained. Test MAE: {mae:.6f}")

        # 4. Generate Predictions
        df["Avg_Pred_Ret"] = (rf.predict(X) + xgb.predict(X)) / 2
        df["Signal"] = df.apply(generate_signal, axis=1)
        df["Ticker"] = ticker
        
        # Strategy Returns
        df["Strategy_Ret"] = np.where(df["Signal"].shift(1) == "BUY", df["Log_Ret"], 0)
        df["Strategy_Ret"] = np.where(df["Signal"].shift(1) == "SELL", -df["Log_Ret"], df["Strategy_Ret"])
        df["Cumulative_Ret"] = df["Strategy_Ret"].cumsum()
        
        final_data.append(df)

    full_report = pd.concat(final_data)
    full_report.to_csv("optimized_trading_signals.csv")
    import pickle

    # 1. Prepare a dictionary of the results
    output_for_ensemble = {
        "ticker": ticker,
        "pred_return": df["Avg_Pred_Ret"].iloc[-1],
        "rsi": df["RSI_14"].iloc[-1],
        "atr": df["ATR"].iloc[-1],
        "raw_data": df.tail(5) # Sending last 5 days for context
    }

    # 2. Save to a pickle file
    file_name = f"{ticker}_model_output.pkl"
    with open(file_name, 'wb') as f:
        pickle.dump(output_for_ensemble, f)

    print(f"✅ Data for {ticker} pickled for the Ensemble file.")
    return full_report[full_report["Ticker"] == ticker].iloc[-1]
if __name__ == "__main__":
    # 1. Run the engine
    if __name__ == "__main__":
    # This loop runs the engine for EVERY ticker in your list
    # This creates all the .pkl files your API needs
        for t in TICKERS:
            try:
                run_prediction(t)
            except Exception as e:
                print(f"Error processing {t}: {e}")

        print("\n✅ All pickle files generated! Your API is now ready.")

        # 2. ADD THIS: Read the CSV we just created to show the full summary
        summary_df = pd.read_csv("optimized_trading_signals.csv")
        
        # 3. Get only the LATEST row for every ticker
        last_signals = summary_df.groupby('Ticker').tail(1)
        
        # 4. Print the "Live Dashboard" to your screen
        print("\n" + "="*50)
        print("      CURRENT MARKET SIGNALS SUMMARY")
        print("="*50)
        print(last_signals[['Ticker', 'Close', 'Signal', 'RSI_14', 'Avg_Pred_Ret']])
        print("="*50)

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import pickle
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# 1. Full Ticker List
TICKERS = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", "NVDA", "NIO", "DDOG"] # Add all 50

def run_full_pipeline():
    # CREATE AN EMPTY DICTIONARY BEFORE THE LOOP
    master_rf_results = {} 

    for ticker in TICKERS:
        try:
            print(f"Processing {ticker}...")
            df_raw = yf.download(ticker, start="2018-01-01", progress=False)
            if df_raw.empty: continue
            
            # --- Feature Engineering ---
            df = df_raw.copy()
            if isinstance(df.columns, pd.MultiIndex): df.columns = df.columns.get_level_values(0)
            
            df["Log_Ret"] = np.log(df["Close"] / df["Close"].shift(1))
            df["RSI_14"] = 100 - (100 / (1 + (df["Close"].diff().where(df["Close"].diff() > 0, 0).rolling(14).mean() / 
                                              -df["Close"].diff().where(df["Close"].diff() < 0, 0).rolling(14).mean())))
            df["Target"] = df["Log_Ret"].shift(-1)
            df.dropna(inplace=True)

            # --- Training ---
            features = ["Log_Ret", "RSI_14"]
            X, y = df[features], df["Target"]
            model = RandomForestRegressor(n_estimators=50, max_depth=5).fit(X, y)

            # --- Store Result in Dictionary ---
            master_rf_results[ticker] = {
                "ticker": ticker,
                "prediction": float(model.predict(X.tail(1))[0]),
                "rsi": float(df["RSI_14"].iloc[-1])
            }
            
        except Exception as e:
            print(f"Error on {ticker}: {e}")

    # 2. SAVE AFTER THE LOOP FINISHES (Crucial Fix)
    with open("master_rf_results.pkl", "wb") as f:
        pickle.dump(master_rf_results, f)
    
    print(f"✅ Master file saved with {len(master_rf_results)} tickers.")

if __name__ == "__main__":
    run_full_pipeline()

Processing AAPL...
Processing MSFT...
Processing GOOGL...
Processing AMZN...
Processing TSLA...
Processing META...
Processing NVDA...
Processing NIO...
Processing DDOG...
✅ Master file saved with 9 tickers.
