In [59]:
import requests
from datetime import datetime, timedelta
import time
import os
import alpaca_trade_api as tradeapi
import pandas as pd
from transformers import pipeline, BertTokenizer, BertForSequenceClassification

In [60]:
top_50_sp500_stocks = [
    'AAPL',  # Apple Inc.
    'MSFT',  # Microsoft Corporation
    'AMZN',  # Amazon.com Inc.
    'NVDA',  # NVIDIA Corporation
    'GOOGL', # Alphabet Inc. (Class A)
    'GOOG',  # Alphabet Inc. (Class C)
    'TSLA',  # Tesla Inc.
    'META',  # Meta Platforms Inc.
    'BRK.B', # Berkshire Hathaway Inc. (Class B)
    'UNH',   # UnitedHealth Group Incorporated
    'JNJ',   # Johnson & Johnson
    'XOM',   # Exxon Mobil Corporation
    'V',     # Visa Inc.
    'PG',    # Procter & Gamble Co.
    'JPM',   # JPMorgan Chase & Co.
    'LLY',   # Eli Lilly and Company
    'MA',    # Mastercard Incorporated
    'HD',    # The Home Depot Inc.
    'CVX',   # Chevron Corporation
    'MRK',   # Merck & Co. Inc.
    'PEP',   # PepsiCo Inc.
    'ABBV',  # AbbVie Inc.
    'KO',    # The Coca-Cola Company
    'PFE',   # Pfizer Inc.
    'AVGO',  # Broadcom Inc.
    'COST',  # Costco Wholesale Corporation
    'MCD',   # McDonald's Corporation
    'TMO',   # Thermo Fisher Scientific Inc.
    'WMT',   # Walmart Inc.
    'DHR',   # Danaher Corporation
    'NKE',   # NIKE Inc.
    'DIS',   # The Walt Disney Company
    'ADBE',  # Adobe Inc.
    'NFLX',  # Netflix Inc.
    'VZ',    # Verizon Communications Inc.
    'CSCO',  # Cisco Systems Inc.
    'ABT',   # Abbott Laboratories
    'ACN',   # Accenture plc
    'NEE',   # NextEra Energy Inc.
    'LIN',   # Linde plc
    'TXN',   # Texas Instruments Incorporated
    'MDT',   # Medtronic plc
    'PM',    # Philip Morris International Inc.
    'WFC',   # Wells Fargo & Company
    'HON',   # Honeywell International Inc.
    'QCOM',  # QUALCOMM Incorporated
    'BMY',   # Bristol-Myers Squibb Company
    'LOW',   # Lowe's Companies Inc.
    'UNP',   # Union Pacific Corporation
    'RTX'    # Raytheon Technologies Corporation
]


In [61]:
# Alpaca API credentials
ALPACA_API_KEY = "PKDFQIPQCXE38TAG1WKG"
ALPACA_SECRET_KEY = "LRaWwamsMm0WDa58x8S0z8wje9gRcYhcgtMf7C55"
ALPACA_URL = 'https://paper-api.alpaca.markets'

# Initialize Alpaca API
alpaca = tradeapi.REST(ALPACA_API_KEY, ALPACA_SECRET_KEY, base_url=ALPACA_URL, api_version='v2')

In [62]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load your data
data = pd.read_csv('stock_data.csv')

# Normalize the input features (open, high, low, close, volume, and sentiment score)
scaler = MinMaxScaler()
data[['open', 'high', 'low', 'close', 'volume', 'average_sentiment_score']] = scaler.fit_transform(
    data[['open', 'high', 'low', 'close', 'volume', 'average_sentiment_score']]
)

# Create the target variable (next day's closing price)
data['next_day_close'] = data.groupby('symbol')['close'].shift(-1)

# Drop any rows with NaN values (e.g., the last row for each symbol)
data.dropna(inplace=True)

# Split data into features and target
X = data[['open', 'high', 'low', 'close', 'volume', 'average_sentiment_score']]
y = data['next_day_close']


In [63]:
# load stock data csv into dataframe

stock_data = pd.read_csv('stock_data.csv')

stock_data.head()

Unnamed: 0,symbol,time,open,high,low,close,volume,date,average_sentiment_score,log_sentiment_score
0,AAPL,2024-07-24 00:00:00-04:00,224.115,224.765,217.165,218.585,1005989,2024-07-24 00:00:00.000000,0.974538,-0.025792
1,AAPL,2024-07-25 00:00:00-04:00,218.88,220.81,214.64,217.42,642703,2024-07-25 00:00:00.000000,0.947577,-0.053847
2,AAPL,2024-07-26 00:00:00-04:00,218.94,219.48,216.04,218.03,661067,2024-07-26 00:00:00.000000,0.972036,-0.028362
3,AAPL,2024-07-29 00:00:00-04:00,217.375,219.28,215.79,218.185,381966,2024-07-29 00:00:00.000000,0.955637,-0.045377
4,AAPL,2024-07-30 00:00:00-04:00,219.3,220.27,216.12,218.68,559407,2024-07-30 00:00:00.000000,0.964756,-0.03588


In [130]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from datetime import datetime, timedelta
import numpy as np

def add_features(df):
    """
    Add moving averages, EMA, and time trend to the dataframe.
    """
    df = df.copy()  # Avoid SettingWithCopyWarning
    df.loc[:, '5_day_sma'] = df['close'].rolling(window=5).mean()
    df.loc[:, '5_day_ema'] = df['close'].ewm(span=5, adjust=False).mean()
    df.loc[:, 'time_trend'] = np.arange(len(df))
    return df

def prepare_data(file_path='stock_data.csv'):
    """
    Prepare the data by handling missing values, adding features, and splitting into features and target.
    """
    # Load the stock data
    stock_data = pd.read_csv(file_path)

    # Convert 'time' column to datetime
    stock_data['time'] = pd.to_datetime(stock_data['time'])

    # Add new features
    stock_data = add_features(stock_data)

    # Prepare input features (X) and labels (y)
    feature_columns = ['close', 'log_sentiment_score', '5_day_sma', '5_day_ema', 'time_trend']
    X = stock_data[feature_columns]
    y = stock_data['close'].shift(-1)  # Next day's closing price as the target

    # Handle NaN values using SimpleImputer
    imputer = SimpleImputer(strategy='mean')
    X = imputer.fit_transform(X)

    # Drop the last row from X to match y's length
    X = X[:-1]
    y = y.dropna()

    # Scale features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    return X, y, imputer, scaler, feature_columns

def train_model(X, y):
    """
    Train a LinearRegression model on the provided data.
    """
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a LinearRegression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    return model

def predict_and_decide(symbols, model, imputer, scaler, feature_columns, file_path='stock_data.csv'):
    """
    Predict the next day's stock price for each symbol and decide whether to buy, sell, or hold.
    """
    # Load the stock data
    stock_data = pd.read_csv(file_path)

    decisions = []

    for symbol in symbols:
        # Filter the data for the given symbol
        symbol_data = stock_data[stock_data['symbol'] == symbol]

        # Add features
        symbol_data = add_features(symbol_data)

        # Get the latest available data
        latest_data = symbol_data.iloc[-1]

        # Prepare the input features for prediction
        X_latest = latest_data[feature_columns].values.reshape(1, -1)

        # Handle NaN values in the input features
        X_latest = imputer.transform(X_latest)

        # Scale the input features
        X_latest = scaler.transform(X_latest)

        # Predict the next day's price
        predicted_price = model.predict(X_latest)[0]

        current_price = latest_data['close']

        # Decision-making logic
        if predicted_price > current_price * 1.02:
            decision = 'buy'
        elif predicted_price < current_price * 0.98:
            decision = 'sell'
        else:
            decision = 'hold'

        decisions.append((symbol, decision, current_price, predicted_price))

    return decisions

# Example usage


# Prepare the data
X, y, imputer, scaler, feature_columns = prepare_data()

# Train the model
model = train_model(X, y)

# Get trading decisions for each symbol
decisions = predict_and_decide(top_50_sp500_stocks, model, imputer, scaler, feature_columns)




[('AAPL', 'hold', 225.91, 230.3719948440367),
 ('MSFT', 'hold', 417.04, 412.6170959008291),
 ('AMZN', 'buy', 176.87, 182.72455934939074),
 ('NVDA', 'buy', 125.53, 133.8239390451099),
 ('GOOGL', 'buy', 164.925, 171.16261792386916),
 ('GOOG', 'buy', 166.59, 172.75648250328868),
 ('TSLA', 'hold', 215.925, 219.9355247615386),
 ('META', 'hold', 535.51, 527.1978448850224),
 ('BRK.B', 'hold', 450.75, 445.3134496329904),
 ('UNH', 'hold', 578.55, 568.3838984263592),
 ('JNJ', 'buy', 162.35, 168.63559433697873),
 ('XOM', 'buy', 115.21, 123.38966282775834),
 ('V', 'hold', 268.58, 270.50953334535774),
 ('PG', 'buy', 169.96, 175.91657168532203),
 ('JPM', 'buy', 217.265, 222.12955416434187),
 ('LLY', 'sell', 953.52, 927.5926381720408),
 ('MA', 'hold', 469.64, 463.2694418815016),
 ('HD', 'hold', 366.78, 364.7420820008504),
 ('CVX', 'buy', 146.87, 153.79352527000793),
 ('MRK', 'buy', 116.665, 124.8503505489163),
 ('PEP', 'buy', 175.94, 181.68232879531),
 ('ABBV', 'buy', 197.19, 202.10321543614145),
 ('

In [137]:
def execute_trade(symbol, decision, hedge_sell_ratio=0.01, max_investment_ratio=0.01):
    account = alpaca.get_account()
    cash = float(account.cash)
    
    # Check if there's an existing position in the symbol
    position = alpaca.get_position(symbol) if symbol in [p.symbol for p in alpaca.list_positions()] else None
    current_qty = float(position.qty) if position else 0

    # Get the current price directly from the decisions tuple
    current_price = None
    for dec_symbol, dec, curr_price, _ in decisions:
        if dec_symbol == symbol:
            current_price = curr_price
            break

    if current_price is None:
        print(f"Current price for {symbol} not found in decisions.")
        return

    if decision == 'buy':
        # Calculate how much to buy based on available cash and hedging
        qty_to_buy = (cash * max_investment_ratio) / current_price
        alpaca.submit_order(
            symbol=symbol,
            qty=qty_to_buy,
            side='buy',
            type='market',
            time_in_force='day'
        )
    elif decision == 'sell' and current_qty > 0:
        # Calculate how much to sell, hedging by selling only a portion of current holdings
        qty_to_sell = current_qty * hedge_sell_ratio
        alpaca.submit_order(
            symbol=symbol,
            qty=qty_to_sell,
            side='sell',
            type='market',
            time_in_force='day'
        )

# Example usage integrated with the trading strategy
def main():
    # Prepare the data
    X, y, imputer, scaler, feature_columns = prepare_data()

    # Train the model
    model = train_model(X, y)

    # Define your list of symbols
    symbols = top_50_sp500_stocks

    # Get trading decisions for each symbol
    decisions = predict_and_decide(symbols, model, imputer, scaler, feature_columns)

    # Execute trades based on the decisions
    for symbol, decision, current_price, predicted_price in decisions:
        execute_trade(symbol, decision)

if __name__ == "__main__":
    main()


