In [20]:
import alpaca
from alpaca.data.historical import StockHistoricalDataClient
from alpaca.data.requests import StockBarsRequest
from alpaca.data.timeframe import TimeFrame
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import matplotlib.pyplot as plt

# Set your API key and secret
API_KEY = 'PKPJWD9MRK2Z3T4L41CK'
API_SECRET = 'wkyHvvmfGRdMr9H57KddPrD8zwG3sQ0KwO4GlojT'

# Initialize the Alpaca data client
client = StockHistoricalDataClient(API_KEY, API_SECRET)

# Define the function to get historical data
def get_historical_data(symbol, start_date, end_date):
    request_params = StockBarsRequest(
        symbol_or_symbols=symbol,
        timeframe=TimeFrame.Day,
        start=start_date,
        end=end_date
    )
    bars = client.get_stock_bars(request_params).df
    return bars

# Calculate RSI
def calculate_rsi(data, window=14):
    delta = data['close'].diff()
    gain = (delta.where(delta > 0, 0)).fillna(0)
    loss = (-delta.where(delta < 0, 0)).fillna(0)
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# Calculate EMA
def calculate_ema(data, span=20):
    ema = data['close'].ewm(span=span, adjust=False).mean()
    return ema

# Calculate SMA
def calculate_sma(data, window=7):
    sma = data['close'].rolling(window=window).mean()
    return sma

# Calculate Bollinger Bands
def calculate_bollinger_bands(data, window=20):
    rolling_mean = data['close'].rolling(window=window).mean()
    rolling_std = data['close'].rolling(window=window).std()
    bollinger_upper = rolling_mean + (rolling_std * 2)
    bollinger_lower = rolling_mean - (rolling_std * 2)
    return bollinger_upper, rolling_mean, bollinger_lower

# Calculate MACD
def calculate_macd(data, short_window=12, long_window=26, signal_window=9):
    short_ema = data['close'].ewm(span=short_window, adjust=False).mean()
    long_ema = data['close'].ewm(span=long_window, adjust=False).mean()
    macd = short_ema - long_ema
    signal = macd.ewm(span=signal_window, adjust=False).mean()
    return macd, signal

# Calculate KD (Stochastic Oscillator)
def calculate_kd(data, window=14):
    low_min = data['low'].rolling(window=window).min()
    high_max = data['high'].rolling(window=window).max()
    k = 100 * (data['close'] - low_min) / (high_max - low_min)
    d = k.rolling(window=3).mean()
    return k, d

# Prepare data for LSTM model
def prepare_data_for_lstm(data, n_lookback):
    X, y = [], []
    for i in range(n_lookback, len(data)):
        X.append(data[i-n_lookback:i])
        y.append(data[i, -1])  # 'adjusted close' price is the target
    return np.array(X), np.array(y)

# Define the model creation function
model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(30,17), activation='tanh'))
model.add(Dropout(0.2))  # Dropout layer to prevent overfitting
model.add(Dense(25, activation='tanh'))
model.add(Dense(1))

# Get today's date and the date one year ago
end_date = datetime.now() - timedelta(days=2)
start_date = end_date - timedelta(days=1500)

# Specify the ticker
ticker = 'MSFT'

# Fetch historical data for the specified ticker and calculate indicators
print(f"Fetching data for {ticker}")
try:
    data = get_historical_data(ticker, start_date, end_date)
    data['RSI'] = calculate_rsi(data)
    data['EMA_20'] = calculate_ema(data, span=20)
    data['EMA_50'] = calculate_ema(data, span=50)
    data['EMA_100'] = calculate_ema(data, span=100)
    data['EMA_200'] = calculate_ema(data, span=200)
    data['SMA_7'] = calculate_sma(data, window=7)
    data['SMA_21'] = calculate_sma(data, window=21)
    data['Bollinger_Upper'], data['Bollinger_Middle'], data['Bollinger_Lower'] = calculate_bollinger_bands(data)
    data['MACD'], data['MACD_Signal'] = calculate_macd(data)
    data['%K'], data['%D'] = calculate_kd(data)
    data = data.drop(columns=['trade_count', 'volume', 'vwap'])  # Drop the specified columns

    # Ensure 'date' is a column and reset index if necessary
    data = data.reset_index()
    data.rename(columns={'timestamp': 'date'}, inplace=True)

    # Fill missing values
    data = data.fillna(method='ffill').fillna(method='bfill')

except Exception as e:
    print(f"Could not fetch data for {ticker}: {e}")

# Select relevant features for model input
X_features = ['open', 'high', 'low', 'RSI', 'EMA_20', 'EMA_50', 'EMA_100',
              'EMA_200', 'SMA_7', 'SMA_21', 'Bollinger_Upper',
              'Bollinger_Middle', 'Bollinger_Lower', 'MACD', 'MACD_Signal', '%K', '%D']
y_feature = 'close'

# Split the data into training and testing sets
train_size = int(len(data) * 0.7)
train_data = data[:train_size]
test_data = data[train_size:]

# Normalize the data
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data[X_features])
test_scaled = scaler.transform(test_data[X_features])

# Prepare the data for LSTM model
n_lookback = 30
X_train, y_train = prepare_data_for_lstm(train_scaled, n_lookback)
X_test, y_test = prepare_data_for_lstm(test_scaled, n_lookback)

# Create the LSTM model
model = create_model(units=100, dropout_rate=0.2, input_shape=(n_lookback, len(X_features)), activation='tanh', optimizer='rmsprop')

# Train the model
#early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Make predictions
train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)

# Invert scaling to get actual prices
y_train_scaled = scaler.inverse_transform(np.concatenate([np.zeros((len(y_train), len(X_features)-1)), y_train.reshape(-1, 1)], axis=1))[:, -1]
train_predictions_scaled = scaler.inverse_transform(np.concatenate([np.zeros((len(train_predictions), len(X_features)-1)), train_predictions], axis=1))[:, -1]

y_test_scaled = scaler.inverse_transform(np.concatenate([np.zeros((len(y_test), len(X_features)-1)), y_test.reshape(-1, 1)], axis=1))[:, -1]
test_predictions_scaled = scaler.inverse_transform(np.concatenate([np.zeros((len(test_predictions), len(X_features)-1)), test_predictions], axis=1))[:, -1]


  super().__init__(**kwargs)


Fetching data for MSFT
Epoch 1/50


  data = data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 54ms/step - loss: 0.2213 - val_loss: 0.0632
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 0.0867 - val_loss: 0.0445
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 0.0565 - val_loss: 0.2376
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0628 - val_loss: 0.1069
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 0.0383 - val_loss: 0.0197
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0363 - val_loss: 0.0161
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 0.0267 - val_loss: 0.0214
Epoch 8/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 0.0241 - val_loss: 0.0203
Epoch 9/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m