<a href="https://colab.research.google.com/github/mjgpinheiro/Econophysics/blob/main/ML_Prophet_Stocks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import yfinance as yf
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor
import ta
from prophet import Prophet
from sklearn.impute import SimpleImputer
from sklearn.linear_model import Lasso


# Download data
data = yf.download('AAPL', start='2016-01-01', end='2023-03-26')
if data.empty:
    raise ValueError("No data found for the specified dates.")
prices = data['Close'].to_numpy()

# Add technical indicators
rsi = ta.momentum.RSIIndicator(data['Close'], window=14)
data['RSI'] = rsi.rsi()

stoch = ta.momentum.StochasticOscillator(high=data['High'], low=data['Low'], close=data['Close'], window=14, smooth_window=3)
data['%K'] = stoch.stoch()


def predict(prices):
    # Split data into training and testing sets
    train_prices = prices[:-365]
    test_prices = prices[-365:]
    if len(train_prices) < 5:
        return ["hold"] * len(test_prices), 0
    print(train_prices,test_prices)

    # Scale the data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(train_prices.reshape(-1, 1))

    # Add RSI and Stochastic Oscillator to training data
    train_rsi = data['RSI'][:-365].to_numpy().reshape(-1, 1)
    train_k_percent = data['%K'][:-365].to_numpy().reshape(-1, 1)
    X_train = np.concatenate((X_train, train_rsi, train_k_percent), axis=1)

    # Use grid search to find the best hyperparameters
    if len(train_prices) >= 5:
        param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10]}
        # Impute missing values with the mean
        imputer = SimpleImputer(strategy='mean')
        X_train = imputer.fit_transform(X_train)

        grid_search = GridSearchCV(Lasso(), param_grid, cv=5)
        grid_search.fit(X_train[:-1], X_train[1:])
        best_alpha = grid_search.best_params_['alpha']
    else:
        best_alpha = 0.001

    # Train the model
    model = Sequential()
    model.add(LSTM(128, input_shape=(1, 3)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train[:-1].reshape(-1, 1, 3), X_train[1:], epochs=50, batch_size=1, verbose=0)

    # Make prediction with LSTM model
    X_test = scaler.transform(test_prices.reshape(-1, 1))
#    test_rsi = data['RSI'][-365:].to_numpy().reshape(-1, 1)
    test_rsi = data['RSI'][-365:].to_numpy().reshape(-1, 1)

    test_k_percent = data['%K'][-365:].to_numpy().reshape(-1, 1)
    X_test = np.concatenate((X_test, test_rsi, test_k_percent), axis=1)
    y_pred = model.predict(X_test.reshape(-1, 1, 3))
    y_pred = scaler.inverse_transform(y_pred).flatten()

    # Calculate average return
    daily_returns = np.diff(prices[-365:]) / prices[-365:-1]
    strategy_returns = np.diff(y_pred) / prices[-365:-1]
    avg_return = np.mean(strategy_returns)

    return y_pred, avg_return

# Predict with LSTM model
lstm_prices, lstm_avg_return = predict(prices)

# Combine predictions
combined_predictions = []
for i in range(len(lstm_prices)):
  if lstm_prices[i] == "buy":
     combined_predictions.append("buy")
  elif lstm_prices[i] == "sell":
     combined_predictions.append("sell")
  else:
     prophet_change = prophet_prices[i] - prophet_prices[i-1]
     lstm_change = prices[-365+i+1] - prices[-365+i]
     if prophet_change > 0 and lstm_change > 0:
       combined_predictions.append("buy")
     elif prophet_change < 0 and lstm_change < 0:
       combined_predictions.append("sell")
     else:
       combined_predictions.append("hold")

# Calculate Sharpe ratio
prophet_prices = prophet_forecast['yhat'].tail(365).to_numpy()

daily_returns = np.diff(prices[-365:]) / prices[-365:-1]
strategy_returns = np.diff(prophet_prices) / prices[-365:-1]
sharpe_ratio = (np.mean(strategy_returns) - np.mean(daily_returns)) / np.std(daily_returns)


#Print trading strategy predictions
print("Trading strategy predictions:")
for i in range(len(combined_predictions)):
  print(f"Day {i+1}: {combined_predictions[i]}")
print(f"Sharpe ratio: {sharpe_ratio}")





[*********************100%***********************]  1 of 1 completed
[ 26.33749962  25.67749977  25.17499924 ... 143.28999329 142.8999939
 142.80999756] [141.50999451 140.91000366 143.75999451 144.83999634 146.55000305
 148.75999451 149.25999451 149.47999573 148.69000244 148.63999939
 149.32000732 148.8500061  152.57000732 149.80000305 148.96000671
 150.02000427 151.49000549 150.96000671 151.27999878 150.44000244
 150.80999756 147.91999817 147.86999512 149.99000549 150.
 151.         153.49000549 157.86999512 160.55000305 161.02000427
 161.41000366 161.94000244 156.80999756 160.24000549 165.30000305
 164.77000427 163.75999451 161.83999634 165.32000732 171.17999268
 175.08000183 174.55999756 179.44999695 175.74000549 174.33000183
 179.30000305 172.25999451 171.13999939 169.75       172.99000549
 175.63999939 176.27999878 180.33000183 179.28999329 179.38000488
 178.19999695 177.57000732 182.00999451 179.69999695 174.91999817
 172.         172.16999817 172.19000244 175.08000183 175.52999