# import neccessary libraries

In [56]:
#import libraries needed
#read and download data
import pandas as pd
import yfinance as yf
import numpy as np
from math import sqrt
#train model
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.metrics import mean_squared_error
from pytrends.request import TrendReq
#dates
from datetime import date
from pandas.tseries.offsets import BDay
#plotting
import plotly.graph_objs as go

# Define functions to fetch data
* get stock data
* get google trends data
* combine data

In [57]:
def grab_stock(ticker,start,end):
    # Fetch historical data
    stock_ticker = ticker
    start_date = start
    end_date = end
    stock_data = yf.download(stock_ticker, start_date, end_date)
    return stock_data

# Test stock data collection

In [58]:
print(grab_stock("TSLA","2020-10-03","2020-11-05"))

[*********************100%***********************]  1 of 1 completed
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2020-10-05  141.116669  144.546661  139.776672  141.893326  141.893326   
2020-10-06  141.263336  142.926666  135.350006  137.993332  137.993332   
2020-10-07  139.956665  143.300003  137.949997  141.766663  141.766663   
2020-10-08  146.146667  146.333328  141.766663  141.973328  141.973328   
2020-10-09  143.376663  144.863327  142.153336  144.666672  144.666672   
2020-10-12  147.333328  149.580002  146.193329  147.433334  147.433334   
2020-10-13  147.783340  149.630005  145.533340  148.883331  148.883331   
2020-10-14  149.926666  155.300003  149.116669  153.766663  153.766663   
2020-10-15  150.103333  152.190002  147.500000  149.626663  149.626663   
2020-10-16  151.479996  151.983337  146.283340  146.556671  146.556671   
2020-10-19  148.746674  149.000000  142.956

In [59]:
def grab_google_trends(word, start, end):
    # Create pytrends object
    pytrends = TrendReq(hl='en-US', tz=360)
    # Set search parameters
    kw_list = [word]  # List of keywords or topics
    # Get date range
    check_dates = start + ' ' + end
    pytrends.build_payload(kw_list, timeframe=check_dates, geo='US')
    # Get Google Trends data
    trends_data = pytrends.interest_over_time()
    # Filter out weekends
    trends_data = trends_data[~trends_data.index.dayofweek.isin([5, 6])]  # Exclude Saturday (5) and Sunday (6)
    # Return the DataFrame
    return trends_data

# Test Google Trends Data Collection

In [60]:
print(grab_google_trends("TSLA","2020-10-03","2020-11-05"))

            TSLA  isPartial
date                       
2020-10-05    65      False
2020-10-06    57      False
2020-10-07    54      False
2020-10-08    55      False
2020-10-09    52      False
2020-10-12    54      False
2020-10-13    54      False
2020-10-14    61      False
2020-10-15    54      False
2020-10-16    51      False
2020-10-19    48      False
2020-10-20    52      False
2020-10-21   100      False
2020-10-22    93      False
2020-10-23    63      False
2020-10-26    44      False
2020-10-27    44      False
2020-10-28    47      False
2020-10-29    45      False
2020-10-30    50      False
2020-11-02    44      False
2020-11-03    47      False
2020-11-04    32      False
2020-11-05    35      False


In [61]:
def combine_data(ticker, start, end):
    # Get the data
    stock_data = grab_stock(ticker, start, end)
    trends_data = grab_google_trends(ticker, start, end)
    # Merge the data
    combined_data = stock_data.merge(trends_data, left_index=True, right_index=True)
    # Drop duplicate "date" column from trends_data
    combined_data = combined_data.loc[:,~combined_data.columns.duplicated()]
    combined_data.dropna(inplace=True)    
    return combined_data

# Test data combination

In [62]:
print(combine_data("TSLA","2020-10-03","2020-11-05"))

[*********************100%***********************]  1 of 1 completed
                  Open        High         Low       Close   Adj Close  \
2020-10-05  141.116669  144.546661  139.776672  141.893326  141.893326   
2020-10-06  141.263336  142.926666  135.350006  137.993332  137.993332   
2020-10-07  139.956665  143.300003  137.949997  141.766663  141.766663   
2020-10-08  146.146667  146.333328  141.766663  141.973328  141.973328   
2020-10-09  143.376663  144.863327  142.153336  144.666672  144.666672   
2020-10-12  147.333328  149.580002  146.193329  147.433334  147.433334   
2020-10-13  147.783340  149.630005  145.533340  148.883331  148.883331   
2020-10-14  149.926666  155.300003  149.116669  153.766663  153.766663   
2020-10-15  150.103333  152.190002  147.500000  149.626663  149.626663   
2020-10-16  151.479996  151.983337  146.283340  146.556671  146.556671   
2020-10-19  148.746674  149.000000  142.956665  143.610001  143.610001   
2020-10-20  143.916672  143.916672  139.683

# Define functions to train model and return predicitons

## 1. try moving averages on linear regression model

In [90]:
#get next week stock price predictions
def get_predictionsV1(ticker, start="2020-01-01", end=date.today()):
    # get historical data
    data = combine_data(ticker,start,end)
    # Calculate moving average
    data['SMA_5'] = data['Close'].rolling(window=5).mean()   #week
    #Clean data
    data.dropna(inplace=True)
    # Check if its empty
    if data.empty:
        print(f"No data available for {ticker}. Skipping...")
        return None, None
    #Define features and target variables
    feature_columns = ['SMA_5']
    X = data[feature_columns]
    y = data['Close']
    # Normalize features
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X[:-5], y[:-5], test_size=0.2, random_state=42)
    # Create & train the linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate model
    y_pred = model.predict(X_test)
    print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))
    print('Root Mean Squared Error: ', sqrt(mean_squared_error(y_test, y_pred)))

    return data['Close']

In [91]:
end=date.today()
get_predictionsV1("TSLA","2010-10-03",str(end))

[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  16.196885782392837
Root Mean Squared Error:  35.36003380319836


2011-04-01      1.777333
2011-06-01      1.901333
2011-07-01      1.934667
2011-08-01      1.918000
2011-09-01      1.600000
                 ...    
2022-11-01    227.820007
2022-12-01    194.699997
2023-02-01    181.410004
2023-03-01    202.770004
2023-05-01    161.830002
Name: Close, Length: 95, dtype: float64

## 2. Try Adding moving average for week, 2 weeks, month, quarter

In [92]:
#get next week stock price predictions
def get_predictionsV2(ticker, start="2020-01-01", end=date.today()):
    # get historical data
    data = combine_data(ticker,start,end)
    # Calculate moving averages
    data['SMA_5'] = data['Close'].rolling(window=5).mean()   #week 
    data['SMA_10'] = data['Close'].rolling(window=10).mean() #2 week
    data['SMA_20'] = data['Close'].rolling(window=20).mean() # month
    data['SMA_40'] = data['Close'].rolling(window=40).mean() # quarter

    #Clean data
    data.dropna(inplace=True)
    # Check if its empty
    if data.empty:
        print(f"No data available for {ticker}. Skipping...")
        return None, None
    
    #Define features and target variables
    feature_columns = ['SMA_5', 'SMA_10', 'SMA_20','SMA_40']
    X = data[feature_columns]
    y = data['Close']

    # Normalize features
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)

    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X[:-5], y[:-5], test_size=0.2, random_state=42)

    # Create & train the linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate model
    y_pred = model.predict(X_test)
    print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))
    print('Root Mean Squared Error: ', sqrt(mean_squared_error(y_test, y_pred)))

    return data['Close']

In [93]:
get_predictionsV2("TSLA","2010-10-03",str(end))

[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  24.02474715843838
Root Mean Squared Error:  34.12773578661605


2016-02-01     13.129333
2016-03-01     12.423333
2016-04-01     15.839333
2016-06-01     14.637333
2016-07-01     14.433333
2016-08-01     15.334000
2016-09-01     13.384667
2016-11-01     12.719333
2016-12-01     12.125333
2017-02-01     16.615999
2017-03-01     16.667999
2017-05-01     21.521999
2017-06-01     22.691334
2017-08-01     21.304667
2017-09-01     23.693333
2017-11-01     21.405333
2017-12-01     20.435333
2018-02-01     23.283333
2018-03-01     22.062000
2018-05-01     19.994667
2018-06-01     19.454666
2018-08-01     20.056000
2018-10-01     20.713333
2018-11-01     22.952000
2019-02-01     20.813999
2019-03-01     19.652666
2019-04-01     19.278667
2019-05-01     15.600667
2019-07-01     15.144667
2019-08-01     15.590000
2019-10-01     16.312668
2019-11-01     20.887333
2020-04-01     32.104000
2020-05-01     46.754665
2020-06-01     59.873333
2020-07-01     74.641998
2020-09-01    158.350006
2020-10-01    149.386673
2020-12-01    194.919998
2021-02-01    279.936676


In [104]:
#get next week stock price predictions
def get_predictionsV2_2(ticker, start="2020-01-01", end=date.today()):
    # get historical data
    data = combine_data(ticker,start,end)
    # Calculate moving averages
    data['SMA_5'] = data['Close'].rolling(window=5).mean()   #week 
    data['SMA_20'] = data['Close'].rolling(window=20).mean() # month

    #Clean data
    data.dropna(inplace=True)
    # Check if its empty
    if data.empty:
        print(f"No data available for {ticker}. Skipping...")
        return None, None
    
    #Define features and target variables
    feature_columns = ['SMA_5', 'SMA_20']
    X = data[feature_columns]
    y = data['Close']

    # Normalize features
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)

    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X[:-5], y[:-5], test_size=0.2, random_state=42)

    # Create & train the linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate model
    y_pred = model.predict(X_test)
    print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))
    print('Root Mean Squared Error: ', sqrt(mean_squared_error(y_test, y_pred)))

    return data['Close']

In [105]:
get_predictionsV2_2("TSLA","2010-10-03",str(end))

[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  17.497691381599186
Root Mean Squared Error:  28.480404124063515


2013-03-01      2.310000
2013-04-01      2.928667
2013-05-01      3.552000
2013-07-01      7.812000
2013-08-01      9.036667
                 ...    
2022-11-01    227.820007
2022-12-01    194.699997
2023-02-01    181.410004
2023-03-01    202.770004
2023-05-01    161.830002
Name: Close, Length: 80, dtype: float64

## 3. Try Switching model to LTSM and removing quarter, 2 week, month

In [108]:
#get next week stock price predictions
def get_predictionsV3(ticker, start="2010-01-01", end=date.today()):
    # get historical data
    data = combine_data(ticker,start,end)
    # Calculate moving averages
    data['SMA_5'] = data['Close'].rolling(window=5).mean()   #week 

    #Clean data
    data.dropna(inplace=True)
    # Check if the its empty
    if data.empty:
        print(f"No data available for {ticker}. Skipping...")
        return None, None
    
    #Define features and target variables
    feature_columns = ['SMA_5']
    X = data[feature_columns]
    y = data['Close']

    # Normalize  features
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)

    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X[:-5], y[:-5], test_size=0.2, random_state=42)

    # Reshape the data for LSTM input (samples, time steps, features)
    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

    # Create and train the LSTM model
    model = Sequential()
    model.add(LSTM(64, input_shape=(1, X_train.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    # Evaluate model
    y_pred = model.predict(X_test)
    print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))
    print('Root Mean Squared Error: ', sqrt(mean_squared_error(y_test, y_pred)))

    return data['Close']

In [109]:
get_predictionsV3("TSLA","2010-10-03",str(end))

[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  46.88582720359167
Root Mean Squared Error:  111.07338060322547


2011-04-01      1.777333
2011-06-01      1.901333
2011-07-01      1.934667
2011-08-01      1.918000
2011-09-01      1.600000
                 ...    
2022-11-01    227.820007
2022-12-01    194.699997
2023-02-01    181.410004
2023-03-01    202.770004
2023-05-01    161.830002
Name: Close, Length: 95, dtype: float64

# Switch Models to random forest finder and return predictions

In [118]:
#get next week stock price predictions
def get_predictionsV4(ticker, start="2020-01-01", end=date.today()):
    # get historical data
    data = combine_data(ticker,start,end)
    # Calculate moving averages
    data['SMA_5'] = data['Close'].rolling(window=5).mean()   #week 
    
    
    #Clean data
    data.dropna(inplace=True)
    # Check if its empty
    if data.empty:
        print(f"No data available for {ticker}. Skipping...")
        return None, None
    
    #Define the features and target variables
    feature_columns = ['SMA_5']
    X = data[feature_columns]
    y = data['Close']

    # Normalize the features
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)

    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X[:-5], y[:-5], test_size=0.2, random_state=42)

    # Create and train random forest regressor model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train.values.ravel())

    # Evaluate model
    y_pred = model.predict(X_test)
    print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))
    print('Root Mean Squared Error: ', sqrt(mean_squared_error(y_test, y_pred)))

    # Predict the stock prices for the following trading week
    predictions = []
    for i in range(1, 6):
        X_train_new = np.concatenate((X_train, X[-(5 + i):-i]), axis=0)
        y_train_new = pd.concat([y_train, y[-(5 + i):-i]])
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train.values.ravel())
        prediction = model.predict(X[-i].reshape(1, -1))
        predictions.append(prediction[0])

    return predictions[::-1], data['Close']

In [119]:
get_predictionsV4("TSLA","2010-10-03",str(end))

[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  13.04026109271579
Root Mean Squared Error:  34.16928861177041


([238.50613372802735,
  236.4573667907715,
  236.4573667907715,
  224.0095965576172,
  248.68493682861327],
 2011-04-01      1.777333
 2011-06-01      1.901333
 2011-07-01      1.934667
 2011-08-01      1.918000
 2011-09-01      1.600000
                  ...    
 2022-11-01    227.820007
 2022-12-01    194.699997
 2023-02-01    181.410004
 2023-03-01    202.770004
 2023-05-01    161.830002
 Name: Close, Length: 95, dtype: float64)

# Random Forest Finder came back as best so we added feature Relative Strength index to train on

In [120]:
def get_predictionsV5(ticker, start="2020-01-01", end=date.today()):
    # get historical data
    data = combine_data(ticker,start,end)
    # Calculate moving averages
    data['SMA_5'] = data['Close'].rolling(window=5).mean()   #week 
    
    # Calculate relative strength index (RSI)
    delta = data['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()
    rs = avg_gain / avg_loss
    data['RSI'] = 100 - (100 / (1 + rs))

    #Clean data
    data.dropna(inplace=True)
    # Check if its empty
    if data.empty:
        print(f"No data available for {ticker}. Skipping...")
        return None, None
    
    #Define features and target variables
    feature_columns = ['SMA_5', 'RSI']

    X = data[feature_columns]
    y = data['Close']

    # Normalize the features
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)

    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X[:-5], y[:-5], test_size=0.2, random_state=42)

    # Create and train the linear regression model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train.values.ravel())

    # Evaluate model
    y_pred = model.predict(X_test)
    print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))
    print('Root Mean Squared Error: ', sqrt(mean_squared_error(y_test, y_pred)))

    # Predict the stock prices for the following trading week
    predictions = []
    for i in range(1, 6):
        X_train_new = np.concatenate((X_train, X[-(5 + i):-i]), axis=0)
        y_train_new = pd.concat([y_train, y[-(5 + i):-i]])
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train.values.ravel())
        prediction = model.predict(X[-i].reshape(1, -1))
        predictions.append(prediction[0])

    return predictions[::-1], data['Close']

In [121]:
get_predictionsV5("TSLA","2010-10-03",str(end))

[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  10.431474171666537
Root Mean Squared Error:  23.314011592734005


([284.599737701416,
  260.230701751709,
  260.230701751709,
  262.3122364807129,
  263.28246994018554],
 2012-05-01      2.252000
 2012-06-01      1.876667
 2012-08-01      1.750000
 2012-10-01      1.944000
 2012-11-01      1.950000
                  ...    
 2022-11-01    227.820007
 2022-12-01    194.699997
 2023-02-01    181.410004
 2023-03-01    202.770004
 2023-05-01    161.830002
 Name: Close, Length: 86, dtype: float64)

# Add feature google trends to random forest regressor

In [134]:
def get_predictionsV6(ticker, start="2020-01-01", end=date.today()):
    # get historical data
    data = combine_data(ticker,start,end)
    # Calculate moving averages
    data['SMA_5'] = data['Close'].rolling(window=5).mean()   #week 
    # Calculate relative strength index (RSI)
    delta = data['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()
    rs = avg_gain / avg_loss
    data['RSI'] = 100 - (100 / (1 + rs))

    #Clean data
    data.dropna(inplace=True)
    # Check if its empty
    if data.empty:
        print(f"No data available for {ticker}. Skipping...")
        return None, None
    
    #Define the features and target variables
    feature_columns = ['SMA_5','RSI',ticker]
    X = data[feature_columns]
    y = data['Close']
    # Normalize features
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)

    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X[:-5], y[:-5], test_size=0.2, random_state=42)

    # Create and train the linear regression model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train.values.ravel())

    # Evaluate model
    y_pred = model.predict(X_test)
    print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred))
    print('Root Mean Squared Error: ', sqrt(mean_squared_error(y_test, y_pred)))

    # Predict the stock prices for the following trading week
    predictions = []
    for i in range(1, 6):
        X_train_new = np.concatenate((X_train, X[-(5 + i):-i]), axis=0)
        y_train_new = pd.concat([y_train, y[-(5 + i):-i]])
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train.values.ravel())
        prediction = model.predict(X[-i].reshape(1, -1))
        predictions.append(prediction[0])

    return predictions[::-1], data['Close']

# Test Model on TESLA

In [136]:
get_predictionsV6("TSLA","2010-10-03",str(end))

[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  9.154364962577821
Root Mean Squared Error:  19.345238740490434


([260.3881019592285,
  266.16427017211913,
  263.27997009277345,
  256.6657014465332,
  246.7474673461914],
 2012-05-01      2.252000
 2012-06-01      1.876667
 2012-08-01      1.750000
 2012-10-01      1.944000
 2012-11-01      1.950000
                  ...    
 2022-11-01    227.820007
 2022-12-01    194.699997
 2023-02-01    181.410004
 2023-03-01    202.770004
 2023-05-01    161.830002
 Name: Close, Length: 86, dtype: float64)

# test on top 100 us companies and see best return prediction

In [137]:
#grab s&p500 stocks using panda reader
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
sp500_table = pd.read_html(url, header=0)[0]
#convert to list
sp500_stocks = sp500_table['Symbol'].tolist()[:100]
stock_changes = []
start = "2015-01-01"#changed to 2015 so google trends doens't give crazy results
end = date.today()
for stock in sp500_stocks:
    predictions, last_close = get_predictionsV6(stock, start, str(end))
    if predictions is not None and last_close is not None:
        change = (predictions[-1] - last_close[-1]) / last_close[-1] * 100
        stock_changes.append((stock, change))
# Sort the stocks based on their percentage change and select the top 10 based off change up/down for short/buy
top_stocks = sorted(stock_changes, key=lambda x: x[1], reverse=True)[:10]

[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  10.587779541015626
Root Mean Squared Error:  12.413865644805819
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  4.91669985961914
Root Mean Squared Error:  5.8916825196919556
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  2.36776012802124
Root Mean Squared Error:  2.8715146921145487
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  9.713088668823243
Root Mean Squared Error:  11.115898757521459
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  16.986053817749028
Root Mean Squared Error:  21.980894496670523
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  7.485130645751953
Root Mean Squared Error:  8.042372777194242
[*********************100%***********************]  1 of 1 completed
Mean Abs

[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  9.766360015869134
Root Mean Squared Error:  11.61503030328829
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  93.88684539794924
Root Mean Squared Error:  141.87717185343257
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  15.509987579345704
Root Mean Squared Error:  24.261045466423287
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  11.764168624877932
Root Mean Squared Error:  13.10714354074329
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  5.840969274520875
Root Mean Squared Error:  8.916675029453845
[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  6.706439845085145
Root Mean Squared Error:  7.910577507048007
[*********************100%***********************]  1 of 1 completed
Mean Abso

## Define plotting function for predictions and historical data

In [140]:
#plot predicitions and historical prices
def plot_stock(stock, start, end,change):
    fig = go.Figure()
    predictions, historical_data = get_predictionsV6(stock, start, end)
    # Plot historical stock prices
    fig.add_trace(go.Scatter(x=historical_data.index, y=historical_data, mode='lines', name=f'{stock} - Historical Prices', line=dict(color='blue')))
    # Plot predicted stock prices
    end_date = pd.to_datetime(end)
    next_week_dates = pd.bdate_range(end_date + BDay(1), end_date + BDay(5))
    predictions = pd.Series(predictions, index=next_week_dates)
    fig.add_trace(go.Scatter(x=predictions.index, y=predictions, mode='lines+markers', name=f'{stock} - Predictions ({change:.2f}%)'))
    
    fig.update_layout( 
        title=f'{stock}: Historical Prices and Predictions for the Next Week',
        xaxis_title='Date',
        yaxis_title='Price',
        xaxis=dict(rangeslider=dict(visible=True), type="date"),
        hovermode='x unified'
    )
    fig.show()

# Plot top 10 results

In [139]:
for stock,change in top_stocks:
    plot_stock(stock, start, str(end),change)


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  10.519340759277343
Root Mean Squared Error:  12.793356523802837


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  4.618129467010498
Root Mean Squared Error:  6.949750260158612


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  4.599810014724731
Root Mean Squared Error:  8.100923343817724


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  10.587779541015626
Root Mean Squared Error:  12.413865644805819


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  9.993649375915528
Root Mean Squared Error:  13.582581442018558


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  12.250720314025878
Root Mean Squared Error:  14.983974126046999


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  11.339820732116703
Root Mean Squared Error:  14.348856944884872


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  7.655540657043458
Root Mean Squared Error:  9.694969102592616


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  4.8967059822082515
Root Mean Squared Error:  5.678287809102825


[*********************100%***********************]  1 of 1 completed
Mean Absolute Error:  4.201870800018311
Root Mean Squared Error:  5.616651047258132


### Based of error and return from top 10 returning S&P stocks I chose the stocks to buy and target to sell using limit orders