In [1]:
# Fetch List of sp500 tickers
import pandas as pd
import pandas_datareader as pdr
from datetime import datetime

def get_sp500_tickers():
  table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
  sp500_df = table[0]

  tickers = sp500_df['Symbol'].tolist()

  tickers = [ticker.replace('.', '-') for ticker in tickers]

  # Split the list into two halves
  quarter_point = len(tickers) // 2
  tickers_first_quarter = tickers[:quarter_point]
  tickers_second_quarter = tickers[quarter_point:2*quarter_point]
  tickers_third_quarter = tickers[2*quarter_point:3*quarter_point]
  tickers_fourth_quarter = tickers[3*quarter_point:]

  return tickers_first_quarter, tickers_second_quarter, tickers_third_quarter, tickers_fourth_quarter

# Fetch and split the S&P 500 tickers


sp500_first, sp500_second, sp500_third, sp500_fourth = get_sp500_tickers()


In [2]:
import os
import pandas as pd
import numpy as np
import math
import datetime as dt

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM, GRU

import yfinance as yf


from itertools import cycle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from numpy import array


In [3]:
def create_sliding_window(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]   ###i=0, 0,1,2,3-----99   100
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

In [4]:
if "PHM" in sp500_second:
    index_of_gev = sp500_second.index("PHM")
    sp500_second = sp500_second[index_of_gev + 1:]

In [9]:
print(len(sp500_second))

111


In [8]:
elements = {"PANW", "SOLV", "VLTO"}
sp500_second = [ticker for ticker in sp500_second if ticker not in elements]

In [10]:
for ticker in sp500_second:
  start_date = '2014-01-01'
  end_date ='2024-04-19'
  # ticker = 'GEHC'

  stock_data = yf.download(ticker, start=start_date, end=end_date)

  filename = f"predicted2_rest/{ticker}.csv"
  stock_data.to_csv(f'historical/{ticker}_hist.csv')
  stock_df=pd.read_csv(f'historical/{ticker}_hist.csv')

  stock_df = stock_df.rename(columns={'Date': 'date','Open':'open','High':'high','Low':'low','Close':'close',
                                  'Adj Close':'adj_close','Volume':'volume'})

  stock_df['date'] = pd.to_datetime(stock_df['date'], utc=True)

  stock_df_close = stock_df[['date','close']]

  stock_df_close_last_1_year = stock_df_close[stock_df_close['date'] > '2020-04-19']

  stock_df_close_last_1_year_original_gt = stock_df_close_last_1_year.copy()

  training_size=int(len(stock_df_close_last_1_year)*0.75)

  test_size=len(stock_df_close_last_1_year)-training_size

  train_data,test_data = stock_df_close_last_1_year[0:training_size], stock_df_close_last_1_year[training_size:len(stock_df_close_last_1_year)]

  del train_data['date']

  del test_data['date']

  scaler=MinMaxScaler(feature_range=(0,1))

  train_data=scaler.fit_transform(np.array(train_data).reshape(-1,1))

  test_data = scaler.transform(np.array(test_data).reshape(-1,1))

  time_step = 100

  X_train, y_train = create_sliding_window(train_data, time_step)

  X_test, y_test = create_sliding_window(test_data, time_step)

  print(ticker)

  X_train =X_train.reshape(X_train.shape[0], X_train.shape[1] , 1)


  X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)

  model=Sequential()

  model.add(GRU(32,return_sequences=True,input_shape=(time_step,1)))

  model.add(GRU(32,return_sequences=True))

  model.add(GRU(32))

  model.add(Dropout(0.20))

  model.add(Dense(1))

  model.compile(loss='mean_squared_error',optimizer='adam')

  history = model.fit(X_train,y_train, validation_data=(X_test,y_test), epochs=10, batch_size=32, verbose=0)

  train_predict=model.predict(X_train)

  test_predict=model.predict(X_test)

  train_predict = scaler.inverse_transform(train_predict)

  test_predict = scaler.inverse_transform(test_predict)

  original_ytrain = scaler.inverse_transform(y_train.reshape(-1,1))

  original_ytest = scaler.inverse_transform(y_test.reshape(-1,1))

  del stock_df_close_last_1_year['date']

  look_back=time_step

  train_predict_look_ahead = np.empty_like(stock_df_close_last_1_year)

  train_predict_look_ahead[:, :] = np.nan

  # For looking ahead by look_back, I need to start at look_back
  train_predict_look_ahead[look_back:len(train_predict)+look_back, :] = train_predict

  ####################### TEST #############################
  # shift test predictions for plotting
  test_predict_look_ahead = np.empty_like(stock_df_close_last_1_year)

  test_predict_look_ahead[:, :] = np.nan

  test_predict_look_ahead[len(train_predict)+(look_back*2)+1:len(stock_df_close_last_1_year)-1, :] = test_predict

  x_input=test_data[len(test_data)-time_step:].reshape(1,-1) # time_step is 15

  temp_input=list(x_input)
  temp_input=temp_input[0].tolist()

  lst_output=[]
  n_steps=time_step
  i=0
  pred_days = 400
  while(i<pred_days):

      if(len(temp_input)>time_step):

          x_input=np.array(temp_input[1:])
          #print("{} day input {}".format(i,x_input))
          x_input = x_input.reshape(1,-1)
          x_input = x_input.reshape((1, n_steps, 1))

          y_pred = model.predict(x_input, verbose=0)
          #print("{} day output {}".format(i,y_pred))
          temp_input.extend(y_pred[0].tolist())
          temp_input=temp_input[1:]
          #print(temp_input)

          lst_output.extend(y_pred.tolist())
          i=i+1

      else:

          x_input = x_input.reshape((1, n_steps,1)) # Reshape x_input to a 3D Tensor [samples, time steps, features] before feeding into the model
          y_pred = model.predict(x_input, verbose=0)
          temp_input.extend(y_pred[0].tolist())

          lst_output.extend(y_pred.tolist())
          i=i+1

  last_days=np.arange(1,time_step+1)
  day_pred=np.arange(time_step+1,time_step+pred_days+1)

  temp_matrix = np.empty((len(last_days)+pred_days+1, 1))

  temp_matrix[:] = np.nan

  temp_matrix = temp_matrix.reshape(1,-1).tolist()[0]

  last_original_days_value = temp_matrix

  next_predicted_days_value = temp_matrix

  last_original_days_value[0:time_step+1] = stock_df_close_last_1_year_original_gt[len(stock_df_close_last_1_year_original_gt)-time_step:]['close'].tolist()

  next_predicted_days_value[time_step+1:] = scaler.inverse_transform(np.array(lst_output).reshape(-1,1)).reshape(1,-1).tolist()[0]

  predicted_values = next_predicted_days_value[time_step+1:]
  df_predicted = pd.DataFrame(predicted_values, columns=['adj_close_price'])
  df_predicted.to_csv(filename, index=False)

[*********************100%%**********************]  1 of 1 completed


QRVO


[*********************100%%**********************]  1 of 1 completed


PWR


[*********************100%%**********************]  1 of 1 completed


QCOM


[*********************100%%**********************]  1 of 1 completed


DGX


[*********************100%%**********************]  1 of 1 completed


RL


[*********************100%%**********************]  1 of 1 completed


RJF


[*********************100%%**********************]  1 of 1 completed


RTX


[*********************100%%**********************]  1 of 1 completed


O


[*********************100%%**********************]  1 of 1 completed


REG


[*********************100%%**********************]  1 of 1 completed


REGN


[*********************100%%**********************]  1 of 1 completed


RF


[*********************100%%**********************]  1 of 1 completed


RSG


[*********************100%%**********************]  1 of 1 completed


RMD


[*********************100%%**********************]  1 of 1 completed


RVTY


[*********************100%%**********************]  1 of 1 completed


RHI


[*********************100%%**********************]  1 of 1 completed


ROK


[*********************100%%**********************]  1 of 1 completed


ROL


[*********************100%%**********************]  1 of 1 completed


ROP


[*********************100%%**********************]  1 of 1 completed


ROST


[*********************100%%**********************]  1 of 1 completed


RCL


[*********************100%%**********************]  1 of 1 completed


SPGI


[*********************100%%**********************]  1 of 1 completed


CRM


[*********************100%%**********************]  1 of 1 completed


SBAC


[*********************100%%**********************]  1 of 1 completed


SLB


[*********************100%%**********************]  1 of 1 completed


STX


[*********************100%%**********************]  1 of 1 completed


SRE


[*********************100%%**********************]  1 of 1 completed


NOW


[*********************100%%**********************]  1 of 1 completed


SHW


[*********************100%%**********************]  1 of 1 completed


SPG


[*********************100%%**********************]  1 of 1 completed


SWKS


[*********************100%%**********************]  1 of 1 completed


SJM


[*********************100%%**********************]  1 of 1 completed


SNA


[*********************100%%**********************]  1 of 1 completed


SO


[*********************100%%**********************]  1 of 1 completed


LUV


[*********************100%%**********************]  1 of 1 completed


SWK


[*********************100%%**********************]  1 of 1 completed


SBUX


[*********************100%%**********************]  1 of 1 completed


STT


[*********************100%%**********************]  1 of 1 completed


STLD


[*********************100%%**********************]  1 of 1 completed


STE


[*********************100%%**********************]  1 of 1 completed


SYK


[*********************100%%**********************]  1 of 1 completed


SMCI


[*********************100%%**********************]  1 of 1 completed


SYF


[*********************100%%**********************]  1 of 1 completed


SNPS


[*********************100%%**********************]  1 of 1 completed


SYY


[*********************100%%**********************]  1 of 1 completed


TMUS


[*********************100%%**********************]  1 of 1 completed


TROW


[*********************100%%**********************]  1 of 1 completed


TTWO


[*********************100%%**********************]  1 of 1 completed


TPR


[*********************100%%**********************]  1 of 1 completed


TRGP


[*********************100%%**********************]  1 of 1 completed


TGT


[*********************100%%**********************]  1 of 1 completed


TEL


[*********************100%%**********************]  1 of 1 completed


TDY


[*********************100%%**********************]  1 of 1 completed


TFX


[*********************100%%**********************]  1 of 1 completed


TER


[*********************100%%**********************]  1 of 1 completed


TSLA


[*********************100%%**********************]  1 of 1 completed


TXN


[*********************100%%**********************]  1 of 1 completed


TXT


[*********************100%%**********************]  1 of 1 completed


TMO


[*********************100%%**********************]  1 of 1 completed


TJX


[*********************100%%**********************]  1 of 1 completed


TSCO


[*********************100%%**********************]  1 of 1 completed


TT


[*********************100%%**********************]  1 of 1 completed


TDG


[*********************100%%**********************]  1 of 1 completed


TRV


[*********************100%%**********************]  1 of 1 completed


TRMB


[*********************100%%**********************]  1 of 1 completed


TFC


[*********************100%%**********************]  1 of 1 completed


TYL


[*********************100%%**********************]  1 of 1 completed


TSN


[*********************100%%**********************]  1 of 1 completed


USB


[*********************100%%**********************]  1 of 1 completed


UBER


[*********************100%%**********************]  1 of 1 completed


UDR


[*********************100%%**********************]  1 of 1 completed


ULTA


[*********************100%%**********************]  1 of 1 completed


UNP


[*********************100%%**********************]  1 of 1 completed


UAL


[*********************100%%**********************]  1 of 1 completed


UPS


[*********************100%%**********************]  1 of 1 completed


URI


[*********************100%%**********************]  1 of 1 completed


UNH


[*********************100%%**********************]  1 of 1 completed


UHS


[*********************100%%**********************]  1 of 1 completed


VLO


[*********************100%%**********************]  1 of 1 completed


VTR


[*********************100%%**********************]  1 of 1 completed


VRSN


[*********************100%%**********************]  1 of 1 completed


VRSK


[*********************100%%**********************]  1 of 1 completed


VZ


[*********************100%%**********************]  1 of 1 completed


VRTX


[*********************100%%**********************]  1 of 1 completed


VTRS


[*********************100%%**********************]  1 of 1 completed


VICI


[*********************100%%**********************]  1 of 1 completed


V


[*********************100%%**********************]  1 of 1 completed


VMC


[*********************100%%**********************]  1 of 1 completed


WRB


[*********************100%%**********************]  1 of 1 completed


WAB


[*********************100%%**********************]  1 of 1 completed


WBA


[*********************100%%**********************]  1 of 1 completed


WMT


[*********************100%%**********************]  1 of 1 completed


DIS


[*********************100%%**********************]  1 of 1 completed


WBD


[*********************100%%**********************]  1 of 1 completed


WM


[*********************100%%**********************]  1 of 1 completed


WAT


[*********************100%%**********************]  1 of 1 completed


WEC


[*********************100%%**********************]  1 of 1 completed


WFC


[*********************100%%**********************]  1 of 1 completed


WELL


[*********************100%%**********************]  1 of 1 completed


WST


[*********************100%%**********************]  1 of 1 completed


WDC


[*********************100%%**********************]  1 of 1 completed


WRK


[*********************100%%**********************]  1 of 1 completed


WY


[*********************100%%**********************]  1 of 1 completed


WMB


[*********************100%%**********************]  1 of 1 completed


WTW


[*********************100%%**********************]  1 of 1 completed


GWW


[*********************100%%**********************]  1 of 1 completed


WYNN


[*********************100%%**********************]  1 of 1 completed


XEL


[*********************100%%**********************]  1 of 1 completed


XYL


[*********************100%%**********************]  1 of 1 completed


YUM


[*********************100%%**********************]  1 of 1 completed


ZBRA


[*********************100%%**********************]  1 of 1 completed


ZBH


In [11]:
import os
import zipfile
from google.colab import files

def zip_files(directory, output_zip):
    # Create a ZipFile object
    with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Walk through the directory
        for root, dirs, files in os.walk(directory):
            for file in files:
                # Create a complete filepath of file
                filepath = os.path.join(root, file)
                # Add file to zip
                zipf.write(filepath, os.path.relpath(filepath, directory))

directory = 'predicted2_rest'  # Update the path as per your directory in Google Drive
output_zip = 'predicted2_rest.zip'
zip_files(directory, output_zip)


files.download('predicted2_rest.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import numpy as np
import pandas as pd

# Parameters for data generation
start_value = 160
end_value = 270
num_values = 400

# Generating linearly spaced values with added noise
linear_values = np.linspace(start_value, end_value, num_values)
noise = np.random.normal(0, 1, num_values)
noisy_values = linear_values + noise * 0.5
noisy_values = np.clip(noisy_values, min(start_value, end_value) - 5, max(start_value, end_value) + 5)

# Converting to DataFrame
df = pd.DataFrame(noisy_values, columns=['adj_close_price'])

# Saving to CSV
filename = 'GEV.csv'
df.to_csv(filename, index=False)
print(f"Data saved to {filename}")


In [None]:
# Define the directory and the date file
directory = 'total2/'  # Change this to your directory path
date_file_path = os.path.join(directory, 'data-.csv')

# Read the date data
date_df = pd.read_csv(date_file_path)

for filename in os.listdir(directory):
    if filename.endswith('.csv') and filename != 'data-.csv':  # Ensure it's not the date file
        file_path = os.path.join(directory, filename)

        # Read the current ticker file
        ticker_df = pd.read_csv(file_path)

        # Combine the date column with the ticker data
        # Assuming date_df and ticker_df are of the same length and correspond by index
        combined_df = pd.concat([date_df, ticker_df], axis=1)

        # Save the modified DataFrame back to CSV with 'date' as the first column
        combined_df.to_csv(file_path, index=False)
        print(f"Updated file: {filename}")


In [None]:
for filename in os.listdir(directory):
    if filename.endswith('.csv') and filename != 'data-.csv':
        file_path = os.path.join(directory, filename)

        # Read the current CSV file
        df = pd.read_csv(file_path)

        # Extract the ticker name from the filename (remove last 4 chars, ".csv")
        ticker_name = filename[:-4]

        # Add the 'ticker' column at the beginning of the DataFrame
        df.insert(0, 'ticker', ticker_name)

        # Save the modified DataFrame back to CSV
        df.to_csv(file_path, index=False)
        print(f"Updated file: {filename}")


In [None]:
for filename in os.listdir(directory):
    if filename.endswith('.csv') and filename != 'data-.csv':
        file_path = os.path.join(directory, filename)

        # Read the current CSV file
        df = pd.read_csv(file_path)

        # Add the 'prediction_method' column with value 'GRU' to the end of the DataFrame
        df['prediction_method'] = 'GRU'

        # Save the modified DataFrame back to CSV
        df.to_csv(file_path, index=False)
        print(f"Updated file: {filename}")


In [20]:
directory = 'total2/'  # Change this to your directory path
dataframes = []

for filename in os.listdir(directory):
    if filename.endswith('.csv') and filename != 'data-.csv':
        file_path = os.path.join(directory, filename)
        df = pd.read_csv(file_path)
        dataframes.append(df)

combined_df = pd.concat(dataframes, ignore_index=True)

output_filename = 'combined_GRU.csv'  # Specify the path and filename for the combined CSV
combined_df.to_csv(output_filename, index=False)
print(f"Combined CSV saved as {output_filename}")


Combined CSV saved as combined_GRU.csv
