In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error

# Load data
data = pd.read_csv('combined_data.csv', parse_dates=['Date'])

# Define function to create sequences and labels
def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset) - look_back - 1):
        a = dataset[i:(i + look_back), 0]
        X.append(a)
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

# Define function to create LSTM model
def create_lstm_model():
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(look_back, 1)))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Function to make predictions for each ticker
def make_predictions_for_ticker(data, ticker):
    ticker_data = data[data['Ticker'] == ticker][['Date', 'Close', 'Ticker', 'Sector', 'Industry']]

    sector = np.repeat(data[data['Ticker'] == ticker]['Sector'].unique(), future_days)
    industry = np.repeat(data[data['Ticker'] == ticker]['Industry'].unique(), future_days)

    # Normalize data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(ticker_data[['Close']])

    # Create datasets
    X, y = create_dataset(scaled_data, look_back)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    # Train-test split
    train_size = int(len(X) * 0.80)
    train_X, train_y = X[:train_size], y[:train_size]
    test_X, test_y = X[train_size:], y[train_size:]

    # Train the model
    model = create_lstm_model()
    model.fit(train_X, train_y, batch_size=32, epochs=20, verbose=0)

    # Make predictions
    train_predict = model.predict(train_X)
    test_predict = model.predict(test_X)

    # Inverse transform predictions
    train_predict = scaler.inverse_transform(train_predict)
    test_predict = scaler.inverse_transform(test_predict)
    actual_y_train = scaler.inverse_transform([train_y])
    actual_y_test = scaler.inverse_transform([test_y])

    # Calculate and print RMSE
    train_score = np.sqrt(mean_squared_error(actual_y_train[0], train_predict[:,0]))
    test_score = np.sqrt(mean_squared_error(actual_y_test[0], test_predict[:,0]))

    # Future predictions
    last_look_back = scaled_data[-look_back:]
    future_predictions = []
    new_input = last_look_back.reshape(1, look_back, 1)
    for _ in range(future_days):
        future_pred = model.predict(new_input)
        future_predictions.append(future_pred[0, 0])
        future_pred_reshaped = future_pred.reshape(1, 1, 1)
        new_input = np.append(new_input[:, 1:, :], future_pred_reshaped, axis=1)
    future_predictions_scaled = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))

    # Combine actual prices and future predictions
    future_dates = pd.date_range(start=ticker_data['Date'].iloc[-1] + pd.Timedelta(days=1), periods=future_days)
    future_df = pd.DataFrame({'Date': future_dates, 'Close': future_predictions_scaled.flatten(), 'Ticker' : ticker, 'Sector' : sector, 'Industry' : industry})

    return pd.concat([ticker_data, future_df], ignore_index=True)



In [5]:
# Define look_back and future_days
look_back = 60
future_days = 365

# Iterate through each unique ticker
all_predictions = []
unique_tickers = data['Ticker'].unique()
for ticker in unique_tickers:
    all_predictions.append(make_predictions_for_ticker(data, ticker))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [6]:
# Convert the list of arrays to a list of DataFrames
all_predictions_dataframes = [pd.DataFrame(pred) for pred in all_predictions]

# Combine predictions for all tickers
combined_predictions = pd.concat(all_predictions_dataframes, ignore_index=True)

# Merge with original data to get additional columns (Ticker, Sector, Industry)
merged_data = pd.merge(combined_predictions, data[['Date', 'Ticker']], on=['Date', 'Ticker'], how = 'left')

# Save to CSV
merged_data

Unnamed: 0,Date,Close,Ticker,Sector,Industry
0,2018-01-02,47.320000,WD,Finance,Mortgage
1,2018-01-03,46.130001,WD,Finance,Mortgage
2,2018-01-04,46.099998,WD,Finance,Mortgage
3,2018-01-05,47.980000,WD,Finance,Mortgage
4,2018-01-08,47.820000,WD,Finance,Mortgage
...,...,...,...,...,...
150858,2025-04-11,71.697884,MRK,Healthcare,Drug Manufacturers
150859,2025-04-12,71.683975,MRK,Healthcare,Drug Manufacturers
150860,2025-04-13,71.670235,MRK,Healthcare,Drug Manufacturers
150861,2025-04-14,71.656677,MRK,Healthcare,Drug Manufacturers


In [7]:
merged_data.to_csv('combined_predictions.csv')