In [1]:
#must use anaconda enviornment to import tensorflow modules
#at least on my computer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Flatten
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
import pandas as pd

In [2]:
def read_symbols(path): #func to read in the symbols from companies.txt
    with open(path,'r') as file: #open file
        return file.read().splitlines() #return array of company symbols

In [6]:
def create_model(symbol,df):#func to create a model
    # drop NaN values
    df = df.drop('1_day_volatility', axis=1)
    #df = df.dropna()
    #print(df.shape)
    for col in df.columns:#conv to numerical numbers
        if col != 'Date':
            df[col] = pd.to_numeric(df[col],errors='coerce')
    df['Close'] = df['Close'].shift(-1) #shift close column up by one since we predict for the next day
    df.dropna(inplace=True)
    print(df.shape)
    #get features and target
    feat_cols = [col for col in df.columns if col not in ['Close', 'Date']]
    features = df[feat_cols]
    target = df['Close']
    
    #scale the data
    scaler=MinMaxScaler()
    scaled_feat = scaler.fit_transform(features)

    #reshape for CONV1D
    X = scaled_feat.reshape((features.shape[0],features.shape[1],1))

    y = df['Close']

    #split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    
    #initialize sequential model
    #this will be used to create a linear stack of layers
    model = Sequential() 

    #add convolutional layer w/ 64 output filters
    #kernel size 3
    #using rectified linear unit for activation function
    #input_shape is shape of data given number of features with one data point per step
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X.shape[1],1)))

    #add LSTM layer w/ 50 neurons
    model.add(LSTM(units=50, return_sequences=True))

    #add layer to flatten input to 1-d array
    model.add(Flatten())

    #add connected NN layer
    #w/ 1 output neuron
    model.add(Dense(units=1))

    #configure model for training
    #use Adam for optimizer
    #Use MSE for loss func
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train the model
    model.fit(X_train, y_train, epochs=10, batch_size=32)
    
    # Evaluate the model
    test_loss = model.evaluate(X_test, y_test)

    # For a more detailed evaluation, you can make predictions and compare them to the actual values
    predictions = model.predict(X_test)

    # Calculate metrics like MSE or MAE
    #rom sklearn.metrics import mean_squared_error, mean_absolute_error

    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    print(f"Mean Squared Error: {mse}")
    print(f"Mean Absolute Error: {mae}")
    
    model.save(f'cnn-lstm-models/{symbol}.h5')

In [7]:
#get array of symbols
symbols_path = 'companies.txt'
symbols = read_symbols(symbols_path)
print(symbols)

['MMM', 'AOS', 'ABT', 'ABBV', 'ABMD', 'ACN', 'ATVI', 'ADM', 'ADBE', 'AAP', 'AMD', 'AES', 'AFL', 'A', 'APD', 'AKAM', 'ALB', 'ALK', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'ABC', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'ANTM', 'AON', 'APA', 'AAPL', 'AMAT', 'APTV', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'BKR', 'BLL', 'BAC', 'BBWI', 'BAX', 'BDX', 'BRK.B', 'BBY', 'BIO', 'TECH', 'BIIB', 'BLK', 'BK', 'BA', 'BKNG', 'BWA', 'BXP', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF.B', 'CHRW', 'CDNS', 'CZR', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CTLT', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'CNC', 'CNP', 'CDAY', 'CERN', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CTXS', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CMA', 'CAG', 'COP', 'ED', 'STZ', 'CPRT', 'GLW', 'CTVA', 'COST', 'CTRA', 'CCI', 'CSX', 'CMI', 'CVS', 'DHI', 'DHR', '

In [9]:
for symbol in symbols:
    data_path = f"company-data/{symbol}-data.csv" #path to company specific data
    try:
        df = pd.read_csv(data_path)
        print(f'{symbol}:\n')
        print(df.shape)
        create_model(symbol, df)#create model
        print('\n')
    except:
        pass
        

MMM:

(6037, 13)
(6006, 12)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Mean Squared Error: 16.825326592704165
Mean Absolute Error: 2.371208515909865


AOS:

(4743, 13)
(4713, 12)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Mean Squared Error: 1.3126441970282825
Mean Absolute Error: 0.6659436143771158


ABT:

(6037, 13)
(6006, 12)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Mean Squared Error: 1.4725550603892124
Mean Absolute Error: 0.6762593128591528


ABBV:

(2766, 13)
(2736, 12)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Mean Squared Error: 32.297859335086436
Mean Absolute Error: 2.898688230010541


ACN:

(4743, 13)
(4713, 12)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch