In [1]:
#from pandas_datareader import data
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import urllib.request, json
import os
import numpy as np
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM

import logging

# Fetch the root logger and raise its threshold so that only CRITICAL
# records are emitted; everything less severe is dropped.
logger = logging.getLogger()
logger.setLevel("CRITICAL")

In [2]:
from google.colab import drive

# Mount the user's Google Drive inside the Colab VM at a named location.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
# Ticker symbols whose price history is loaded and modelled in this notebook.
INTERESTED_SYMBOLS = ['A', 'AAPL', 'ACIU', 'AGYS', 'ALIM', 'AMPE', 'AMZN', 'APH', 'AXP', 'BA', 'BCM', 'BH', 'BNDX',
                      'BRID', 'CAT', 'CC', 'CLMB', 'CNET', 'COWZ', 'CSCO', 'CVCY', 'CVX', 'DIS', 'DIVB', 'DVAX',
                      'EHTH', 'ENG', 'ERJ', 'FOR', 'FXE', 'GE', 'GOOGL', 'GS', 'HD', 'HIW', 'HYDW', 'IBM', 'IGEB',
                      'IMRN', 'INTC', 'ISTR', 'IXP', 'JJC', 'JNJ', 'JPM', 'KIQ', 'KO', 'KT', 'LIT', 'MCD', 'MD',
                      'MGRC', 'MMIN', 'MMM', 'MPWR', 'MRK', 'MSFT', 'MTP', 'NKE', 'NWG', 'OCX', 'PBLA', 'PETS', 'PFE',
                      'PG', 'PML', 'PSMC', 'PXF', 'RRX', 'SCHB', 'SNCR', 'SPFF', 'SRDX', 'SXI', 'TGNA', 'TMFC', 'TRV',
                      'UGP', 'UNH', 'UTX', 'VBND', 'VOX', 'VXF', 'VZ', 'WMT', 'WSBC', 'XHS', 'XOM', 'XWEL', 'ZLAB']

# Root of the data tree; files are laid out as <PATH>/<year>/<month>/<symbol>.csv
PATH = "/content/drive/MyDrive/DependableAI/Project/Data/"

# Year / month sub-directories to read, in chronological order
years  = ["2018", "2019", "2020", "2021", "2022"]
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]

# Column names imposed on every CSV (the files are read positionally)
CSV_COLUMNS = ["index", "date", "open", "high", "low", "close", "adjusted_close", "volume",
               "dividend_amount", "split_coefficient"]

# symbol -> DataFrame of all monthly files stacked in read order, indexed by daily Period
market_data = {}

for sym in INTERESTED_SYMBOLS:
    # Collect the monthly frames and concatenate once per symbol; calling
    # pd.concat inside the inner loop re-copies every accumulated row each time.
    monthly_frames = []
    for y in years:
        for m in months:
            csv_file = PATH + y + "/" + m + "/" + sym + ".csv"
            frame = pd.read_csv(csv_file, names=CSV_COLUMNS)
            # Drop the first row of each file
            # NOTE(review): presumably the file's own header line -- confirm against the CSVs
            frame = frame.tail(-1)
            # Discard the leading "index" column and key the rows by date
            frame = frame[frame.columns[1:]].set_index('date')
            frame.index = pd.DatetimeIndex(frame.index).to_period('D')
            monthly_frames.append(frame)
    market_data[sym] = pd.concat(monthly_frames)
    print(sym)

A
AAPL
ACIU
AGYS
ALIM
AMPE
AMZN
APH
AXP
BA
BCM
BH
BNDX
BRID
CAT
CC
CLMB
CNET
COWZ
CSCO
CVCY
CVX
DIS
DIVB
DVAX
EHTH
ENG
ERJ
FOR
FXE
GE
GOOGL
GS
HD
HIW
HYDW
IBM
IGEB
IMRN
INTC
ISTR
IXP
JJC
JNJ
JPM
KIQ
KO
KT
LIT
MCD
MD
MGRC
MMIN
MMM
MPWR
MRK
MSFT
MTP
NKE
NWG
OCX
PBLA
PETS
PFE
PG
PML
PSMC
PXF
RRX
SCHB
SNCR
SPFF
SRDX
SXI
TGNA
TMFC
TRV
UGP
UNH
UTX
VBND
VOX
VXF
VZ
WMT
WSBC
XHS
XOM
XWEL
ZLAB


In [4]:
# Add a 'label' column describing each day's close relative to the previous close.

# Relative move (as a fraction of the previous close) needed for a non-zero label.
UP_THRESHOLD = 0.050
DOWN_THRESHOLD = 0.055


def _label_close_prices(close_prices, up_threshold=UP_THRESHOLD, down_threshold=DOWN_THRESHOLD):
    """Label every close price by its move from the preceding close.

    Args:
        close_prices: iterable of numeric close prices in chronological order.
        up_threshold: fractional rise above the previous close that yields 1.
        down_threshold: fractional fall below the previous close that yields -1.

    Returns:
        A list with one label per price: 1 (rose by more than up_threshold),
        -1 (fell by more than down_threshold) or 0 (anything else, including
        the first price, which has no predecessor).
    """
    labels = []
    prev_close = None  # sentinel: no previous close seen yet
    for curr_close in close_prices:
        if prev_close is None:
            labels.append(0)
        elif curr_close > prev_close + up_threshold * prev_close:
            labels.append(1)
        elif curr_close < prev_close - down_threshold * prev_close:
            # The drop is measured BELOW the previous close; testing against
            # prev_close + x*prev_close would make the 0 label unreachable.
            labels.append(-1)
        else:
            labels.append(0)
        # Always advance, so the next price is compared with this one
        prev_close = curr_close
    return labels


for sym in INTERESTED_SYMBOLS:
    df = market_data[sym]
    df['label'] = _label_close_prices(df['close'].to_list())
    market_data[sym] = df

In [5]:
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split

import pickle

# Train one LSTM per symbol on (open price, label) windows with a few
# randomly corrupted label sequences, then store the predicted close prices.

look_back = 30          # number of past days in every input window
train_fraction = 0.80   # leading share of each symbol's rows used for training
num_adv = 2             # number of training windows whose labels are randomised

predictions_dir = "/content/drive/MyDrive/DependableAI/Project/Data/adv_predictions/"
os.makedirs(predictions_dir, exist_ok=True)


def _make_windows(open_norm, labels, close_norm, start, stop, look_back):
    """Build model inputs and targets for row positions start..stop-1.

    For position i the input is the (look_back, 2) matrix of scaled open
    prices and labels for rows i-look_back..i-1, and the target is the scaled
    close price of row i, i.e. one scalar per window, matching the Dense(1)
    output layer.

    Returns:
        (X, y) with shapes (n, look_back, 2) and (n, 1).
    """
    X, y = [], []
    for i in range(start, stop):
        window = slice(i - look_back, i)
        X.append(np.column_stack((open_norm[window, 0], labels[window, 0])))
        y.append(close_norm[i, 0])
    return np.array(X).reshape(-1, look_back, 2), np.array(y).reshape(-1, 1)


# symbol -> array of predicted (un-scaled) close prices for the test rows
advers_predictions = {}

for sym in INTERESTED_SYMBOLS:
    df = market_data[sym]

    # Split point is computed per symbol so that histories of different
    # lengths are each split at their own 80% mark.
    train_len = int(len(df) * train_fraction)
    if train_len <= look_back:
        # Not enough rows to form even one full training window
        print("Skipping " + sym + ": not enough data")
        continue

    open_prices  = df['open'].values.reshape(-1, 1)
    close_prices = df['close'].values.reshape(-1, 1)
    labels       = df['label'].values.reshape(-1, 1)

    # Separate scalers: the close scaler is needed later to un-scale the
    # predictions, so it must not be refit on the open prices. Both are fit
    # on the training rows only to keep test statistics out of training.
    open_scaler = MinMaxScaler(feature_range=(0, 1))
    close_scaler = MinMaxScaler(feature_range=(0, 1))
    open_prices_norm = open_scaler.fit(open_prices[:train_len]).transform(open_prices)
    close_prices_norm = close_scaler.fit(close_prices[:train_len]).transform(close_prices)

    X_train, y_train = _make_windows(open_prices_norm, labels, close_prices_norm,
                                     look_back, train_len, look_back)
    X_test, y_test = _make_windows(open_prices_norm, labels, close_prices_norm,
                                   train_len, len(open_prices), look_back)

    #
    # Replace the labels of a few randomly chosen training windows with random labels
    #
    n_flip = min(num_adv, X_train.shape[0])
    adv_idx = np.random.choice(X_train.shape[0], n_flip, replace=False)
    for idx in adv_idx:
        # One random label per time step, written in a single vectorised
        # assignment (no length-1 array squeezed into a scalar slot).
        X_train[idx, :, 1] = np.random.choice([0, -1, 1], size=look_back)

    # Release the previous symbol's model state before building a new model
    tf.keras.backend.clear_session()

    # Build the LSTM model
    adv_model = Sequential()
    adv_model.add(LSTM(128, return_sequences=True, input_shape= (X_train.shape[1], 2)))
    adv_model.add(LSTM(64, return_sequences=False))
    adv_model.add(Dense(25))
    adv_model.add(Dense(1))

    # Compile the model
    adv_model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    adv_model.fit(X_train, y_train, batch_size=10, epochs=30)

    # Get the model's predicted close prices, mapped back to price units
    # with the scaler that was fit on close prices
    adv_predictions = adv_model.predict(X_test)
    adv_predictions = close_scaler.inverse_transform(adv_predictions)

    advers_predictions[sym] = adv_predictions

    # Key every prediction by the index value of the test row it belongs to
    test_indexes = list(df.index.values)[train_len:]
    sym_predictions = dict(zip(test_indexes, adv_predictions))

    predict_file = predictions_dir + sym + "_adv_predictions.pickle"
    with open(predict_file, "wb") as f:
        pickle.dump(sym_predictions, f)
        print("Done dumping predictions for " +  sym)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Done dumping predictions for BA
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Done dumping predictions for BCM
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
