In [1]:
#from pandas_datareader import data
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import urllib.request, json
import os
import numpy as np
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM

import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# List of interested Symbols
INTERESTED_SYMBOLS = ['A', 'AAPL', 'ACIU', 'AGYS', 'ALIM', 'AMPE', 'AMZN', 'APH', 'AXP', 'BA', 'BCM', 'BH', 'BNDX',
                      'BRID', 'CAT', 'CC', 'CLMB', 'CNET', 'COWZ', 'CSCO', 'CVCY', 'CVX', 'DIS', 'DIVB', 'DVAX',
                      'EHTH', 'ENG', 'ERJ', 'FOR', 'FXE', 'GE', 'GOOGL', 'GS', 'HD', 'HIW', 'HYDW', 'IBM', 'IGEB',
                      'IMRN', 'INTC', 'ISTR', 'IXP', 'JJC', 'JNJ', 'JPM', 'KIQ', 'KO', 'KT', 'LIT', 'MCD', 'MD',
                      'MGRC', 'MMIN', 'MMM', 'MPWR', 'MRK', 'MSFT', 'MTP', 'NKE', 'NWG', 'OCX', 'PBLA', 'PETS', 'PFE',
                      'PG', 'PML', 'PSMC', 'PXF', 'RRX', 'SCHB', 'SNCR', 'SPFF', 'SRDX', 'SXI', 'TGNA', 'TMFC', 'TRV',
                      'UGP', 'UNH', 'UTX', 'VBND', 'VOX', 'VXF', 'VZ', 'WMT', 'WSBC', 'XHS', 'XOM', 'XWEL', 'ZLAB']

PATH = "/content/drive/MyDrive/DependableAI/Project/Data/"

# Load the stock price data for all interested symbol
years  = ["2018", "2019", "2020", "2021", "2022"]
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]

market_data = {}

for sym in INTERESTED_SYMBOLS:
    for y in years:
        for m in months:
            csv_file = PATH + y + "/" + m + "/" + sym + ".csv"
            data = pd.read_csv(csv_file,
                               names=["index", "date", "open", "high", "low", "close", "adjusted_close", "volume",
                                      "dividend_amount", "split_coefficient"])
            data = data.tail(-1)
            data = data[data.columns[1:]]
            data.set_index('date', inplace=True)
            data.index = pd.DatetimeIndex(data.index).to_period('D')
            if sym in market_data.keys():
                market_data[sym] = pd.concat([market_data[sym], data])
            else:
                market_data[sym] = data
    print(sym)

A
AAPL
ACIU
AGYS
ALIM
AMPE
AMZN
APH
AXP
BA
BCM
BH
BNDX
BRID
CAT
CC
CLMB
CNET
COWZ
CSCO
CVCY
CVX
DIS
DIVB
DVAX
EHTH
ENG
ERJ
FOR
FXE
GE
GOOGL
GS
HD
HIW
HYDW
IBM
IGEB
IMRN
INTC
ISTR
IXP
JJC
JNJ
JPM
KIQ
KO
KT
LIT
MCD
MD
MGRC
MMIN
MMM
MPWR
MRK
MSFT
MTP
NKE
NWG
OCX
PBLA
PETS
PFE
PG
PML
PSMC
PXF
RRX
SCHB
SNCR
SPFF
SRDX
SXI
TGNA
TMFC
TRV
UGP
UNH
UTX
VBND
VOX
VXF
VZ
WMT
WSBC
XHS
XOM
XWEL
ZLAB


In [4]:
#plot close price
for sym in INTERESTED_SYMBOLS:
    plt.figure(figsize=(10,6))
    plt.grid(True)
    plt.xlabel('Date')
    plt.ylabel('Prices')
    plt.plot(market_data[sym].close.to_list(), label="close")
    plt.title(sym + ' open & close price')
    plt.legend()
    plt.show()

Output hidden; open in https://colab.research.google.com to view.

In [5]:
import warnings
warnings.filterwarnings('ignore')

# Closing prices
close_prices= {}

for sym in INTERESTED_SYMBOLS:
    close_prices[sym] = market_data[sym]['close'].values
    
# Training data len
train_data_len = int(len(market_data['A']) * 0.80)

single_value_attck_results = {}

for sym in INTERESTED_SYMBOLS:
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_data = scaler.fit_transform(close_prices[sym].reshape(-1, 1))
    #scaled_data
    # Create the training data set 
    # Create the scaled training data set
    train_data = scaled_data[0:int(train_data_len), :]
    # Split the data into x_train and y_train data sets
    x_train = []
    y_train = []
    for i in range(1, len(train_data)):
        x_train.append(train_data[i-1:i, 0])
        y_train.append(train_data[i, 0])
        
    # Convert the x_train and y_train to numpy arrays 
    x_train, y_train = np.array(x_train), np.array(y_train)

    # Reshape the data
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    # Build the LSTM model
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1)))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(x_train, y_train, batch_size=10, epochs=30)

    # Create the testing data set
    test_data = scaled_data[train_data_len - 1: , :]
    # Create the data sets x_test and y_test
    x_test = []
    y_test = close_prices[sym].reshape(-1, 1)[train_data_len:, :]
    for i in range(1, len(test_data)):
        x_test.append(test_data[i-1:i, 0])
    
    # Convert the data to a numpy array
    x_test = np.array(x_test)

    # Reshape the data
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))

    # Select a Single point and add small amount of petrubation to it
    x_test_single     = np.reshape(np.array(x_test[0][0]), (1, 1, 1 ))

    adv_x_test_single = x_test[0][0] + (0.01 * x_test[0][0])
    adv_x_test_single = np.reshape(np.array(adv_x_test_single), (1, 1, 1 ))

    # Get the models predicted price values 
    actual_prediction = model.predict(x_test_single)
    actual_prediction = scaler.inverse_transform(actual_prediction)

    # ADV Predictions
    adv_prediction = model.predict(adv_x_test_single)
    adv_prediction = scaler.inverse_transform(adv_prediction)
    
    predict = {}

    predict["Actual"] = actual_prediction
    predict["ADV"]    = adv_prediction

    single_value_attck_results[sym] = predict


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 

In [11]:
import pickle

fle = "/content/drive/MyDrive/DependableAI/Project/Data/single_valu_attack_results/lstm_single_value_attack_tesults.pickle"

with open(fle, "wb") as f:
  pickle.dump(single_value_attck_results, f)
