In [1]:
import tensorflow as tf
from collections import deque
import random
import numpy as np
from sklearn import preprocessing
from tensorflow.keras.models import load_model,Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.backend import clear_session

import pandas as pd
import os
from datetime import datetime,timedelta

#hide warnings
import warnings
warnings.filterwarnings('ignore')

# READING CSV

In [2]:
#reading the csv
#parsing the date to timestamp
#setting date as index
df = pd.read_csv('combine.csv',
                 header=None,
                 names=['stock code','date','open','high','low','close','volume','netforeign'])
df['date'] = pd.to_datetime(df['date'])
df.dropna(inplace=True)
df.set_index('date',inplace=True)

#sort values by date
df.sort_values('date',inplace=True)

#grouping by stock code
df=df.groupby('stock code')

# FUNCTIONS

In [3]:
#classify buy and sells
def classify(future_pct_change,buy_signal,sell_signal):
    if future_pct_change >= buy_signal :
        return 1 # buy
    elif future_pct_change < buy_signal and future_pct_change > sell_signal:
        return 0 # hold
    else:
        return 2 # sell

In [4]:
def process_data(data,sequence_length):
    for col in data.columns[:-1]:  # go through all of the columns
        if col != "target":  # normalize all ... except for the target itself!
            data[col] = data[col].pct_change()*100  # pct change "normalizes" the different currencies (each crypto coin has vastly diff values, we're really more interested in the other coin's movements)
            data.replace([np.inf,-np.inf],np.nan)
            data.dropna(inplace=True)  # remove the nas created by pct_change
            data[col] = preprocessing.scale(data[col].values)  # scale between 0 and 1.
            
    data = data.interpolate()
    #########################################################################
    sequential_data = []
    prev_days = deque(maxlen=sequence_length)
    
    #########################################################################
    for i in data.values:  # iterate over the values
        prev_days.append([n for n in i[:-1]])  # store all but the target
        if len(prev_days) == sequence_length:  # make sure we have 60 sequences!
            sequential_data.append([np.array(prev_days), i[-1]])  # append those bad boys!
        
    random.shuffle(sequential_data)  # shuffle for good measure.
    
    #########################################################################
    
    buys = []  # list that will store our buy sequences and targets
    sells = []  # list that will store our sell sequences and targets
    holds = []  # list that will store our hold sequences and targets
    
    for seq, target in sequential_data:  # iterate over the sequential data
        if target == 0:  
            holds.append([seq, target])  # append to hold list
        elif target == 1:  
            buys.append([seq, target])  # it's a buy!
        elif target == 2:
            sells.append([seq, target])  # it's a sell!
    
    
    random.shuffle(buys)  # shuffle the buys
    random.shuffle(sells)  # shuffle the sells!
    random.shuffle(holds)  # shuffle the holds!
    
    
    sequential_data = buys+sells+holds  # add them together
    random.shuffle(sequential_data)  # another shuffle, so the model doesn't get confused with all 1 class then the other.
    
    return sequential_data

In [5]:
def data_prepare(data,sequence_length,future_to_predict,buy_signal,sell_signal):
    
    #shifting pct change
    data['future'] = data['close'].shift(-future_to_predict)
    data['future_pct_change'] = data['close'].pct_change(future_to_predict).shift(-future_to_predict)*100
    data.dropna(inplace=True)
    #mapping
    data['target'] = list(classify(d,buy_signal,sell_signal)for d in data['future_pct_change'])
    data.dropna(inplace=True)
    #clean up
    data.drop('future',1,inplace=True)
    data.drop('future_pct_change',1,inplace=True)
#     print(data.head())
    
#     train_data,test_data = train_val_split(data)
    
#     #print(train_data.head())
#     #print(test_data.head())
#     train_X,train_y = process_data(train_data,sequence_length)
#     test_X,test_y = process_data(test_data,sequence_length)
    
#     return train_X,train_y,test_X,test_y

    sequential_data = process_data(data,sequence_length)
    
    return sequential_data

In [6]:
def split_features_and_labels(sequential_data):
    X = []
    y = []
    for seq, target in sequential_data:  # going over our new sequential data
        X.append(seq)  # X is the sequences
        y.append(target)  # y is the targets/labels (buys vs sell/notbuy)
        
    return np.array(X), np.array(y)  # return X and y...and make X a numpy array!

In [7]:
def create_model(train_X):
    model = Sequential()
    model.add(CuDNNLSTM(128, input_shape=(train_X.shape[1:]), return_sequences=True))
    model.add(Dropout(0.3))
    model.add(BatchNormalization())

    model.add(CuDNNLSTM(128, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(BatchNormalization())

    model.add(CuDNNLSTM(128))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.3))

    model.add(Dense(3, activation='softmax'))


    opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

    # Compile model
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=opt,
        metrics=['accuracy']
    )
    return model

# INPUTS

In [8]:
# stock = "JFC"
model_name = "Stock_12_30_1"
train_validation_split = 0.8
sequence_length = 30
future_to_predict = 1
epochs = 100
buy_signal = 2
sell_signal = -2

In [9]:
stock= ['WPI','ALCO','ATN','BLOOM','PXP','VUL','APX','IRC','MRC','ALLHC','MHC','JFC']

In [10]:
# data = df.get_group(stock)
# data.drop('stock code',1,inplace=True)
# data.drop('netforeign',1,inplace=True)
# data['future'] = data['close'].shift(-future_to_predict)
# data['future_pct_change'] = data['close'].pct_change(future_to_predict).shift(-future_to_predict)*100
# data.dropna(inplace=True)
# #mapping
# data['target'] = list(classify(d,buy_signal,sell_signal)for d in data['future_pct_change'])
# data.dropna(inplace=True)
# data.drop('future',1,inplace=True)
# data.drop('future_pct_change',1,inplace=True)
# sequential_data = process_data(data,sequence_length)
# for seq,target in sequential_data:
#     print(seq,target)

In [11]:
all_train = []
all_test = []

In [12]:
for s in stock:
    data = df.get_group(s)
    data.drop('stock code',1,inplace=True)
    data.drop('netforeign',1,inplace=True)

    split = int(len(data)*(train_validation_split))
    train_data = data[:split]
    test_data = data[split:]

    if(len(train_data)>sequence_length and len(test_data)>sequence_length):

        train_sequential_data = data_prepare(train_data,sequence_length,future_to_predict,buy_signal,sell_signal)
        test_sequential_data = data_prepare(test_data,sequence_length,future_to_predict,buy_signal,sell_signal)
        
#         print(len(train_sequential_data))
#         print(len(test_sequential_data))
#         print("")
        
        all_train.extend(train_sequential_data)
        all_test.extend(test_sequential_data)
        
        
        
#         print(len(all_train))
#         print(len(all_test))
#         print("")
        

    
    else:
        print('Error : sequence_length is too big for stock data')

random.shuffle(all_train)
random.shuffle(all_test)

train_X,train_y = split_features_and_labels(all_train)
test_X,test_y = split_features_and_labels(all_test)


print(train_X.shape)
print(train_y.shape)
print(test_X.shape)
print(test_y.shape)


model = create_model(train_X)

W0215 12:03:27.387280 17548 deprecation.py:506] From C:\Users\potato\Anaconda3\envs\test\lib\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


(23276, 30, 5)
(23276,)
(5509, 30, 5)
(5509,)


In [13]:
model.fit(
train_X, train_y,
batch_size=64,
epochs=epochs,
validation_data=(test_X,test_y))

Train on 23276 samples, validate on 5509 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100


Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x20602584188>

In [14]:
model.save(f"model_saved/{model_name}.model")