<a href="https://colab.research.google.com/github/srmt99/stock-market/blob/master/dataset_creator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%tensorflow_version 2.x
import glob
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
from tensorflow import keras
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import glob
import os
import copy
# Notebook-wide plotting defaults: 6x4 inch figures with the axes grid disabled.
mpl.rcParams.update({'figure.figsize': (6, 4), 'axes.grid': False})

In [0]:
###### hyper_Parameters ######
# Only plain configuration values live in this cell; figures are created in
# the cells that actually draw something.
data_set_num = 9          # not referenced by the visible cells -- TODO confirm it is still needed
start_from_market = 100   # not referenced by the visible cells -- TODO confirm it is still needed
test_split = 0.2          # fraction of the records held out for testing
future_prediction = 1     # not referenced by the visible cells (windows are built with future=1)
corr_w = 40               # rows per window when correlating a stock with every market
w = 30                    # rows (columns of a record) per training window

In [0]:
!wget https://github.com/srmt99/stock-market/blob/master/data/markets.npy?raw=true
!wget https://github.com/srmt99/stock-market/blob/master/data/stocks_1.npy?raw=true
!wget https://github.com/srmt99/stock-market/blob/master/data/stocks_2.npy?raw=true

stocks = []
for i in np.load("stocks_1.npy?raw=true",allow_pickle=True):
  stocks.append(i)
for i in np.load("stocks_2.npy?raw=true",allow_pickle=True):
  stocks.append(i)
stocks = np.array(stocks)
markets = np.load("markets.npy?raw=true",allow_pickle=True)

min_market_len = len(markets[0])
for i in markets[1:]:
  if len(i)<min_market_len:
    min_market_len = len(i)

for i in range(len(markets)):
  while len(markets[i])>min_market_len:
    markets[i] = np.delete(markets[i],0,0)

for i in range(len(stocks)):
  while len(stocks[i])>min_market_len:
    stocks[i] = np.delete(stocks[i],0,0)

markets = np.stack(markets,0)

In [0]:
# Plot column 1 of one randomly chosen stock and one randomly chosen market.
r1 = np.random.randint(len(stocks))
r2 = np.random.randint(len(markets))
print(r1,r2)
plt.figure(figsize=(20,6))
for series, name in ((stocks[r1], "stock prices"), (markets[r2], "market")):
  plt.plot(series[:,1], label=name)
plt.legend()
plt.show()

In [0]:
def turn_to_windows(input,kernel,future):
    """Cut a 2-D array into sliding windows with the values that follow them.

    For every position ``s`` in ``range(kernel, len(input) - future)`` the
    window is the ``kernel`` rows that end just before ``s`` (all columns) and
    the label is column 1 of the ``future`` rows that start at ``s``.

    Returns a tuple ``(data, labels)`` where ``data`` stacks the windows and
    ``labels`` has shape ``(number_of_windows, future)``.
    """
    starts = range(kernel, len(input) - future)
    windows = [input[s - kernel:s, :] for s in starts]
    targets = [input[s:s + future, 1] for s in starts]
    return np.array(windows), np.array(targets).reshape(len(targets), future)

def turn_to_windows_multi(input,kernel):
  """Slide a window of ``kernel`` columns along axis 1 of a 2-D array.

  One window is produced for every end position in
  ``range(kernel, input.shape[1] - 1)``; each window keeps all rows and the
  ``kernel`` columns that end just before that position. The windows are
  returned stacked along a new first axis.
  """
  stop = input.shape[1] - 1
  return np.array([input[:, end - kernel:end] for end in range(kernel, stop)])

def smooth(input):
    """Return a list with a 5-point moving average of ``input``.

    The first and last elements are copied unchanged, the second and the
    second-to-last positions hold the mean of the two outermost elements on
    their side, and every other position holds the mean of the element and its
    two neighbours on each side.

    The two leading and two trailing entries are always emitted, so an input
    with fewer than four elements still yields four values; an input with
    fewer than two elements raises ``IndexError``.
    """
    n = len(input)
    head = [input[0], np.mean([input[0], input[1]])]
    middle = [
        np.mean([input[j] for j in range(centre - 2, centre + 3)])
        for centre in range(2, n - 2)
    ]
    tail = [np.mean([input[n - 2], input[n - 1]]), input[n - 1]]
    return head + middle + tail

In [0]:
# For every stock, build one feature record per window plus its label and
# save both lists to "records_<count>.npy".
# Each record is a (2*len(markets)+5, w) array made of three parts:
#   rows 0-4                          : a stock window (columns 1 onward), transposed
#   next len(markets) rows            : stock/market correlations for w consecutive windows
#   last len(markets) rows            : column 1 of every market for a w-column window
for count,stock in enumerate(stocks):
  records = []
  labels = []
  correlations = []
  # Windows of corr_w rows; y (the following value of column 1) is not used
  # here because x,y are rebuilt with window length w further down.
  x,y = turn_to_windows(stock,corr_w,1)
  for wc,window in enumerate(x[:,:,1]):
    # np.corrcoef puts `window` in row 0 and one row per market after it, so
    # [1:,0] is the correlation of each market with this stock window.
    # The two slice bounds differ by exactly corr_w, matching the window length.
    # NOTE(review): the slice covers the same row indices as the stock window
    # only when len(stock) == min_market_len -- confirm shorter stocks cannot occur.
    corr = np.corrcoef(window,markets[: , min_market_len - len(x) - corr_w - 1 + wc : min_market_len - len(x) + wc -1 , 1])[1:,0]
    # NaN correlations are replaced by 0.
    correlations.append( np.nan_to_num(corr) )
  correlations = np.array(correlations)
  # Rebuild the stock windows with length w, and build windows of w columns
  # over column 1 of all markets.
  x,y = turn_to_windows(stock,w,1)
  x2 = turn_to_windows_multi(markets[:,:,1],w)
  # Walk backwards from the newest correlation row; wc must stay above w so
  # that correlations[wc-w:wc] always holds w rows.
  # The offset (len(x)-len(correlations)-1) maps wc == len(correlations) to the
  # last window of x (and likewise for x2).
  for wc in range(len(correlations),w,-1):
      record = np.zeros( (2*len(markets)+5,w) )
      # NOTE(review): assumes the stock has 6 columns so that [:,1:] yields 5 rows after transposing -- confirm.
      record[:5,:] = np.transpose(x[wc + (len(x)-len(correlations)-1) ][:,1:]) # part 1: stock window
      record[5:5+len(markets)] = np.transpose(correlations[wc-w:wc]) # part 2: per-market correlations
      record[5+len(markets):5+2*len(markets)] = x2[wc + (len(x2)-len(correlations)-1)] # part 3: market windows
      records.append(record)
      labels.append(y[wc + (len(x)-len(correlations)-1) ])
  # np.save appends ".npy" to the name.
  # NOTE(review): records and labels have different element shapes, so
  # np.array([records,labels]) relies on ragged (object) array creation;
  # newer NumPy versions reject that without dtype=object -- confirm the
  # NumPy version in use.
  np.save(f"records_{count}",np.array([records,labels]))
  print(f"{count}/{len(stocks)}")

In [0]:
# Gather the saved per-stock records and labels into two flat arrays.
# At most 31 files are read: the loop stops after the file with index 30.
train, labels = [], []
for count, filename in enumerate(glob.glob("records_*.npy")):
  x, y = np.load(filename, allow_pickle=True)
  train.extend(x)
  # Every stored label is a length-1 sequence; keep only its value.
  labels.extend(label[0] for label in y)
  if count == 30:
    break

train, labels = np.array(train), np.array(labels)

In [0]:
""" 
We split data into 3 parts: train, validation, test
We control the number of records in each set using
test_split and val_split
"""
val_split, test_split = 0.1, 0.2
data = train
num_val, num_test = int(val_split * data.shape[0]), int(test_split * data.shape[0])

# Records and labels are split with the same boundaries, so they must line up.
assert len(labels) == data.shape[0], "records and labels differ in length"

# Explicit non-negative boundaries, keeping the existing record order:
# [0, val_start) is train, [val_start, test_start) is validation and
# [test_start, end) is test. Negative offsets are avoided on purpose: when a
# split size is 0, data[-0:] would select everything and data[:-0] nothing.
val_start = data.shape[0] - (num_val + num_test)
test_start = data.shape[0] - num_test

train_x, train_y = data[:val_start], labels[:val_start]
val_x, val_y = data[val_start:test_start], labels[val_start:test_start]
test_x, test_y = data[test_start:], labels[test_start:]

for split_name, split in (('train_x', train_x), ('train_y', train_y),
                          ('val_x', val_x), ('val_y', val_y),
                          ('test_x', test_x), ('test_y', test_y)):
  print(f'{split_name}:', split.shape)

In [0]:
# Standardise every feature row with statistics taken over all records
# (axis 0) and all window positions (axis 2). keepdims=True keeps the result
# broadcastable against `train` for any number of feature rows.
train_mean = np.mean(train, (0, 2), keepdims=True)
train_std = np.std(train, (0, 2), keepdims=True)
labels_mean = np.mean(labels)
labels_std = np.std(labels)

# NOTE(review): the statistics include the records that end up in the test
# set, and re-running this cell normalises already-normalised data.
train = (train - train_mean) / train_std
labels = (labels - labels_mean) / labels_std

# take()/skip() expect an integer element count.
test_size = int(test_split * len(train))

train_set = tf.data.Dataset.from_tensor_slices((train, labels))
# The shuffle that precedes the split must produce the same order on every
# iteration; otherwise take()/skip() select different elements each epoch and
# test records leak into training.
train_set = train_set.shuffle(10000, reshuffle_each_iteration=False)
test_set = train_set.take(test_size).batch(256)
train_set = train_set.skip(test_size)
# Reshuffle only the training part between epochs.
train_set = train_set.shuffle(10000).batch(256)

In [0]:
"""
Building and training a model
"""
# Layer sizes and optimisation settings.
lstm1_out = 30
dense1_out = 100
dense2_out = 1
num_epoch = 10
alpha = 0.001

# LSTM -> Dense -> Dense regressor, assembled layer by layer.
# NOTE(review): Keras reads input_shape as (timesteps, features); confirm that
# train[0].shape is laid out that way.
model = keras.Sequential()
model.add(keras.layers.LSTM(lstm1_out, input_shape=train[0].shape, activation='relu'))
model.add(keras.layers.Dense(dense1_out))
model.add(keras.layers.Dense(dense2_out))

# Mean squared error is both the loss and a reported metric, next to MAE.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=alpha),
    loss='mse',
    metrics=['mse', 'mae'],
)
history = model.fit(train_set, epochs=num_epoch)

In [0]:
# Sanity check: prediction for the first record next to its (normalised) label.
# train[:1] keeps the leading batch axis, so no record shape is hard-coded.
model.predict(train[:1]), labels[0]

**Notes**

1 - Clear outputs before committing changes