In whatever folder this file lives, make sure there is a `models` folder and a `data` folder containing the CSV stock files. You only have to tell it which stock you are interested in (set `STOCK` below). A file is written out that contains two columns: the first column is the overall accuracy of the stock price predictions, and the second column is the class prediction accuracy.

In [6]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import torch.utils.data as Data
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

##########################################################
# Ticker symbol; train() reads ./data/<STOCK>.csv
STOCK = "VZ"
##########################################################
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# --- model / data dimensions ---
input_size = 20      # lagged closing prices fed to the LSTM per time step
output_size = 5      # closing prices predicted per time step
seq_length = 5       # time steps per training sample

# --- optimisation settings ---
lr = 0.02            # initial Adam learning rate
batch_size = 12
num_epochs = 100

# A predicted price counts as "correct" when it is within this fraction
# of the true price.
threshold = 0.02

##########################################################
#no changes here
class RNN(nn.Module):
    """Single-layer LSTM followed by a linear head and tanh at every time step.

    Args:
        in_features: Number of features per time step. Defaults to the
            module-level ``input_size``.
        out_features: Number of values emitted per time step. Defaults to
            the module-level ``output_size``.
        hidden_size: Width of the LSTM hidden state.
    """

    def __init__(self, in_features=None, out_features=None, hidden_size=128):
        super(RNN, self).__init__()
        # Fall back to the notebook-level constants so ``RNN()`` keeps working.
        if in_features is None:
            in_features = input_size
        if out_features is None:
            out_features = output_size

        self.rnn = nn.LSTM(
            input_size=in_features,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,   # tensors are (batch, time_step, feature)
        )
        # NOTE: defined but never applied in forward(); it holds no
        # parameters, so saved state_dicts are unaffected either way.
        self.dropout = nn.Dropout(p=0.2)
        self.out = nn.Linear(hidden_size, out_features)
        self.tanh = nn.Tanh()

    def forward(self, x, h_state):
        """Run the LSTM over ``x`` and project every time step.

        Args:
            x: Tensor of shape (batch, time_step, in_features).
            h_state: Initial LSTM state as accepted by ``nn.LSTM``
                (``None`` starts from zeros).

        Returns:
            Tuple ``(out, h_state)`` where ``out`` has shape
            (batch, time_step, out_features) with values in [-1, 1] and
            ``h_state`` is the final LSTM state.
        """
        # r_out: (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)
        # nn.Linear acts on the last dimension, so the whole sequence is
        # projected in one call instead of looping over time steps.
        out = self.tanh(self.out(r_out))

        return out, h_state


def _move_classes(prices, last_pr):
    """Multi-hot encode price moves relative to a reference price.

    Args:
        prices: NumPy array of shape (batch, seq, window) with prices in
            original (de-normalised) units.
        last_pr: NumPy array of shape (batch, seq, 1) with the reference
            price for each (batch, seq) entry.

    Returns:
        Float array of shape (batch, seq, 5). Column order is
        [down >= 5%, down >= 2%, no change, up >= 2%, up >= 5%]; a band
        column is 1.0 when any price in the window reaches that band.
    """
    band_02 = last_pr * 0.02
    band_05 = last_pr * 0.05
    down_05 = (prices <= last_pr - band_05).any(axis=2)
    # A 5% move also counts as a 2% move, exactly as in the original loops.
    down_02 = (prices <= last_pr - band_02).any(axis=2) | down_05
    up_05 = (prices >= last_pr + band_05).any(axis=2)
    up_02 = (prices >= last_pr + band_02).any(axis=2) | up_05

    classes = np.zeros(prices.shape[:2] + (5,))
    classes[:, :, 0] = down_05
    classes[:, :, 1] = down_02
    # NOTE(review): the "no change" column has always been set for every
    # window, whatever the other columns say. That behaviour is preserved
    # here; confirm whether it should instead mean "no band was reached".
    classes[:, :, 2] = 1.0
    classes[:, :, 3] = up_02
    classes[:, :, 4] = up_05
    return classes


def _evaluate(rnn, loss_func, scaler, x_eval, y_eval):
    """Score the model on held-out windows.

    Args:
        rnn: The model being trained.
        loss_func: Loss used for the reported validation loss.
        scaler: Fitted ``MinMaxScaler`` used to map values back to prices.
        x_eval: CPU tensor of shape (n, seq_length, input_size).
        y_eval: CPU tensor of shape (n, seq_length, output_size).

    Returns:
        Tuple ``(val_loss, correct, cls_acc)``: the loss on normalised
        values, the fraction of predicted prices within ``threshold`` of
        the true price, and the fraction of matching class flags.
    """
    bt_sz, seq_len, pred_wd = y_eval.shape

    # Column 0 of the inputs is the most recent close before the window.
    last_pr = scaler.inverse_transform(x_eval[:, :, 0].reshape(-1, 1).numpy())
    last_pr = last_pr.reshape(bt_sz, seq_len, 1)
    true_flat = scaler.inverse_transform(y_eval.reshape(-1, 1).numpy())

    rnn.eval()
    with torch.no_grad():  # no autograd graph is needed for validation
        inputs = x_eval.view(-1, seq_length, input_size).type(torch.float).to(device)
        target = y_eval.view(-1, seq_length * output_size).type(torch.float).to(device)
        output, _ = rnn(inputs, h_state=None)
        val_loss = loss_func(output.view(-1, seq_length * output_size), target).item()
    rnn.train()  # training continues after this call

    pred_flat = scaler.inverse_transform(output.cpu().numpy().reshape(-1, 1))

    # A prediction is "correct" when it is off by at most `threshold`
    # (as a fraction of the true price).
    within = np.absolute(pred_flat - true_flat) <= threshold * true_flat
    correct = np.count_nonzero(within) / len(true_flat)

    tru_cls = _move_classes(true_flat.reshape(bt_sz, seq_len, pred_wd), last_pr)
    prd_cls = _move_classes(pred_flat.reshape(bt_sz, seq_len, pred_wd), last_pr)
    cls_acc = int((prd_cls == tru_cls).sum()) / prd_cls.size

    return val_loss, correct, cls_acc


def train(stock):
    """Train the LSTM on one stock's closing prices and log validation accuracy.

    Reads ``./data/<stock>.csv`` (``Date`` and ``Close`` columns), keeps the
    last 1000 closes, min-max scales them and builds samples of
    ``seq_length`` steps, each step mapping ``input_size`` lagged closes to
    the next ``output_size`` closes. After the first optimiser step of every
    epoch the model is scored on the whole held-out split and a line
    ``<price accuracy>\\t<class accuracy>`` is appended to
    ``ClassAccur_<stock>.txt``. A checkpoint is written to
    ``./models/model_<epoch>.pkl`` after every epoch.

    Args:
        stock: Ticker symbol used to locate the CSV and name the log file.
    """
    import os  # local import so this block does not depend on other edits

    csv_file = "./data/" + str(stock) + ".csv"
    df = pd.read_csv(csv_file, parse_dates=['Date']).sort_values(by='Date')

    cp = df['Close'].to_numpy().reshape(-1, 1)
    cp = cp[-1000:, :]
    scaler = MinMaxScaler().fit(cp)
    norm_scale = scaler.transform(cp)
    df = pd.DataFrame(norm_scale, columns=['Close'])

    ##########################################################
    # One lag column per input feature: prev1 is yesterday's close, prev2
    # the day before, and so on.
    prev = ['prev' + str(i + 1) for i in range(input_size)]
    for lag, col in enumerate(prev, start=1):
        df[col] = df['Close'].shift(lag)

    # Drop the leading rows that do not yet have a full set of lags.
    df = df.dropna(subset=[prev[-1]]).reset_index(drop=True)

    ##########################################################
    # x holds the lagged inputs for each day, y the close of that day.
    x = df[prev].to_numpy(dtype=np.float32)
    y = df['Close'].to_numpy(dtype=np.float32).reshape(-1, 1)

    # w: input rows sampled every 20 days; z: the output_size closes that
    # start on the same day. Stopping at len(y) - output_size + 1 keeps
    # every target slice full length.
    starts = range(0, len(y) - output_size + 1, 20)
    w = np.array([x[i] for i in starts]).astype(dtype=np.float32)
    w = np.reshape(w, (-1, input_size))
    z = np.array([y[i:i + output_size] for i in starts]).astype(dtype=np.float32)
    z = np.reshape(z, (-1, output_size))

    ###########################################################
    # Sliding windows of seq_length consecutive rows form one sample.
    n_samples = len(w) - seq_length + 1
    xs = np.array([w[i:i + seq_length, :].copy() for i in range(n_samples)])
    ys = np.array([z[i:i + seq_length, :].copy() for i in range(n_samples)])

    x_train, x_test, y_train, y_test = train_test_split(xs, ys, test_size=0.25)

    train_dataset = Data.TensorDataset(torch.from_numpy(x_train),
                                       torch.from_numpy(y_train))
    train_loader = Data.DataLoader(
        dataset=train_dataset,
        batch_size=batch_size,
        shuffle=True,
    )

    # The held-out split is scored in one pass, so every epoch reports on
    # the same samples.
    x_test = torch.from_numpy(x_test)
    y_test = torch.from_numpy(y_test)

    rnn = RNN().to(device)
    optimizer = torch.optim.Adam(rnn.parameters(), lr=lr)
    loss_func = nn.MSELoss()

    # Checkpoints go here; create the folder instead of failing on save.
    os.makedirs('./models', exist_ok=True)

    # `with` guarantees the log is flushed and closed even if training fails.
    with open("ClassAccur_" + str(stock) + ".txt", "a") as file1:
        for epoch in range(num_epochs):
            for i, (batch_x, batch_y) in enumerate(train_loader):
                batch_x = batch_x.view(-1, seq_length, input_size).type(torch.float).to(device)
                batch_y = batch_y.view(-1, seq_length * output_size).type(torch.float).to(device)

                # Forward pass
                outputs, _ = rnn(batch_x, h_state=None)
                loss = loss_func(outputs.view(-1, seq_length * output_size), batch_y)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Validate once per epoch, right after the first update.
                if i == 0:
                    val_loss, correct, cls_acc = _evaluate(
                        rnn, loss_func, scaler, x_test, y_test)
                    print('Epoch: ' + str(epoch) + ' | Val_Loss: ' + str(val_loss) + ' | CR: ' + str(correct))
                    print('Epoch: '+ str(epoch)+ ' | Class Corr: '+ str(cls_acc)+'\n')
                    file1.write(str(correct)+'\t'+str(cls_acc)+'\n')

            # Halve the learning rate every 20 epochs.
            if (epoch + 1) % 20 == 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] /= 2

            torch.save(rnn.state_dict(), './models/model_' + str(epoch) + '.pkl')
            print('Model saved\n')


if __name__ == '__main__':
    train(stock = STOCK)

Epoch: 0 | Val_Loss: 0.037934400141239166 | CR: 0.24333333333333335
Epoch: 0 | Class Corr: 0.6766666666666666

Model saved

Epoch: 1 | Val_Loss: 0.6437296867370605 | CR: 0.09333333333333334
Epoch: 1 | Class Corr: 0.5933333333333334

Model saved

Epoch: 2 | Val_Loss: 0.12701845169067383 | CR: 0.10666666666666667
Epoch: 2 | Class Corr: 0.67

Model saved

Epoch: 3 | Val_Loss: 0.09960348904132843 | CR: 0.19333333333333333
Epoch: 3 | Class Corr: 0.6566666666666666

Model saved

Epoch: 4 | Val_Loss: 0.04882657900452614 | CR: 0.19
Epoch: 4 | Class Corr: 0.6533333333333333

Model saved

Epoch: 5 | Val_Loss: 0.04259958490729332 | CR: 0.13666666666666666
Epoch: 5 | Class Corr: 0.6933333333333334

Model saved

Epoch: 6 | Val_Loss: 0.039697811007499695 | CR: 0.14666666666666667
Epoch: 6 | Class Corr: 0.6733333333333333

Model saved

Epoch: 7 | Val_Loss: 0.04868621379137039 | CR: 0.23333333333333334
Epoch: 7 | Class Corr: 0.7133333333333334

Model saved

Epoch: 8 | Val_Loss: 0.0451948307454586 | CR

Model saved

Epoch: 80 | Val_Loss: 0.00425325334072113 | CR: 0.66
Epoch: 80 | Class Corr: 0.8566666666666667

Model saved

Epoch: 81 | Val_Loss: 0.0042058187536895275 | CR: 0.6766666666666666
Epoch: 81 | Class Corr: 0.8533333333333334

Model saved

Epoch: 82 | Val_Loss: 0.0042961593717336655 | CR: 0.6833333333333333
Epoch: 82 | Class Corr: 0.8566666666666667

Model saved

Epoch: 83 | Val_Loss: 0.004258580971509218 | CR: 0.6833333333333333
Epoch: 83 | Class Corr: 0.8566666666666667

Model saved

Epoch: 84 | Val_Loss: 0.0041662477888166904 | CR: 0.6833333333333333
Epoch: 84 | Class Corr: 0.8566666666666667

Model saved

Epoch: 85 | Val_Loss: 0.0041434685699641705 | CR: 0.6766666666666666
Epoch: 85 | Class Corr: 0.8566666666666667

Model saved

Epoch: 86 | Val_Loss: 0.004127693362534046 | CR: 0.6733333333333333
Epoch: 86 | Class Corr: 0.86

Model saved

Epoch: 87 | Val_Loss: 0.004119297489523888 | CR: 0.6933333333333334
Epoch: 87 | Class Corr: 0.86

Model saved

Epoch: 88 | Val_Loss: 0.00

The following is what I used to generate predictions. I just copied the RNN model from above. I am analyzing a stock file called VZ.csv. VZ.csv is just the GPS stock from October to April 28th. This returns prd_cls[-1][-1], which is the very last prediction class, based on April 28th. The five classes are decrease by 5%, decrease by 2%, no change, increase by 2%, and increase by 5%.

In [7]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import torch.utils.data as Data
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

##########################################################
# Ticker symbol; the prediction code reads ./data/<STOCK>.csv
STOCK = "VZ"
##########################################################
# --- model / data dimensions (must match the saved checkpoint) ---
input_size = 20      # closing prices fed to the LSTM per time step
output_size = 5      # closing prices predicted per time step
seq_length = 5       # time steps per sample

# --- training-era settings (not referenced by the visible prediction code) ---
lr = 0.02
batch_size = 12
num_epochs = 10
threshold = 0.02

##########################################################

class RNN(nn.Module):
    """Single-layer LSTM followed by a linear head and tanh at every time step.

    Args:
        in_features: Number of features per time step. Defaults to the
            module-level ``input_size``.
        out_features: Number of values emitted per time step. Defaults to
            the module-level ``output_size``.
        hidden_size: Width of the LSTM hidden state.
    """

    def __init__(self, in_features=None, out_features=None, hidden_size=128):
        super(RNN, self).__init__()
        # Fall back to the notebook-level constants so ``RNN()`` keeps working.
        if in_features is None:
            in_features = input_size
        if out_features is None:
            out_features = output_size

        self.rnn = nn.LSTM(
            input_size=in_features,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,   # tensors are (batch, time_step, feature)
        )
        self.out = nn.Linear(hidden_size, out_features)
        self.tanh = nn.Tanh()

    def forward(self, x, h_state):
        """Run the LSTM over ``x`` and project every time step.

        Args:
            x: Tensor of shape (batch, time_step, in_features).
            h_state: Initial LSTM state as accepted by ``nn.LSTM``
                (``None`` starts from zeros).

        Returns:
            Tuple ``(out, h_state)`` where ``out`` has shape
            (batch, time_step, out_features) with values in [-1, 1] and
            ``h_state`` is the final LSTM state.
        """
        # r_out: (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)
        # nn.Linear acts on the last dimension, so the whole sequence is
        # projected in one call instead of looping over time steps.
        out = self.tanh(self.out(r_out))

        return out, h_state

# Load the closing prices for STOCK, oldest first, and keep the last 150.
csv_file="./data/"+str(STOCK)+".csv"
df = pd.read_csv(csv_file, parse_dates=['Date']).sort_values(by='Date')
cp = df['Close'].to_numpy().reshape(-1, 1)
cp = cp[-150:]

# NOTE(review): this scaler is fitted on the last 150 closes, while train()
# fits its scaler on the last 1000. If the two windows have a different
# min/max, the checkpoint sees inputs on a different scale than it was
# trained on -- confirm this is intended or persist the training scaler.
scaler = MinMaxScaler().fit(cp)
norm_scale = scaler.transform(cp)
df = pd.DataFrame(norm_scale, columns=['Close'])

##########################################################
# Build input_size feature columns per day: prev0 is that day's close,
# prev1 the close one day earlier, and so on.
prev = ['prev' + str(i) for i in range(input_size)]
for lag, col in enumerate(prev):
    df[col] = df['Close'].shift(lag)
# Drop the leading rows that do not yet have a full set of lags.
df = df.dropna(subset=[prev[-1]]).reset_index(drop=True)

##########################################################
# x is the prediction day input
x = df[prev].to_numpy(dtype=np.float32)

# Every row is used here (no sub-sampling), so w is simply a copy of x.
w = np.array(x).astype(dtype=np.float32)
w = np.reshape(w, (-1, input_size))

# Sliding windows of seq_length consecutive days form one model input.
xs = [w[i : i + seq_length, :].copy() for i in range(0, len(w) - seq_length + 1, 1)]

###########################################################

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# yt is only a placeholder target so that TensorDataset yields
# (input, target) pairs; its values are never used as labels.
xt = np.array(xs)
yt = xt

xt = torch.from_numpy(xt)
yt = torch.from_numpy(yt)


test_data = Data.TensorDataset(xt, yt)
test_loader = Data.DataLoader(dataset=test_data, batch_size=12, shuffle=False)


rnn = RNN().to(device)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
# machine (and vice versa).
rnn.load_state_dict(torch.load('models/model_98.pkl', map_location=device))

def _move_classes(prices, last_pr):
    """Multi-hot encode price moves relative to a reference price.

    Args:
        prices: NumPy array of shape (batch, seq, window) with prices in
            original (de-normalised) units.
        last_pr: NumPy array of shape (batch, seq, 1) with the reference
            price for each (batch, seq) entry.

    Returns:
        Float array of shape (batch, seq, 5). Column order is
        [down >= 5%, down >= 2%, no change, up >= 2%, up >= 5%]; a band
        column is 1.0 when any price in the window reaches that band.
    """
    band_02 = last_pr * 0.02
    band_05 = last_pr * 0.05
    down_05 = (prices <= last_pr - band_05).any(axis=2)
    # A 5% move also counts as a 2% move, exactly as in the original loops.
    down_02 = (prices <= last_pr - band_02).any(axis=2) | down_05
    up_05 = (prices >= last_pr + band_05).any(axis=2)
    up_02 = (prices >= last_pr + band_02).any(axis=2) | up_05

    classes = np.zeros(prices.shape[:2] + (5,))
    classes[:, :, 0] = down_05
    classes[:, :, 1] = down_02
    # NOTE(review): the "no change" column has always been set for every
    # window, whatever the other columns say. That behaviour is preserved
    # here; confirm whether it should instead mean "no band was reached".
    classes[:, :, 2] = 1.0
    classes[:, :, 3] = up_02
    classes[:, :, 4] = up_05
    return classes


def test(model, test_loader, device, stock):
    """Predict move classes and record the one for the most recent window.

    Args:
        model: Trained network; called as ``model(x, h_state=None)``.
        test_loader: Either a tensor of shape (n, seq_len, input_size) or
            an iterable of batches whose first element is such a tensor
            (e.g. a ``DataLoader``).
        device: Device on which inference runs.
        stock: Ticker symbol, used in the output file name and line.

    Returns:
        NumPy array of five 0/1 flags for the last time step of the last
        window, ordered [down 5%, down 2%, no change, up 2%, up 5%].

    Side effects:
        Prints the ticker and the flags, and appends
        ``<stock>\\t<flags>`` as one line to ``ClassPred_<stock>.txt``.
    """
    print(stock)

    # The __main__ call passes the raw tensor; also accept a real loader.
    if isinstance(test_loader, torch.Tensor):
        t_x = test_loader
    else:
        t_x = torch.cat([batch[0] for batch in test_loader], dim=0)

    bt_sz = t_x.size(0)
    seq_len = t_x.size(1)
    pred_wd = output_size

    # Column 0 of each input row is the most recent close; map it back to
    # a price so the +/-2% and +/-5% bands are in original units.
    last_pr = scaler.inverse_transform(t_x[:, :, 0].reshape(-1, 1).cpu().numpy())
    last_pr = np.reshape(last_pr, (bt_sz, seq_len, 1))

    model.to(device)
    model.eval()
    with torch.no_grad():
        # Use the model that was passed in and feed it inputs on its device.
        test_output, _ = model(t_x.type(torch.float).to(device), h_state=None)

    pred_pr = scaler.inverse_transform(test_output.cpu().numpy().reshape(-1, 1))
    pred_pr = np.reshape(pred_pr, (bt_sz, seq_len, pred_wd))

    prd_cls = _move_classes(pred_pr, last_pr)
    last_prd = prd_cls[-1][-1]
    print(last_prd)

    # Append one line per run; the trailing newline keeps runs separate.
    with open("ClassPred_" + str(stock) + ".txt", "a") as file1:
        file1.write(str(stock) + '\t' + str(last_prd) + '\n')
    return last_prd


if __name__ == '__main__':
    test(rnn, xt, device, STOCK)

VZ
[0. 0. 1. 0. 0.]
