In [1]:
# Pytorch Imports
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import pandas as pd
import datetime
import time

In [6]:
# Function to generate two csv files for training/testing
def generate_train_test_data(file_name, percent_test=0.8):
    ''' 
        Will divide a dataset into both test and training sets
        
        Args:
        
        file_name (string): path to the csv file containing all data
        percent_test (float): percentage of dataset to set aside for testing
        
        returns nothing
    '''
    # Open file and place into dataframe
    columns = ['date', 'open', 'high', 'low', 'close', 'volume', '50ma', 'label']
    stock_data = pd.read_csv(file_name)[columns]
    stock_data['date'] = stock_data['date'].apply(lambda x: time.mktime(datetime.datetime.strptime(x, "%Y-%m-%d").timetuple()), convert_dtype=True)
    stock_data = stock_data.dropna()
    return stock_data

In [7]:
def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    
    # total number of batches we can make
    n_batches = len(arr)//batch_size

    # Keep only enough days to make full batches
    arr = arr[:n_batches * batch_size]

    # iterate through the array, one sequence at a time
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:batch_size, n:n+seq_length]
        # The targets
        y = arr[:batch_size, -1:]
        yield x, y

In [8]:
def one_hot_encode(arr, n_labels):
    
    # Initialize the the encoded array
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    
    # Finally reshape it to get back to the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    
    return one_hot

In [9]:
arr = generate_train_test_data('chart_data-csv/aapl.csv').as_matrix()
batches = get_batches(arr, 10, 7)
x, y = next(batches)
y = y.astype(int)
y = one_hot_encode(y, 3)
n_input = x.shape[1]
n_output = 3

In [10]:
print('x\n', x)
print('\ny\n', y)

x
 [[1.33222680e+09 7.80100000e+01 7.89700000e+01 7.57300000e+01
  7.88500000e+01 2.04165500e+08 1.38824658e+08]
 [1.33231320e+09 7.84300000e+01 7.93300000e+01 7.82600000e+01
  7.84000000e+01 1.61010500e+08 1.40074746e+08]
 [1.33239960e+09 7.77800000e+01 7.86600000e+01 7.74900000e+01
  7.79900000e+01 1.55967700e+08 1.41903118e+08]
 [1.33248600e+09 7.81400000e+01 7.83100000e+01 7.73400000e+01
  7.75600000e+01 1.07622200e+08 1.42980138e+08]
 [1.33274520e+09 7.80400000e+01 7.90000000e+01 7.74600000e+01
  7.89800000e+01 1.48935500e+08 1.44895912e+08]
 [1.33283160e+09 7.88800000e+01 8.01900000e+01 7.88600000e+01
  7.99600000e+01 1.51782400e+08 1.46801452e+08]
 [1.33291800e+09 8.04600000e+01 8.08600000e+01 7.94100000e+01
  8.03600000e+01 1.63865100e+08 1.48864268e+08]
 [1.33300440e+09 7.97400000e+01 8.02300000e+01 7.90100000e+01
  7.93600000e+01 1.52059600e+08 1.50521504e+08]
 [1.33309080e+09 7.92100000e+01 7.94500000e+01 7.78000000e+01
  7.80100000e+01 1.82759500e+08 1.52868002e+08]
 [1.333

In [11]:
# check if GPU is available
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

Training on GPU!


In [12]:
class CharRNN(nn.Module):
    
    def __init__(self, tokens, n_input = 7, n_output = 3, n_hidden = 64, n_layers=1,
                               drop_prob=0.2, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        ## TODO: define the LSTM
        self.lstm = nn.LSTM(n_input, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)
        
        ## TODO: define a dropout layer
        self.dropout = nn.Dropout(drop_prob)
        
        ## TODO: define the final, fully-connected output layer
        self.fc = nn.Linear(n_hidden, n_output)
      
    
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''

        # Get the outputs and the new hidden state from the lstm
        r_output, hidden = self.lstm(x, hidden)
            
        # Stack up LSTM outputs using view
        # You may need to use contiguous to reshape the output
        out = r_output.view(-1, self.n_hidden)
        
        # Put x through the fully-connected layer
        out = self.dropout(self.fc(out))
        
        # Return the final output and the hidden state
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden
        

In [13]:
def train(net, data, epochs=10, batch_size=10, seq_length=7, lr=0.001, clip=5, val_frac=0.1, print_every=10):
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    
    '''
    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # create training and validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if(train_on_gpu):
        net.cuda()
    
    counter = 0
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)

        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            
            # Make our feature and target arrays Torch tensors
            x = x.astype('f')
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            print('input at counter position {}: {}'.format(counter, inputs.shape))
            print(inputs)
            inputs.unsqueeze_(1)
            
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()
            # get the output from the model
            print('before net shape: {}'.format(inputs.shape))
            output, h = net(inputs, h)
            print('output is: {}'.format(output))
            print('after net shape: {}'.format(inputs.shape))

            # calculate the loss and perform backprop
            targets.squeeze_(-1)
            loss = criterion(output, targets.long())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    # One-hot encode our data and make them Torch tensors
                    x = x.astype('f')
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()
                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.long())
                
                    val_losses.append(val_loss.item())
                
                net.train() # reset to train mode after iterationg through validation data
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [14]:
# define and print the net
n_input=7
n_layers=1

net = CharRNN(arr, n_input, n_layers)
print(net)

CharRNN(
  (lstm): LSTM(7, 64, batch_first=True, dropout=0.2)
  (dropout): Dropout(p=0.2)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)


  "num_layers={}".format(dropout, num_layers))


In [16]:
batch_size = 10
seq_length = 7
n_epochs = 10 # start smaller if you are just testing initial behavior

# train the model
train(net, arr, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

input at counter position 1: torch.Size([10, 7])
tensor([[1.3322e+09, 7.8010e+01, 7.8970e+01, 7.5730e+01, 7.8850e+01, 2.0417e+08,
         1.3882e+08],
        [1.3323e+09, 7.8430e+01, 7.9330e+01, 7.8260e+01, 7.8400e+01, 1.6101e+08,
         1.4007e+08],
        [1.3324e+09, 7.7780e+01, 7.8660e+01, 7.7490e+01, 7.7990e+01, 1.5597e+08,
         1.4190e+08],
        [1.3325e+09, 7.8140e+01, 7.8310e+01, 7.7340e+01, 7.7560e+01, 1.0762e+08,
         1.4298e+08],
        [1.3327e+09, 7.8040e+01, 7.9000e+01, 7.7460e+01, 7.8980e+01, 1.4894e+08,
         1.4490e+08],
        [1.3328e+09, 7.8880e+01, 8.0190e+01, 7.8860e+01, 7.9960e+01, 1.5178e+08,
         1.4680e+08],
        [1.3329e+09, 8.0460e+01, 8.0860e+01, 7.9410e+01, 8.0360e+01, 1.6387e+08,
         1.4886e+08],
        [1.3330e+09, 7.9740e+01, 8.0230e+01, 7.9010e+01, 7.9360e+01, 1.5206e+08,
         1.5052e+08],
        [1.3331e+09, 7.9210e+01, 7.9450e+01, 7.7800e+01, 7.8010e+01, 1.8276e+08,
         1.5287e+08],
        [1.3334e+09, 7.8

RuntimeError: input.size(-1) must be equal to input_size. Expected 7, got 1

## OLD CODE

In [None]:
# Define our network structure
class PricePredictNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Define our layer structure
        self.fc1 = nn.Linear(6, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 3)
        
        # Add dropout capability
        self.dropout = nn.Dropout(p=0.2)
        
    def forward(self, x):
        
        # Flatten tensor
        x = x.view(x.shape[0], -1)
        
        # Feed forward
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))

        #Output layer -- no dropout
        x = F.log_softmax(self.fc3(x))
        
        return x

In [None]:
# Define training/validation loop
model = PricePredictNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.02)

epochs = 250
train_losses = []
test_losses = []

# Train and Validate network
for e in range(epochs):
    train_loss = 0
    for sample in trainloader:
        # Get data and labels from sample
        data = sample['data']
        labels = torch.transpose(sample['label'],0,1)
        labels = labels.to(dtype=torch.int64)
        
        
        # Reset our gradiants
        optimizer.zero_grad()
        
        # Feed forward
        output = model(data)

        print(output)
        # Loss
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        # Tally up loss
        train_loss += loss.item()
    else:
        # Validation step
        model.eval()
        with torch.no_grad():
            test_loss = 0
            for sample in testloader:
                # Get data and labels from sample
                test_data = sample['data']
                labels = torch.transpose(sample['label'],0,1)
                
                # Run model
                output = model(test_data)
                output = torch.transpose(output,0,1)
                
                # Calculate loss
                loss = criterion(output, labels)
                test_loss += loss.item()
        
        # Record loss values
        train_losses.append(train_loss/len(trainset))
        test_losses.append(test_loss/len(testset))
        
        # Set model back to training mode to include dropout
        model.train()
    
    if(e%10 == 0):
        print('Epoch {0}\n-----------------'.format(str(e+1)))
        print('Training Loss: {0}'.format(train_losses[e]))
        print('Testing Loss: {0}\n'.format(test_losses[e]))

In [None]:
# Plot performance of the training
plt.plot(train_losses, label='Training loss')
plt.plot(test_losses, label='Validation loss')
plt.legend(frameon=False)

In [None]:
# Network Stats
avg_train_loss = np.mean(train_losses)
avg_test_loss = np.mean(test_losses)
print('Average Training Loss: {0}'.format(avg_train_loss))
print('Average Testing Loss: {0}'.format(avg_test_loss))