In [13]:
import numpy as np
import random
import _pickle as cPickle
import gzip

In [32]:
# basic sigmoid function: similar to logistic regression
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

# derivative of the sigmoid function
def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

# Network object defines the initial setting of the model- number of layers and neurons

class Network(object):
    
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x,y in zip(sizes[:-1], sizes[1:])]
        
# feedforward method of the network vectorizes the sigmoid function over the matrices
# of weights and biases. returns output of the model if 'a' is input. 
# Review matrix multiplication to understand the output dimensions

    def feedfoward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

# stochastic gradient descent, using mini batches of samples.

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        if test_data: n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k+mini_batch_size] 
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print('Epoch {0}: {1} / {2}'.format(j, self.evaluate(test_data), n_test))
            else:
                print('Epoch {0} complete'.format(j))
                
# update mini batch code

    def update_mini_batch(self, mini_batch, eta):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
            
        self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]

#backpropagation code
    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        activation = x
        activations = [x] #all activations layer by layer
        zs = [] #all z vectors layer by layer
        for b, w in zip(self.biases, self.weights):
            w.dtype()
            b.dtype()
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        for l in xrange(2, self.numlayers):
            z = zs[-1]
            sp = sigmoid_prime(z)
            delta = np.dot(delta, activations[-l-1].transpose())
        return (nabla_d, nabla_w)
        
    def cost_derivative(self, output_activation, y):
        return(output_activation-y)
    
    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedfoward(x)), y) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)        

In [28]:
net = Network([9, 30, 3])

In [29]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.cross_validation import cross_val_score 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

xls_file = pd.ExcelFile('horly_corn_log_reg.xlsx')
table = xls_file.parse('Sheet1')

#create a column of cent changes of the candlesticks
table['change'] = table['close']-table['open']
#remove cent changes of zero from a table
table = table[table.change != 0]
#create a binary variable value for change
table['long'] = 0
#assign +-1 values for the binary variable
table['long'] = table['long'].where(table['change'] < 0, 1)
table['long'] = table['long'].where(table['change'] > 0, -1)
#table['long'] = table['long'].where(table['change'] != 0, 0)
#create a column of cent changes of the candlesticks of all the previous candles
for i in range(9):
    table['change'+str(i+1)] = table['close'+str(i+1)] - table['open'+str(i+1)]
#create a binary variable value for change of all the previous candles
for i in range(9):
    table['long'+str(i+1)] = 1
#assign +-1 values for the binary variable of all the previous candles    
for n in range(9):    
    table['long'+str(n+1)] = table['long'+str(n+1)].where(table['change'+str(n+1)] < 0, 1)
    table['long'+str(n+1)] = table['long'+str(n+1)].where(table['change'+str(n+1)] > 0, -1)
    table['long'+str(n+1)] = table['long'+str(n+1)].where(table['change'+str(n+1)] != 0, 0)
    
table2 = pd.concat([table['change1'], table['change2']], axis=1)

#define features
x = table2
y = table['long']
#split test train
X_train, X_test, y_train, y_test = train_test_split(x , y, test_size=0.3, random_state=0)

In [30]:
training_data = list(zip(X_train, y_train))
test_data = list(zip(X_test, y_test))

In [33]:
net.SGD(training_data, 30, 10, 3.0, test_data=test_data)

TypeError: Cannot cast array data from dtype('float64') to dtype('<U32') according to the rule 'safe'