In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np

### Data Preprocessing

In [1]:
def read_goog_sp500_dataframe(googFile='data/GOOG.csv', spFile='data/SP_500.csv'):
    """Load GOOG and S&P 500 prices and return their period-over-period returns.

    Both CSVs are read with column 0 as the date and column 5 as the price.
    The two series are paired by date (not by row position), so files that
    are ordered differently, or that do not cover exactly the same dates,
    cannot be silently mismatched. Dates missing from either file are dropped.

    Args:
        googFile: path to the GOOG price CSV.
        spFile: path to the S&P 500 price CSV.

    Returns:
        DataFrame with columns 'Goog' and 'SP500' holding the fractional
        change from the previous date, in ascending date order. The first
        row is NaN because it has no predecessor.
    """
    goog = pd.read_csv(googFile, sep=',', usecols=[0, 5], names=['Date', 'Goog'], header=0)
    sp = pd.read_csv(spFile, sep=',', usecols=[0, 5], names=['Date', 'SP500'], header=0)

    # Parse both date columns the same way so the join keys are comparable.
    goog['Date'] = pd.to_datetime(goog['Date'], format='%Y-%m-%d')
    sp['Date'] = pd.to_datetime(sp['Date'], format='%Y-%m-%d')

    # Join on the date itself; assigning sp['SP500'] directly would align on
    # the row index, i.e. on each row's position within its own file.
    prices = goog.merge(sp, on='Date', how='inner').sort_values('Date', ascending=True)

    # Keep only the numeric price columns (this drops 'Date') before differencing.
    returns = prices.select_dtypes(include=['float64', 'int64']).pct_change()

    return returns

In [3]:
def read_goog_sp500_logistic_data():
    """Build the inputs for a logistic regression on the returns table.

    Returns:
        A tuple (xData, yData). xData is an array whose columns are the
        SP500 return and a constant 1 intercept. yData is a boolean Series
        that is True where the Goog return is positive. The first and last
        rows of the returns table are left out of both.
    """
    frame = read_goog_sp500_dataframe()
    frame['Intercept'] = 1

    # Trim the first and the last row from features and labels alike.
    features = np.array(frame[["SP500", "Intercept"]].iloc[1:-1])
    is_up = frame["Goog"] > 0
    labels = is_up.iloc[1:-1]
    return (features, labels)

In [7]:
# Feature matrix (SP500 return + intercept column) and boolean "Goog return > 0" labels.
xData, yData = read_goog_sp500_logistic_data()

### Baseline Implementation

In [157]:
from sklearn.linear_model import LogisticRegression

In [185]:
# In scikit-learn C is the inverse regularization strength, so a large C keeps the L2 penalty weak.
classifier = LogisticRegression(C=1e5,solver='liblinear')

In [186]:
# Fit on the first column of xData only (the SP500 return), reshaped to a single-feature 2-D array;
# the explicit intercept column is not passed because the classifier fits its own intercept.
classifier.fit(xData[:,0].reshape(-1,1),yData)

LogisticRegression(C=100000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

In [187]:
# Predict on the same rows that were used for fitting, so the result is in-sample.
prediction = classifier.predict(xData[:,0].reshape(-1,1))

In [188]:
# Count of rows where the predicted label equals the actual label.
acc_predictions = int((yData == prediction).sum())

In [189]:
# Fraction of predictions that match the labels.
acc = acc_predictions/len(prediction)

In [190]:
# Baseline accuracy as a percentage (measured on the data the classifier was fitted on).
acc*100

72.8

### TensorFlow Implementation

In [263]:
# Trainable parameters: a 1x2 weight matrix initialised to ones (one input feature, two logits)
# and a length-2 bias initialised to zeros.
W = tf.Variable(tf.ones([1, 2]), name="W")
b = tf.Variable(tf.zeros([2]), name="b")

In [264]:
# Input placeholder: any number of rows, one feature per row.
x = tf.placeholder(tf.float32, [None, 1], name="x")

In [265]:
# Label placeholder: any number of rows, two columns (fed with one-hot rows).
y_ = tf.placeholder(tf.float32, [None, 2], name="y_")

# Logits: affine map of the input, one column per class.
y = tf.matmul(x, W) + b

In [266]:
# Loss: softmax cross-entropy between labels y_ and logits y, averaged over the rows fed in.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

In [267]:
# Op that applies one gradient-descent update minimising cross_entropy; 0.5 is the learning rate.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

In [268]:
# First column of xData as an (n, 1) column vector, matching the shape of placeholder x.
all_xs = xData[:, 0].reshape(-1, 1)

In [269]:
# One-hot encode the boolean labels: True -> [1, 0], False -> [0, 1].
all_ys = np.array([[1, 0] if is_up else [0, 1] for is_up in yData])

In [270]:
# Number of training rows; used for batch indexing and as the full-batch size below.
dataset_size = len(all_xs)

In [283]:
def trainWithMultiplePointsPerEpoch(steps, train_step, batch_size):
  """Train with mini-batches, then print accuracy over the whole dataset.

  Relies on notebook-level names: placeholders x and y_, variables W and b,
  tensors y and cross_entropy, arrays all_xs and all_ys, and dataset_size.

  Args:
    steps: number of optimizer steps to run.
    train_step: op executed once per step with the current batch fed in.
    batch_size: rows fed per step; must not exceed dataset_size.

  Raises:
    ValueError: if batch_size is larger than dataset_size.
  """
  # Validate once, before any graph or session work, so a bad batch_size is
  # reported even when steps is 0.
  if dataset_size < batch_size:
    raise ValueError("dataset_size: %d, must be greater than or equal to batch_size: %d" % (dataset_size, batch_size))

  init = tf.global_variables_initializer()

  with tf.Session() as sess:
    sess.run(init)

    # Start with the first batch so the final "y" print below still has
    # something to evaluate when the loop body never runs (steps == 0).
    feed = { x: all_xs[:batch_size], y_: all_ys[:batch_size] }

    for i in range(steps):
      # Walk through the data in batch_size strides, wrapping around at the end.
      # A batch that starts near the end of the data is simply shorter.
      if dataset_size == batch_size:
        batch_start_idx = 0
      else:
        batch_start_idx = (i * batch_size) % (dataset_size)

      batch_end_idx = batch_start_idx + batch_size

      batch_xs = all_xs[batch_start_idx : batch_end_idx]
      batch_ys = all_ys[batch_start_idx : batch_end_idx]

      feed = { x: batch_xs, y_: batch_ys }

      sess.run(train_step, feed_dict=feed)

      # Progress report every 1000 steps; the printed number is the zero-based step index.
      if (i + 1) % 1000 == 0:
        print("After %d iteration:" % i)
        print(sess.run(W))
        print(sess.run(b))

        print("cross entropy: %f" % sess.run(cross_entropy, feed_dict=feed))

    # Test model: a row is correct when the arg-max of the logits matches the arg-max of the label.
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    print("y = {}".format(sess.run(y,feed)))

    # Calculate accuracy over the full dataset, not just the last batch.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    print("Accuracy: %f" % sess.run(accuracy, feed_dict={x: all_xs, y_: all_ys}))

In [284]:
# 20000 steps with batch_size equal to dataset_size, so every step uses the whole dataset.
trainWithMultiplePointsPerEpoch(20000, train_step, dataset_size)

After 999 iteration:
[[ 5.196305  -3.1963046]]
[ 0.13512917 -0.13512877]
cross entropy: 0.609988
After 1999 iteration:
[[ 7.973796  -5.9737945]]
[ 0.1246955  -0.12469481]
cross entropy: 0.578771
After 2999 iteration:
[[ 9.935836  -7.9358315]]
[ 0.11727998 -0.11727909]
cross entropy: 0.563244
After 3999 iteration:
[[11.390679 -9.390675]]
[ 0.1116586  -0.11165741]
cross entropy: 0.554724
After 4999 iteration:
[[ 12.505717  -10.5057125]]
[ 0.10726263 -0.10726164]
cross entropy: 0.549724
After 5999 iteration:
[[ 13.380331 -11.380326]]
[ 0.10375845 -0.10375739]
cross entropy: 0.546651
After 6999 iteration:
[[ 14.077989 -12.077984]]
[ 0.10092742 -0.10092653]
cross entropy: 0.544697
After 7999 iteration:
[[ 14.641535 -12.64153 ]]
[ 0.09861766 -0.09861698]
cross entropy: 0.543422
After 8999 iteration:
[[ 15.101157 -13.101152]]
[ 0.09671898 -0.09671844]
cross entropy: 0.542574
After 9999 iteration:
[[ 15.478832 -13.478827]]
[ 0.09514897 -0.09514863]
cross entropy: 0.542002
After 10999 iteration