In [None]:
import pandas as pd 
import numpy as np 
import statsmodels.api as sm


In [None]:
def read_goog_sp500_dataframe(data_dir="E:\\Eskills-Academy-projects\\LearningTensorFlow\\data\\"):
    """Return row-over-row percentage changes of the Google and S&P 500 prices.

    Reads ``GOOG.csv`` and ``SP_500.csv`` from ``data_dir``. In each file the
    first column is taken as the date and the sixth column as the price
    (presumably the adjusted close -- confirm against the CSV header).
    The two price series are paired by date, so the files may list their rows
    in different orders; dates that appear in only one file are dropped.

    Args:
        data_dir: Directory containing the two CSV files. Defaults to the
            location that used to be hard-coded in this function.

    Returns:
        DataFrame with the numeric columns ``Goog`` and ``SP500`` holding the
        fractional change from the previous row, ordered by ascending date.
        The first row is NaN because it has no previous row.
    """
    import os

    def load_prices(file_name, column):
        # One price series with a parsed 'Date' column (dates expected as YYYY-MM-DD).
        prices = pd.read_csv(os.path.join(data_dir, file_name), sep=",",
                             usecols=[0, 5], names=['Date', column], header=0)
        prices['Date'] = pd.to_datetime(prices['Date'], format='%Y-%m-%d')
        return prices

    goog = load_prices("GOOG.csv", 'Goog')
    sp = load_prices("SP_500.csv", 'SP500')

    # Join on the date rather than on row position: a positional assignment
    # silently pairs prices from different dates when the files disagree in
    # ordering or coverage.
    prices = goog.merge(sp, on='Date', how='inner').sort_values('Date')

    # Only the numeric price columns take part in pct_change; 'Date' is left out.
    return prices.select_dtypes(include='number').pct_change()

In [None]:
def read_goog_sp500_logistic_data():
    """Build the inputs for a logistic regression of Google up-days on S&P 500 returns.

    Returns:
        A tuple ``(xData, yData)``. ``xData`` is a 2-D numpy array whose
        columns are the S&P 500 return and a constant 1 (the intercept term).
        ``yData`` is a boolean pandas Series that is True where the Google
        return is greater than zero. Both exclude the first and last rows of
        the returns frame.
    """
    frame = read_goog_sp500_dataframe()

    # Constant column so the model can learn an intercept
    frame['Intercept'] = 1

    # The first row is NaN (pct_change has no previous row to compare with);
    # the last row is trimmed as well, so both outputs cover rows 1 .. n-2.
    interior = slice(1, -1)

    features = frame[["SP500", "Intercept"]].iloc[interior]
    xData = np.array(features)
    yData = frame["Goog"].gt(0).iloc[interior]

    return (xData, yData)

In [None]:
# xData: array with the S&P 500 return and a constant intercept column;
# yData: boolean Series, True where the Google return is positive.
xData, yData = read_goog_sp500_logistic_data()

In [None]:
# Logistic model of the up-day labels (yData) on the [S&P 500 return, intercept] columns (xData)
logit = sm.Logit(yData, xData)

In [None]:
# Fit the logistic model
result = logit.fit()

# A predicted value above 0.5 is treated as a predicted up-day for Google
predictions = result.predict(xData) > 0.5

# Number of rows where the predicted label equals the actual label
accuratePredictionCount = int(np.count_nonzero(np.asarray(yData) == predictions))

# Share of rows predicted correctly
pctAccuracy = accuratePredictionCount / len(predictions)
print("Accuracy:", pctAccuracy)

In [None]:
import tensorflow as tf 
# The following cells use the TF1-style graph API (tf.compat.v1 placeholders
# and Session), so eager execution is switched off before any graph is built.
tf.compat.v1.disable_eager_execution()

In [None]:
# Graph inputs: one feature column per row, and a two-column label per row
x = tf.compat.v1.placeholder(tf.float32, [None, 1], name='x')
y_ = tf.compat.v1.placeholder(tf.float32, [None, 2], name='y_')

# Trainable parameters: weights start at one, biases at zero
W = tf.Variable(tf.ones([1, 2]), name='W')
b = tf.Variable(tf.zeros([2]), name='b')

# Affine map from the single feature to two logits
y = tf.matmul(x, W) + b

# Mean softmax cross-entropy between the fed labels and the logits
per_row_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
cross_entropy = tf.reduce_mean(per_row_loss)

# One gradient-descent update of the variables per execution of train_step
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.5)
train_step = optimizer.minimize(cross_entropy)


In [32]:
# Feature matrix for the TensorFlow model: the first xData column as an (n, 1) array
all_xs = xData[:, 0].reshape(-1, 1)
# One-hot labels: [1, 0] for a True entry of yData, [0, 1] otherwise
all_ys = np.array([[1, 0] if is_up else [0, 1] for is_up in yData])
dataset_size = all_xs.shape[0]

In [35]:
def trainWithMultiplePointsPerEpoch(steps, train_step, batch_size):
    """Run mini-batch training and print the accuracy on the full data set.

    Relies on notebook-level names defined in other cells: the data arrays
    ``all_xs`` / ``all_ys`` and their length ``dataset_size``, the
    placeholders ``x`` / ``y_``, the variables ``W`` / ``b`` and the graph
    tensors ``y`` and ``cross_entropy``.

    Batches are consecutive windows of ``batch_size`` rows. A window that
    reaches past the last row continues from row 0, so every step is fed
    exactly ``batch_size`` rows.

    Args:
        steps: Number of training steps to run.
        train_step: Graph operation executed once per step.
        batch_size: Rows fed per step; must not exceed ``dataset_size``.

    Raises:
        ValueError: If ``batch_size`` is larger than ``dataset_size``.
    """
    # The sizes never change during training, so validate once up front
    # instead of on every step.
    if dataset_size < batch_size:
        raise ValueError("data set size: %d, must be greater than the batch_size: %d"%(dataset_size, batch_size))

    init = tf.compat.v1.global_variables_initializer()

    with tf.compat.v1.Session() as sess:
        sess.run(init)
        for step in range(steps):

            # Start of this step's window (0 on every step when batch_size == dataset_size)
            batch_start_idx = (step * batch_size) % dataset_size if dataset_size else 0

            # Row indices of the window, wrapped around the end of the data.
            # A plain slice would be cut short at the last row and feed a
            # smaller batch than requested.
            batch_idx = np.arange(batch_start_idx, batch_start_idx + batch_size) % dataset_size

            # Access the x and y values in batches
            batch_xs = all_xs[batch_idx]
            batch_ys = all_ys[batch_idx]

            feed = {x: batch_xs, y_: batch_ys}
            sess.run(train_step, feed_dict=feed)

            # Print progress to screen every 500 iterations
            if (step + 1) % 500 == 0:
                # step is zero-based, so step + 1 iterations have completed
                print("After %d iterations" % (step + 1))
                print("W:", sess.run(W))
                print("b:", sess.run(b))
                print("cross entropy: %f" % sess.run(cross_entropy, feed_dict=feed))

        # Test model: a row is correct when the largest logit is in the same
        # column as the 1 of its one-hot label
        correct_predictions = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))

        # Calculate accuracy: cast True/False to 1/0 and average over all rows
        accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
        print("Accuracy: %f"%sess.run(accuracy, feed_dict = {x:all_xs, y_: all_ys}))
                


In [40]:
# batch_size equals dataset_size here, so each of the 20000 steps is fed the whole data set
trainWithMultiplePointsPerEpoch(20000, train_step, dataset_size)

After 499 iterations
W: [[ 3.3300729 -1.3300734]]
b: [ 0.14253847 -0.14253835]
cross entropy: 0.637952
After 999 iterations
W: [[ 5.196305  -3.1963043]]
b: [ 0.13512911 -0.13512884]
cross entropy: 0.609988
After 1499 iterations
W: [[ 6.7153406 -4.7153387]]
b: [ 0.12940016 -0.1294001 ]
cross entropy: 0.591472
After 1999 iterations
W: [[ 7.973796 -5.973794]]
b: [ 0.12469519 -0.12469512]
cross entropy: 0.578771
After 2499 iterations
W: [[ 9.032861  -7.0328565]]
b: [ 0.12071175 -0.12071171]
cross entropy: 0.569779
After 2999 iterations
W: [[ 9.935836 -7.935831]]
b: [ 0.11727957 -0.11727951]
cross entropy: 0.563244
After 3499 iterations
W: [[10.714037 -8.714031]]
b: [ 0.11428789 -0.11428776]
cross entropy: 0.558391
After 3999 iterations
W: [[11.390679 -9.390674]]
b: [ 0.11165804 -0.11165797]
cross entropy: 0.554723
After 4499 iterations
W: [[11.983359 -9.983354]]
b: [ 0.10933124 -0.10933139]
cross entropy: 0.551910
After 4999 iterations
W: [[ 12.505717 -10.505712]]
b: [ 0.10726198 -0.107262