In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [3]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

from matplotlib.mlab import PCA

In [4]:
print(tf.__version__)
print(np.__version__)
print(pd.__version__)
print(matplotlib.__version__)

1.4.1
1.13.3
0.20.1
2.0.2


In [5]:
prices = pd.read_csv('stocks.csv')

In [6]:
prices.head()

Unnamed: 0,Date,ADBE,CVX,MDLZ,NFLX,ORCL,SBUX
0,3-Jan-17,113.82,110.759811,44.43,141.220001,40.23,55.649239
1,1-Dec-16,102.949997,116.584061,44.330002,123.800003,38.299999,55.270943
2,1-Nov-16,102.809998,110.502274,41.066032,117.0,40.033211,57.709953
3,3-Oct-16,107.510002,102.728424,44.75042,124.870003,38.270115,52.588333
4,1-Sep-16,108.540001,100.933739,43.714809,98.550003,38.975769,53.648621


In [7]:
prices['Date'] = pd.to_datetime(prices['Date'], infer_datetime_format=True)

In [8]:
prices = prices.sort_values(['Date'], ascending=[True])

In [9]:
prices.head()

Unnamed: 0,Date,ADBE,CVX,MDLZ,NFLX,ORCL,SBUX
120,2007-01-03,38.869999,50.777351,17.519524,3.258571,15.696321,15.752188
119,2007-02-01,39.25,48.082939,16.019426,3.218571,15.028588,13.930813
118,2007-03-01,41.700001,51.900383,16.009354,3.312857,16.583584,14.138198
117,2007-04-02,41.560001,54.588032,16.924608,3.167143,17.196436,13.984914
116,2007-05-01,44.060001,57.598267,17.111704,3.128572,17.726965,12.988567


In [11]:
prices = prices[['ADBE', 'MDLZ', 'SBUX']]

In [12]:
prices.head()

Unnamed: 0,ADBE,MDLZ,SBUX
120,38.869999,17.519524,15.752188
119,39.25,16.019426,13.930813
118,41.700001,16.009354,14.138198
117,41.560001,16.924608,13.984914
116,44.060001,17.111704,12.988567


In [13]:
returns = prices[[key for key in dict(prices.dtypes) \
    if dict(prices.dtypes)[key] in ['float64', 'int64']]].pct_change()

In [14]:
returns = returns[1:]

In [15]:
returns.head()

Unnamed: 0,SBUX,MDLZ,ADBE
119,-0.115627,-0.085624,0.009776
118,0.014887,-0.000629,0.06242
117,-0.010842,0.05717,-0.003357
116,-0.071244,0.011055,0.060154
115,-0.089205,0.049031,-0.088743


In [16]:
returns_arr = returns.as_matrix()[:10]

In [17]:
returns_arr.shape

(10, 3)

In [18]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [19]:
returns_arr_scaled = scaler.fit_transform(returns_arr)

In [20]:
returns_arr_scaled

array([[-1.3815757 , -1.66841975, -0.00794824],
       [ 0.93127707, -0.06227426,  0.79937034],
       [ 0.47533596,  1.02993615, -0.20935564],
       [-0.59506817,  0.15850482,  0.76461402],
       [-0.91335326,  0.87614265, -1.51877095],
       [ 0.96462026, -1.39059372, -0.10439707],
       [ 1.24532886, -0.44852127,  0.7784667 ],
       [-0.20090235,  1.56355606,  0.16856723],
       [ 0.99212851, -0.65272298,  1.33141124],
       [-1.51779118,  0.59439231, -2.00195763]])

In [21]:
results = PCA(returns_arr_scaled, standardize=False)

In [22]:
results.fracs

array([ 0.61673564,  0.2826719 ,  0.10059245])

In [23]:
results.Y 

array([[-0.17991028,  2.07240494,  0.60432531],
       [ 1.13763134, -0.46280938, -0.04183796],
       [-0.2667366 , -1.08660301, -0.28051385],
       [ 0.07215127, -0.01306041,  0.97902407],
       [-1.93968973, -0.15745777, -0.34830467],
       [ 1.10422729,  0.85352113, -0.96297751],
       [ 1.47847086, -0.24640766, -0.33381563],
       [-0.6566515 , -1.33920141,  0.53743148],
       [ 1.77228548, -0.04546335,  0.1998985 ],
       [-2.52177814,  0.4250769 , -0.35322975]])

In [24]:
results.Wt

array([[ 0.6233919 , -0.41154938,  0.66483807],
       [-0.42026022, -0.89337244, -0.15895609],
       [-0.65936628,  0.18031306,  0.72987897]])

In [25]:
np.dot(results.Y, results.Wt)

array([[-1.3815757 , -1.66841975, -0.00794824],
       [ 0.93127707, -0.06227426,  0.79937034],
       [ 0.47533596,  1.02993615, -0.20935564],
       [-0.59506817,  0.15850482,  0.76461402],
       [-0.91335326,  0.87614265, -1.51877095],
       [ 0.96462026, -1.39059372, -0.10439707],
       [ 1.24532886, -0.44852127,  0.7784667 ],
       [-0.20090235,  1.56355606,  0.16856723],
       [ 0.99212851, -0.65272298,  1.33141124],
       [-1.51779118,  0.59439231, -2.00195763]])

In [26]:
returns_arr_scaled

array([[-1.3815757 , -1.66841975, -0.00794824],
       [ 0.93127707, -0.06227426,  0.79937034],
       [ 0.47533596,  1.02993615, -0.20935564],
       [-0.59506817,  0.15850482,  0.76461402],
       [-0.91335326,  0.87614265, -1.51877095],
       [ 0.96462026, -1.39059372, -0.10439707],
       [ 1.24532886, -0.44852127,  0.7784667 ],
       [-0.20090235,  1.56355606,  0.16856723],
       [ 0.99212851, -0.65272298,  1.33141124],
       [-1.51779118,  0.59439231, -2.00195763]])

In [27]:
n_inputs = 3
n_hidden = 2  # codings
n_outputs = n_inputs

In [28]:
learning_rate = 0.01

In [29]:
tf.reset_default_graph()

In [30]:
X = tf.placeholder(tf.float32, shape=[None, n_inputs])

In [31]:
hidden = tf.layers.dense(X, n_hidden)

In [32]:
outputs = tf.layers.dense(hidden, n_outputs)

In [33]:
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))

In [34]:
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)

In [35]:
init = tf.global_variables_initializer()

In [36]:
n_iterations = 10000

In [37]:
with tf.Session() as sess:
    init.run()

    for iteration in range(n_iterations):
        training_op.run(feed_dict={X: returns_arr_scaled})
    
    outputs_val = outputs.eval(feed_dict={X: returns_arr_scaled})
    print(outputs_val)

[[-0.98150289 -1.77904904 -0.44743055]
 [ 0.90408623 -0.05467882  0.83026832]
 [ 0.28890061  1.08167148 -0.00606127]
 [ 0.05027498 -0.0179458   0.04986156]
 [-1.14511693  0.93994194 -1.26656651]
 [ 0.33134007 -1.21805727  0.60008794]
 [ 1.0261507  -0.38860524  1.02298927]
 [ 0.15135062  1.46818554 -0.22575612]
 [ 1.12529922 -0.6893332   1.18681109]
 [-1.75277281  0.6587956  -1.74611008]]


In [38]:
np.dot(results.Y[:,[0,1]], results.Wt[[0,1]])

array([[-0.98310396, -1.77738749, -0.44903258],
       [ 0.90369053, -0.05473033,  0.82990699],
       [ 0.29037459,  1.08051646, -0.00461448],
       [ 0.05046729, -0.01802601,  0.05004494],
       [-1.14301362,  0.93894653, -1.2645507 ],
       [ 0.32966536, -1.21695631,  0.59845995],
       [ 1.02522209, -0.38832995,  1.0221117 ],
       [ 0.15346185,  1.46665014, -0.2236927 ],
       [ 1.12393484, -0.68876729,  1.18550953],
       [-1.75069897,  0.65808424, -1.74414266]])