In [95]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
%matplotlib inline
plt.style.use('ggplot')

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

In [96]:
def gauss_gp(y):
    """
    Fit a Gaussian process to a series and forecast one step past its end.

    The series is regressed against its own sample index (0, 1, ..., n - 1)
    and the fitted process is then evaluated at index n.

    y: numpy array holding n observations along its first axis

    returns: tuple (return_frac, sigma)
        return_frac: predicted next value divided by the last observation,
                     minus 1
        sigma: standard deviation of the prediction exactly as reported by
               the regressor; it is NOT rescaled by y[-1], so it is on the
               scale of y rather than of return_frac
    """
    n_obs = y.shape[0]

    # training inputs: the sample indices as a single-feature column
    t_train = np.linspace(0, n_obs - 1, n_obs)[:, np.newaxis]

    # RBF kernel multiplied by a constant term bounded to [1e-3, 1e3]
    model = GaussianProcessRegressor(
        kernel=RBF(1) * C(1.0, (1e-3, 1e3)),
        n_restarts_optimizer=9,
    )
    model.fit(t_train, y)

    # query input: the index immediately after the last observed one
    t_next = t_train[-1:] + 1
    forecast, sigma = model.predict(t_next, return_std=True)

    # fractional change from the last observation to the forecast
    return_frac = forecast / y[-1] - 1

    return return_frac, sigma

In [97]:
def cor_return_sigma(X, sharpe_func):
    """
    Correlate the rows of X and evaluate sharpe_func on every row.

    X: 2-D numpy array with one stock per row (k stocks x n observations)
    sharpe_func: callable that takes one row of X and returns a tuple of
                 return rate and std. deviation of that return rate

    returns: tuple of
        - np.corrcoef(X), the row-by-row correlation matrix
        - numpy array stacking sharpe_func's result for each row, in row
          order
    """
    correlations = np.corrcoef(X)

    per_stock = []
    for row_idx in range(X.shape[0]):
        per_stock.append(sharpe_func(X[row_idx]))

    return correlations, np.array(per_stock)
    

In [139]:
def read_stocks(dir_name):
    """
    Load the 'Adj Close' column of every CSV in a directory as a matrix.

    dir_name: string directory name; every file matching
              '<dir_name>/*.csv' is read and must contain an
              'Adj Close' column

    returns: 2-D float64 numpy array with one stock per row. Files are
             taken in sorted path order and every series is cut down to
             its last min_len values, where min_len is the length of the
             shortest series found. A (0, 0) array is returned when the
             directory contains no CSV files.

    Assumes stocks are csv's
    """
    # glob yields paths in arbitrary order; sort so row order is reproducible
    stock_files = sorted(glob.glob(dir_name + '/*.csv'))

    # .values is available on both old and current pandas
    # (Series.as_matrix() was removed in pandas 1.0)
    series = [
        pd.read_csv(stock)['Adj Close'].values.astype(np.float64)
        for stock in stock_files
    ]

    if not series:
        return np.empty((0, 0), dtype=np.float64)

    # The shortest length is only known once every file has been read.
    # Truncating while reading would leave the earlier, longer series
    # untrimmed and produce a ragged result.
    min_len = min(x.shape[0] for x in series)

    # Slice from an explicit start index: x[-min_len:] would return the
    # whole series when min_len is 0.
    return np.array([x[x.shape[0] - min_len:] for x in series])

In [143]:
# Load every stock CSV from the data directory, then display the
# correlation matrix together with the per-stock result of gauss_gp as
# this cell's output.
# NOTE(review): the path below is absolute and machine-specific; adjust it
# before re-running on another machine.
X = read_stocks(dir_name='/home/nate/projects/stocks')
cor_return_sigma(X=X, sharpe_func=gauss_gp)

(array([[ 1.        ,  0.01874344,  0.02134416,  0.737496  ,  0.35623885,
          0.23775294, -0.15721298, -0.10684355],
        [ 0.01874344,  1.        , -0.12549223, -0.13182161,  0.00289092,
         -0.58723827,  0.02031588,  0.7454691 ],
        [ 0.02134416, -0.12549223,  1.        ,  0.03513374,  0.05957011,
          0.19796489,  0.02510202, -0.06450011],
        [ 0.737496  , -0.13182161,  0.03513374,  1.        ,  0.73089603,
          0.60138208,  0.20012134, -0.13029708],
        [ 0.35623885,  0.00289092,  0.05957011,  0.73089603,  1.        ,
          0.66277987,  0.48059125,  0.07928716],
        [ 0.23775294, -0.58723827,  0.19796489,  0.60138208,  0.66277987,
          1.        ,  0.2615273 , -0.52480686],
        [-0.15721298,  0.02031588,  0.02510202,  0.20012134,  0.48059125,
          0.2615273 ,  1.        ,  0.12829842],
        [-0.10684355,  0.7454691 , -0.06450011, -0.13029708,  0.07928716,
         -0.52480686,  0.12829842,  1.        ]]), array([[[ -1.4