In [32]:
from sklearn.neural_network import MLPRegressor
import numpy as np
import matplotlib.pyplot as plt
import pandas
import requests
from pandas.stats.moments import rolling_mean
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn import preprocessing


def loadData(fileName, splitRatio=.7, beta=0):
    """
    Read data, process it and split into train and test set
    """
    
    # urlInput = 'H:/test/MLChallenge/HDFC_price_vol.csv'
    urlInput = fileName
    
    datasetIn = pandas.read_csv(urlInput).dropna()
    datasetIn = preprocess(datasetIn).dropna()
    data_obv = calculateOBV(datasetIn)
    obv = pandas.Series(data_obv, name="OBV")
    df1 = pandas.concat([datasetIn,obv], axis=1)
    df1["betas"] = beta
    #betas = pandas.Series(betas,name = "beta")
    #df1 = pandas.concat([df1,betas], axis=1)
    #df1 = df1.add(3,"beta")
    datanp = np.array(df1.dropna())
    split = round(splitRatio * df1.shape[0])
    #print df1.dropna()
    
    train = datanp[:int(split), :]
    test = datanp[int(split)+1:,:]
    
    X_tr = train[:-1, :]
    Y_tr = train[1:, 0]
    X_te = test[:-1, :]
    Y_te = test[1:, 0]
    
    return X_tr, Y_tr, X_te, Y_te
    

def preprocess(datasetIn):
    """
    calculates 12 day and 26 day moving averages and MACD using them.
    """
    
    inSet = datasetIn["Adj Close"]
    
    rm12 = rolling_mean(inSet,12).round(2)
    rm26 = rolling_mean(inSet,26).round(2)
    
    # MACD calculation using ma12 and ma26
    
    macd = rm12 - rm26
    macdDf = pandas.DataFrame(macd)
    mac = pandas.Series(macd,name="MACD")
    df1 = pandas.concat([datasetIn,mac], axis=1)
    return df1
    
    
def calculateOBV(datasetIn):
     
    d = datasetIn
    close_data = np.array(d["Adj Close"])
    #print close_data
    volume = np.array(d["Volume"])
    obv = preprocessing.scale(volume)
    #print len(volume)

    obv = np.zeros(len(volume))
    obv[0] = 1
    #print len(obv)

    for idx in range(1, len(obv)):
        if close_data[idx] > close_data[idx-1]:
            obv[idx] = obv[idx-1] + volume[idx]
        elif close_data[idx] < close_data[idx-1]:
            obv[idx] = obv[idx-1] - volume[idx]
        elif close_data[idx] == close_data[idx-1]:
                obv[idx] = obv[idx-1]
    
    return obv
    #TODO: might need to scale OBVs since they are large numbers            
    #scaled_obv = preprocessing.scale(obv)
    #print obv

#TODO: why don't you run with main()!!?
#def main():
    #print "Un Poco Loco" 

#def mergeData():
#stocks = ['hdfc', 'tcs', 'sunpharma']
stocks = ['hdfc', 'sunpharma']

input_tr = []
input_te = []
output_tr = []
output_te = []

#TODO: add betas as different columns - beta1, beta2, beta3
for stock in stocks:
    filePath = "C:\Users\hcl\Downloads\%s.csv" %stock
    print "Loading data for...%s" %stock
    X_tr, Y_tr, X_te, Y_te = loadData(filePath)
    input_tr.append(pandas.DataFrame(X_tr))
    input_te.append(pandas.DataFrame(X_te))
    output_tr.append(pandas.DataFrame(Y_tr))
    output_te.append(pandas.DataFrame(Y_te))

X_train = pandas.concat(input_tr)
X_test = pandas.concat(input_te)
Y_train = pandas.concat(output_tr)
Y_test = pandas.concat(output_te)

print len(X_test),len(Y_test)
print "es perfecto!!"
print X_test
        
    #X_tr, Y_tr, X_te, Y_te = loadData('C:\Users\hcl\Downloads\hdfc.csv', beta=1.3)
    #X_tr1, Y_tr1, X_te1, Y_te1 = loadData('C:\Users\hcl\Downloads\tcs.csv', beta=2)
    #X_tr2, Y_tr2, X_te2, Y_te2 = loadData('C:\Users\hcl\Downloads\sunpharma.csv', beta=1.8)

    #append training/test input/outputs

#print X_tr    

    
'''
print len(X_tr)
print len(Y_tr)
print len(X_te)
print len(Y_te)
'''



Loading data for...hdfc
Loading data for...sunpharma
74 74
es perfecto!
              0           1      2            3    4
0   1840.400024   2239100.0   9.07  185345126.0  0.0
1   1846.400024    799558.0   8.76  186956472.0  0.0
2   1819.900024    602647.0   6.47  186230098.0  0.0
3   1821.349976    726865.0   3.54  190392483.0  0.0
4   1838.050049    920773.0   0.81  193256480.0  0.0
5   1873.449951   3112531.0  -0.94  195468922.0  0.0
6   1878.949951   3074497.0  -1.03  194206973.0  0.0
7   1884.500000   1238028.0  -0.73  195404615.0  0.0
8   1868.150024   2335876.0  -0.42  197585532.0  0.0
9   1868.300049   1713636.0   0.95  199644505.0  0.0
10  1876.650024   1117726.0   4.41  198045115.0  0.0
11  1868.349976   1696424.0   7.66  199780952.0  0.0
12  1856.750000    872346.0   8.30  198150597.0  0.0
13  1878.050049   2175130.0   9.72  196314066.0  0.0
14  1872.400024   1586468.0  13.43  194332370.0  0.0
15  1854.500000   1645129.0  15.75  191644925.0  0.0
16  1872.349976   1194079.0

	Series.rolling(window=12,center=False).mean()
	Series.rolling(window=26,center=False).mean()


'\nprint len(X_tr)\nprint len(Y_tr)\nprint len(X_te)\nprint len(Y_te)\n'