In [87]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, LassoCV

In [126]:
class TeamA_Predictor():
    # stocks is a list of stocks to look up
    # investment is starting dollar amount
    def __init__(self):
        self.training_tickers = ["TMO","AMZN","AAPL","GM","IBM","TWTR","CBS","TM","BIO"]
        self.train_data = pd.concat([self.get_data(stock) for stock in self.training_tickers])
        self.X = self.train_data[['x_0','x_1','x_2']]
        self.y = self.train_data['y']
        self.model = LassoCV(alphas= [0.01, 0.1, 1, 10, 100]).fit(self.X, self.y)
        
    def get_data(self, stock):
        url = (
            'http://www.google.com/finance/historical?output=csv&startdate=2000-01-01&enddate=2017-01-01&q={}'.format(stock)
        )
        df = pd.read_csv(url, encoding = 'utf8')
        df = df.drop(['Open', 'High', 'Low', 'Volume'], axis = 1)
        df.set_index(u'﻿Date', inplace = True)
        df.index = df.index.to_datetime()
        
        offset_1, offset_2, offset_3 = 252, (252*2), (252*3)
        df_length = len(df) - offset_3
        x_3 = df[:df_length] # target variable
        x_2 = df[offset_1:df_length + offset_1] # one year offset
        x_1 = df[offset_2:df_length + offset_2] # two year offset
        x_0 = df[offset_3: df_length + offset_3]
        
        x_3 = x_3.reset_index().drop('index', axis=1)
        x_2 = x_2.reset_index().drop('index', axis=1)
        x_1 = x_1.reset_index().drop('index', axis=1)
        x_0 = x_0.reset_index().drop('index', axis=1)
        
        frames = [x_0, x_1, x_2, x_3]
        
        final_df = pd.concat(frames, axis=1)
        final_df.columns = ['x_0', 'x_1', 'x_2', 'y']
        return final_df

    
    def make_predictions(self, stocks):
        test_data = pd.concat([self.get_data(stock) for stock in stocks])
        test_X = self.train_data[['x_0','x_1','x_2']]
        test_y = self.train_data['y']
        pred_y = self.model.predict(test_X)
        return test_X, pred_y
    
    #def predict_2017_1_1_price(ticker):
        #test_data = pd.concat([self.get_data(stock) for stock in stocks])
        #test_X = df[['x_0', 'x_1', 'x_2', 'x_3']]
        #test_y = df['y']
      # lm_LassoCV = linear_model.LassoCV(alphas=[0.01,0.1, 1, 10,100])
      # model_LassoCV = lm_LassoCV.fit(X, y)
      # predictions_LassoCV = model_LassoCV.predict(X)
      # #plt.scatter(y, predictions_LassoCV)
    
    def predict2014_to_2017(self, stocks, investment):
        predicted_2017_return = 0
        test_X, pred_y = self.make_predictions(stocks)
        #for ticker in stocks:
        for y in pred_y:
            predicted_2017_return += (
                y/len(stocks)*investment
            )
            #predicted_2017_return += (
                #pred_y/ float(test_X['x_0'])/len(stocks)*investment
            #)
            
        return predicted_2017_return
            
        
   

In [127]:
pr = TeamA_Predictor()

In [128]:
predictions = pr.predict2014_to_2017(["VIAB","GOOG","F","MSFT"], 10000)

In [130]:
predictions

5592192499.9999638