In [None]:
from Robinhood.Robinhood.Robinhood import Robinhood

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 500)

import numpy as np
import glob
import json

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from sklearn.metrics import r2_score, median_absolute_error, mean_absolute_error, median_absolute_error, mean_squared_error, mean_squared_log_error

from scipy.optimize import minimize
import statsmodels.tsa.api as smt
import statsmodels.api as sm

from datetime import datetime
from datetime import timedelta  

import time
import os

#from tqdm import tqdm_notebook

#from itertools import product

# Analysis and visualizations

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), expressed in percent.

    Both arguments must support element-wise subtraction and division
    (e.g. numpy arrays or pandas Series). No protection against zeros in
    ``y_true`` is applied: the division is performed as-is.
    """
    relative_error = (y_true - y_pred) / y_true
    mean_abs_relative_error = np.mean(np.abs(relative_error))
    return mean_abs_relative_error * 100
    

def exponential_smoothing(series, alpha):
    """Apply simple exponential smoothing to a sequence of observations.

    Parameters
    ----------
    series : iterable of numbers
        Observations in time order (list, numpy array, pandas Series, ...).
        Values are consumed positionally, so a pandas Series with an
        arbitrary index (dates, non-zero-based integers) is handled too.
    alpha : float
        Smoothing factor; the weight given to the newest observation.

    Returns
    -------
    list
        Smoothed values, same length as ``series``. The first element is the
        first observation unchanged. An empty input yields an empty list.
    """
    # Materialise positionally: indexing a Series with [n] is label-based and
    # breaks when the index does not start at 0.
    values = list(series)
    if not values:
        return []

    result = [values[0]]  # first value is same as series
    for value in values[1:]:
        result.append(alpha * value + (1 - alpha) * result[-1])
    return result


def double_exponential_smoothing(series, alpha, beta):
    """Apply double (level + trend) exponential smoothing with a one-step forecast.

    Parameters
    ----------
    series : iterable of numbers
        Observations in time order. Values are consumed positionally, so a
        pandas Series with an arbitrary index works as well.
    alpha : float
        Smoothing factor for the level.
    beta : float
        Smoothing factor for the trend.

    Returns
    -------
    list
        ``len(series) + 1`` values: the first observation, the smoothed
        values, and one extra forecast point at the end. An empty input
        yields an empty list.
    """
    values = list(series)
    if not values:
        return []

    count = len(values)
    level = values[0]
    # The initial trend needs two observations; with a single point there is
    # no slope information, so start from a flat trend.
    trend = values[1] - values[0] if count > 1 else 0

    result = [values[0]]
    for n in range(1, count + 1):
        # Past the end of the data we forecast from the last smoothed value.
        value = values[n] if n < count else result[-1]
        last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
        trend = beta * (level - last_level) + (1 - beta) * trend
        result.append(level + trend)
    return result


def plot_moving_average(series, window, align=False, plot_intervals=False, scale=1.96):
    """Plot a series together with its rolling-mean trend on a new figure.

    Parameters
    ----------
    series : pandas.Series
        Values to plot; must provide ``.rolling``.
    window : int
        Size of the rolling window.
    align : bool
        Selects which positions of the plotted trend receive the rolling
        mean: ``window:`` when True, ``window/2 : -window/2`` otherwise.
    plot_intervals : bool
        When True, also plot upper/lower bounds around the rolling mean.
    scale : float
        Multiplier applied to the standard deviation of the residuals when
        building the bounds.
    """

    rolling_mean = series.rolling(window=window).mean()
    # Placeholder trend with a fresh default index, filled in below.
    r_mean = pd.Series([None]*len(rolling_mean))
    # NOTE(review): the right-hand side is a Series, so pandas may align it on
    # index labels instead of assigning by position — confirm that the intended
    # shift/centering really happens, especially when `series` has a
    # non-default index or `window` is odd (the two slices then differ in length).
    # NOTE(review): `rolling_mean[window:]` appears to skip the first complete
    # rolling value (position window-1) — verify whether that is intended.
    if align:
        r_mean.iloc[window:] = rolling_mean[window:]
    else:
        r_mean.iloc[int(window/2):-int(window/2)] = rolling_mean[window:]
    
    plt.figure(figsize=(17,8))
    plt.title('Moving average\n window size = {}'.format(window))
    plt.plot(r_mean, 'r', label='Rolling mean trend',linewidth=5)
    
    # Plot bounds around the smoothed values: rolling mean +/- (MAE + scale * std
    # of the residuals), both computed from position `window` onwards.
    if plot_intervals:
        mae = mean_absolute_error(series[window:], rolling_mean[window:])
        deviation = np.std(series[window:] - rolling_mean[window:])
        lower_bound = rolling_mean - (mae + scale * deviation)
        upper_bound = rolling_mean + (mae + scale * deviation)
        plt.plot(upper_bound, 'r--', label='Upper bound / Lower bound')
        plt.plot(lower_bound, 'r--')
            
    plt.plot(series[:], label='Actual values')
    plt.legend(loc='best')
    plt.grid(True)
    
    
def plot_exponential_smoothing(series, alphas):
    """Draw one exponentially smoothed curve per alpha next to the actual values.

    ``series`` must expose ``.values`` (e.g. a pandas Series); ``alphas`` is an
    iterable of smoothing factors. A new 17x8 figure is created for the plot.
    """
    plt.figure(figsize=(17, 8))

    for smoothing_factor in alphas:
        smoothed = exponential_smoothing(series, smoothing_factor)
        curve_label = "Alpha {}".format(smoothing_factor)
        plt.plot(smoothed, label=curve_label)

    # Actual observations are drawn last, in cyan.
    actual = series.values
    plt.plot(actual, "c", label="Actual")

    plt.legend(loc="best")
    plt.axis('tight')
    plt.title("Exponential Smoothing")
    plt.grid(True)
    
    
def plot_double_exponential_smoothing(series, alphas, betas):
    """Draw a double-exponentially smoothed curve for every (alpha, beta) pair.

    ``series`` must expose ``.values`` (e.g. a pandas Series); ``alphas`` and
    ``betas`` are iterables of smoothing factors. Every combination is plotted
    on a new 17x8 figure together with the actual values.
    """
    plt.figure(figsize=(17, 8))

    for level_factor in alphas:
        for trend_factor in betas:
            smoothed = double_exponential_smoothing(series, level_factor, trend_factor)
            curve_label = "Alpha {}, beta {}".format(level_factor, trend_factor)
            plt.plot(smoothed, label=curve_label)

    actual = series.values
    plt.plot(actual, label="Actual")

    plt.legend(loc="best")
    plt.axis('tight')
    plt.title("Double Exponential Smoothing")
    plt.grid(True)
    
def tsplot(y, lags=None, figsize=(12, 7), syle='bmh', style=None):
    """Plot a time series with its ACF and PACF and report the Dickey-Fuller p-value.

    Parameters
    ----------
    y : pandas.Series or array-like
        The time series; anything that is not a Series is wrapped in one.
    lags : int or None
        Forwarded to the ACF/PACF plotting functions.
    figsize : tuple
        Figure size passed to ``plt.figure``.
    syle : str
        Matplotlib style name. The misspelling is kept so that existing
        callers using this keyword keep working.
    style : str or None
        Correctly spelled alias for ``syle``; takes precedence when given.
    """
    if not isinstance(y, pd.Series):
        y = pd.Series(y)

    # Previously the style argument was ignored and 'bmh' was hard-coded.
    chosen_style = syle if style is None else style

    with plt.style.context(style=chosen_style):
        plt.figure(figsize=figsize)
        layout = (2,2)
        ts_ax = plt.subplot2grid(layout, (0,0), colspan=2)
        acf_ax = plt.subplot2grid(layout, (1,0))
        pacf_ax = plt.subplot2grid(layout, (1,1))

        y.plot(ax=ts_ax)
        # Second element of the adfuller result is the p-value.
        p_value = sm.tsa.stattools.adfuller(y)[1]
        ts_ax.set_title('Time Series Analysis Plots\n Dickey-Fuller: p={0:.5f}'.format(p_value))
        smt.graphics.plot_acf(y, lags=lags, ax=acf_ax)
        smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)
        plt.tight_layout()
        

# Data collection framework

In [None]:
# Interval names, from finest to coarsest.
# NOTE(review): presumably the values accepted by the Robinhood historical
# quotes call used in Collector.write_clean_history — confirm against the API.
INTERVALS = [
    '5minute',
    '10minute',
    '30minute',
    'day',
    'week',
]

# Date-range names, from shortest to longest (same caveat as above).
DATE_RANGES = [
    'day',
    'week',
    'year',
    '5year',
    'all',
]

class Collector:
    """Collects real-time and historical quotes through Robinhood and stores them as CSV.

    The JSON configuration file is expected to contain a ``creds`` object
    (``username``, ``password``, ``qr_code``) and a ``paths`` object
    (``rt_path``, ``historical_path``, ``template_path``).
    """

    _TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self,config_file='config.txt'):
        """Create a collector; ``config_file`` is the config used by ``collect_quotes``."""
        self.init_time = datetime.now()
        self.config_file = config_file
        self.client = None
        self._log('Collector initiated')

    @staticmethod
    def _stamp(moment=None):
        """Format ``moment`` (default: now) the way every log line expects."""
        moment = datetime.now() if moment is None else moment
        return moment.strftime(Collector._TIMESTAMP_FORMAT)

    def _log(self,message):
        """Print ``message`` prefixed with the current timestamp."""
        print(f'{self._stamp()} | {message}')

    def login(self,path='config.txt'):
        """Log in to Robinhood with the credentials stored in the JSON file ``path``.

        On success ``self.client`` is set. A failed login is reported on
        stdout (best effort, no exception is propagated) and ``self.client``
        is left untouched.
        """
        client = Robinhood()
        with open(path,'r') as f:
            creds = json.load(f)['creds']
        try:
            client.login(username=creds['username'],
                         password=creds['password'],
                         qr_code=creds['qr_code'])
        except Exception as exc:
            # Not a bare except: Ctrl-C / SystemExit must still interrupt.
            self._log(f'Login to Robinhood API failed! ({type(exc).__name__}: {exc})')
        else:
            self.client = client
            self._log('Successful login to Robinhood')

    def initiate_day(self,day=None):
        """Compute the trading window for ``day`` and sleep until it opens.

        Parameters
        ----------
        day : datetime or None
            Day to trade on; ``None`` means "today", evaluated at call time
            (a ``datetime.now()`` default would be frozen at definition time).

        Returns
        -------
        tuple of datetime
            ``(trade_start, trade_end)``, moved to the next day when the
            window for ``day`` is already over.
        """
        if day is None:
            day = datetime.now()
        # 06:29-13:01 local time.
        # NOTE(review): presumably US market hours seen from Pacific time — confirm.
        trade_start = datetime(day.year,day.month,day.day,6,29)
        trade_end = datetime(day.year,day.month,day.day,13,1)
        now = datetime.now()
        print(f'{self._stamp(now)} | Trade starts at {self._stamp(trade_start)}')

        if now > trade_end:
            trade_start,trade_end = self.adjust_times(trade_start,trade_end)
            self.go_to_sleep(trade_start)
        elif now < trade_start:
            self.go_to_sleep(trade_start)

        return trade_start,trade_end

    def adjust_times(self,trade_start,trade_end):
        """Return the trading window shifted forward by one day."""
        trade_start = trade_start + timedelta(days=1)
        trade_end = trade_end + timedelta(days=1)
        self._log(f'Adjusting trade start to {self._stamp(trade_start)}')
        return trade_start,trade_end

    def go_to_sleep(self,wake_time):
        """Block until ``wake_time``; return immediately if it is already past."""
        # total_seconds() keeps the day component and the sign. `.seconds`
        # would turn a wake time in the past into a sleep of almost 24 hours.
        remaining = (wake_time - datetime.now()).total_seconds()
        sleep = max(0, int(remaining))
        self._log(f'Going to sleep for {sleep//3600} hours and {(sleep%3600)/60} minutes')
        time.sleep(sleep)
        self._log('Good Morning!')

    def is_business_day(self,date):
        """Return True when ``date`` falls on a pandas business day (Mon-Fri)."""
        return bool(len(pd.bdate_range(date, date)))

    def get_and_append_quote(self,quotes,ticker='SPY'):
        """Fetch the current quote for ``ticker`` and append it to ``quotes`` in place."""
        stock_quote = self.client.quote_data(ticker)
        quotes.append(stock_quote)
        return quotes

    def write_quotes(self,quotes,path):
        """Append ``quotes`` (a list of dict records) to the CSV at ``path``.

        The file must already exist. Returns a fresh empty list so the caller
        can reset its buffer with ``quotes = self.write_quotes(quotes, path)``.
        """
        stored = pd.read_csv(path)
        if quotes:
            # DataFrame.append was removed in pandas 2.0; concat is the replacement.
            stored = pd.concat([stored, pd.DataFrame(quotes)], ignore_index=True)
        stored.to_csv(path,index=False)
        self._log(f'Wrote {len(quotes)} quotes to file "{path}"')
        return []

    def write_clean_history(self,path,ticker='SPY',interval='5minute',date_range='week'):
        """Download historical quotes for ``ticker`` and write them to ``path`` as CSV.

        Timestamps are parsed and the four price columns converted to float.
        Failures while processing or writing are reported on stdout instead
        of being raised.
        """
        quotes = self.client.get_historical_quotes(ticker,interval,date_range)
        price_columns = ['open_price','close_price','high_price','low_price']
        try:
            df = pd.DataFrame(quotes['results'][0]['historicals'])
            df['begins_at'] = pd.to_datetime(df['begins_at'])
            df[price_columns] = df[price_columns].astype(float)
            df.to_csv(path,index=False)
            self._log(f'Wrote {df.shape[0]} quotes to file "{path}"')
        except Exception as exc:
            self._log(f'Failed to write "{path}" ({type(exc).__name__}: {exc})')

    def get_paths(self,date,config='config.txt',interval='5min',date_range='1week'):
        """Build the file paths for the week containing ``date``.

        Files are named after the Monday of that week (``YYYYMMDD``).

        Returns
        -------
        tuple of str
            ``(rt_path, historical_path, template_path)``.
        """
        week = (date - timedelta(days=date.weekday())).strftime("%Y%m%d")
        with open(config,'r') as f:
            paths = json.load(f)['paths']
        rt_path = os.path.join(paths['rt_path'], f'real_time_{week}.csv')
        historical_path = os.path.join(paths['historical_path'],
                                       f'historical_{interval}_{date_range}_{week}.csv')
        template = os.path.join(paths['template_path'])

        return rt_path, historical_path, template

    def collect_quotes(self,day=None,path='cache.csv'):
        """Run the endless collection loop, polling a quote roughly once a minute.

        Quotes are buffered in memory and flushed to the cache file ``path``
        at the top of every hour and at the end of the trading day. After
        Friday's session the weekly cache is copied to the real-time file,
        the historical quotes are written, and the cache is reset from the
        template.

        Parameters
        ----------
        day : datetime or None
            First day to collect on; ``None`` means today (evaluated at call time).
        path : str
            Cache CSV that receives the buffered quotes.

        Raises
        ------
        RuntimeError
            If the login did not produce a client.
        """
        self.login(self.config_file)
        if self.client is None:
            # Fail now rather than after sleeping until the market opens.
            raise RuntimeError('Robinhood login failed; cannot collect quotes')

        trade_start,trade_end = self.initiate_day(day)
        quotes = []
        _,_,template_path = self.get_paths(trade_start,self.config_file)
        pd.read_csv(template_path).to_csv(path,index=False)

        while True:
            while not self.is_business_day(trade_start):
                trade_start,trade_end = self.adjust_times(trade_start,trade_end)
                self.go_to_sleep(trade_start)

            now = datetime.now()
            quotes = self.get_and_append_quote(quotes)

            # Flush the buffer at the top of every hour.
            if now.minute == 0:
                quotes = self.write_quotes(quotes,path)

            time.sleep(61)

            if now > trade_end:
                quotes = self.write_quotes(quotes,path)

                # weekday() == 4 is Friday: close out the week. The former
                # `.day == 4` test matched the 4th of the month instead.
                if trade_end.weekday() == 4:
                    rt_path,historical_path,_ = self.get_paths(trade_end,self.config_file)
                    pd.read_csv(path).to_csv(rt_path,index=False)
                    self.write_clean_history(historical_path)
                    pd.DataFrame(columns = pd.read_csv(template_path).columns).to_csv(path,index=False)

                trade_start,trade_end = self.adjust_times(trade_start,trade_end)
                self.go_to_sleep(trade_start)


In [None]:
# Write this week's historical quotes to file.
# Creating the collector only records the start time; login() reads the
# credentials from the default 'config.txt'.
collector = Collector()
collector.login()
# Paths are named after the Monday of the current week; only the historical
# path is used here (rt_path is kept for reference).
rt_path,historical_path,_ = collector.get_paths(datetime.now())
# Uses the method defaults: ticker 'SPY', interval '5minute', date range 'week'.
collector.write_clean_history(historical_path)