In [1]:
import pandas as pd 
import os
from config import * 
import matplotlib.pyplot as plt 
import datetime
import numpy as np 
warnings.filterwarnings("ignore")

# Root folder of the Robintrack dump, kept both as text and as its
# filesystem-encoded bytes form (os.listdir on the bytes form yields bytes names).
dir_str = 'popularity_export'
directory = os.fsencode(dir_str)

In [3]:


def merge_rh_vol_w_barset(security,calc_profit = True,trend_window = 4,vol_thresh = 0,path = 'popularity_export/popularity_export'):
    """Join one ticker's Robintrack popularity with its daily bars and flag buy signals.

    Reads ``<path>/<security>.csv`` (columns ``timestamp`` and ``users_holding``),
    fetches daily bars plus the most recent minute bar through the module-level
    ``api`` object (presumably provided by ``config`` -- TODO confirm), and joins
    the two on calendar date.

    Parameters
    ----------
    security : str
        Ticker symbol; used as the CSV file stem and as the symbol passed to the API.
    calc_profit : bool
        When true, add a ``profit`` column with the forward return of each signal.
    trend_window : int
        Number of consecutive rows over which holders must rise and the open must
        fall for a signal; also the holding period used for ``profit``.
    vol_thresh : float
        Minimum value of the previous row's ``rh_vol_pct`` required for a signal.
    path : str
        Directory containing the per-ticker CSV files.

    Returns
    -------
    pandas.DataFrame
        The bar columns returned by the API, indexed by date, plus
        ``users_holding``, ``rh_vol_pct``, ``rh_action`` (1 on signal rows) and,
        if requested, ``profit``.
    """
    rh_vol = pd.read_csv('{}/{}.csv'.format(path,security))
    rh_vol.index = pd.to_datetime(rh_vol['timestamp']).dt.date
    rh_vol.drop_duplicates(inplace=True,keep = 'first')
    rh_vol.drop('timestamp',axis =1,inplace=True)
    # Several snapshots exist per day; keep the daily maximum. Grouping on the
    # index level avoids depending on the index having inherited a name.
    daily_holders = rh_vol.groupby(level=0)['users_holding'].max()

    today = datetime.date.today()
    bars = api.polygon.historic_agg_v2(security,1,'day',_from= rh_vol.index[0],to=today).df
    latest = api.polygon.historic_agg_v2(security,1,'minute',_from= today - datetime.timedelta(days=5),to=today).df.iloc[-1]
    # DataFrame.append was removed in pandas 2.0; concatenating the one-row
    # frame is the equivalent operation.
    bars = pd.concat([bars, latest.to_frame().T])
    # The appended row is a single minute bar, so its volume is not a daily
    # figure. Write through iloc with a column position: the chained form
    # bars.iloc[-1]['volume'] = ... may only modify a temporary copy.
    bars.iloc[-1, bars.columns.get_loc('volume')] = np.nan
    # NOTE(review): if a daily bar for the same date as the minute bar exists,
    # the date index will contain that date twice -- confirm this is intended.
    bars.index = bars.index.date

    merged = bars.join(daily_holders)
    merged['rh_vol_pct'] = merged['users_holding']*100/merged['volume']

    holders = merged['users_holding']
    rh_trend_up = ((holders > holders.shift(1)).rolling(trend_window).sum() == trend_window)
    price_trend_down = ((merged['open'] < merged['open'].shift(1)).rolling(trend_window).sum() == trend_window)
    is_vol_significant = merged['rh_vol_pct'].shift(1) > vol_thresh
    buy_signal = rh_trend_up & price_trend_down & is_vol_significant

    # Zero baseline that stays NaN wherever users_holding is missing.
    # mask()/where() build the columns in one step instead of assigning through
    # merged[col][mask], which is chained assignment and is silently lost under
    # pandas copy-on-write.
    baseline = holders * 0
    merged['rh_action'] = baseline.mask(buy_signal, 1)
    if calc_profit:
        # Forward-looking return: enter at the signal row's open and exit at the
        # close trend_window rows later (NaN for the last trend_window rows).
        forward_return = (merged['close'].shift(-trend_window) - merged['open'])/merged['open']
        merged['profit'] = forward_return.where(merged['rh_action'] == 1, baseline)
    return merged

def normalize_merged(schema,df):
    """Project ``df`` onto the index and columns of ``schema``, filling gaps with zero.

    Parameters
    ----------
    schema : pandas.DataFrame
        Frame whose index and column labels define the output layout.
    df : pandas.DataFrame
        Data to copy in; rows and columns absent from ``schema`` are ignored and
        NaN cells leave the zero in place (``DataFrame.update`` skips NaN).

    Returns
    -------
    pandas.DataFrame
        New float frame with ``schema``'s index and columns.
    """
    # Start from float zeros: the incoming data is fractional, and writing
    # floats into an integer template relies on an implicit upcast that pandas
    # has deprecated.
    blank_df = pd.DataFrame(0.0, index=schema.index, columns=list(schema))
    blank_df.update(df)
    return blank_df

# asset = merge_rh_vol_w_barset('AMD')
# print(asset[asset['rh_action'] != 0])
# print(asset[asset['rh_action'] == 1]['profit'].sum())
# fig,ax = plt.subplots()
# ax.plot(asset['close'],color ='red')
# ax2 = ax.twinx()
# ax2.plot(asset['users_holding'])
# plt.show()


In [None]:


def robintrack_strat(limit=100, path='popularity_export/popularity_export'):
    """Back-test the popularity signal across the exported tickers.

    Parameters
    ----------
    limit : int or None
        Maximum number of CSV files (in sorted name order) to evaluate;
        ``None`` evaluates all of them.
    path : str
        Directory holding the per-ticker CSV files; the same directory is
        passed on to ``merge_rh_vol_w_barset``.

    Returns
    -------
    pandas.DataFrame
        Indexed like the AMZN reference frame, with columns ``returns``
        (per-row growth factor, 1.0 when nothing traded), ``num_assets_traded``
        and ``cum_returns`` (cumulative product of ``returns``).
    """
    standard = merge_rh_vol_w_barset('AMZN', path=path) # standard barset schema
    profit_sum = pd.Series(0.0, index=standard.index)
    trade_count = pd.Series(0.0, index=standard.index)

    # List the directory the CSVs are actually read from, skip non-CSV entries
    # and sort so the `limit` slice is reproducible between runs.
    csv_names = sorted(name for name in os.listdir(path) if name.endswith('.csv'))
    for name in csv_names[:limit]:
        security = name.replace('.csv','')
        try:
            asset_returns = normalize_merged(standard,merge_rh_vol_w_barset(security, path=path))
        except Exception as exc:
            # Best-effort: one failing ticker must not abort the run, but
            # KeyboardInterrupt is allowed through and the cause is reported.
            print('{} data not found ({})'.format(security, exc))
            continue
        profit_sum += asset_returns['profit']
        trade_count += asset_returns['rh_action']
        if asset_returns['profit'].sum() > 0:
            print('{} traded, net: {}, num trades: {}'.format(security,asset_returns['profit'].sum(),len(asset_returns[asset_returns['profit'] > 0])))

    traded = trade_count > 0
    returns = pd.DataFrame(index=standard.index)
    # Equal-weight the assets entered on a row: growth factor = 1 + mean profit.
    # Dividing the whole (1 + sum) by the count would shrink the factor whenever
    # more than one asset trades on the same row.
    returns['returns'] = (1 + profit_sum / trade_count).where(traded, 1.0)
    returns['num_assets_traded'] = trade_count
    returns['cum_returns'] = returns['returns'].cumprod()
    return returns

# Run the back-test, then draw the compounded growth (red, left axis) against
# the running sum of the per-row factors (right axis).
res = robintrack_strat()

fig, ax = plt.subplots()
ax.plot(res['cum_returns'], color='red')
ax2 = ax.twinx()
ax2.plot(res['returns'].cumsum())
print(res)

final_growth = res['cum_returns'].iloc[-1]
summary_template = 'cumulative return: {}%, APR: {}%, max_drawdown (approx): {}%, total trades: {}'
print(summary_template.format(
    (final_growth - 1) * 100,
    # NOTE(review): the square root annualises as if the sample spans two years -- confirm.
    (np.sqrt(final_growth) - 1) * 100,
    (1 - res['cum_returns'].min()) * 100,
    res['num_assets_traded'].sum(),
))
plt.show()


In [None]:
# Trades opened per row of the back-test, followed by the overall total.
traded_per_row = res['num_assets_traded']
plt.plot(traded_per_row)
print(traded_per_row.sum())

In [6]:
# Scan every exported ticker that is in the Alpaca universe and report those
# whose most recent row carries a buy signal.
for filename in os.listdir('popularity_export/popularity_export'):
    security = filename.replace('.csv','')
    if security not in alpaca_universe:
        continue
    try:
        asset_returns = merge_rh_vol_w_barset(security)
        if asset_returns['rh_action'].iloc[-1] == 1:
            print('{} recommendation '.format(security))
            print(asset_returns.iloc[-1])
    except Exception:
        # Best-effort scan: tickers without usable data are skipped silently.
        # Catching Exception rather than using a bare except keeps the loop
        # interruptible with Ctrl-C / kernel interrupt.
        #print('{} data not found'.format(security))
        continue
print('done')
        

done


In [None]:
# Inspect a single ticker: the full merged frame first, then only the rows
# flagged as buy signals.
asset = merge_rh_vol_w_barset('MDB')
print(asset)
signal_rows = asset['rh_action'] == 1
print(asset[signal_rows])

In [None]:
# Ad-hoc check of the market-data API for one symbol: minute bars for the last
# five days (result discarded because it is not the cell's last expression),
# then the latest quote.
window_start = datetime.date.today() - datetime.timedelta(days=5)
api.polygon.historic_agg_v2('PSM', 1, 'minute', _from=window_start, to=datetime.date.today()).df
print(api.polygon.last_quote('PSM'))

In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import os
import shutil
import tarfile
from pathlib import Path


# function to take care of downloading file
def enable_download_headless(browser,download_dir):
    """Allow the given driver to save downloads into ``download_dir``.

    Registers a ``send_command`` endpoint on the driver's command executor and
    uses it to issue ``Page.setDownloadBehavior`` with behaviour ``allow``.
    """
    endpoint = ("POST", '/session/$sessionId/chromium/send_command')
    browser.command_executor._commands["send_command"] = endpoint
    behavior = {'behavior': 'allow', 'downloadPath': download_dir}
    browser.execute("send_command", {'cmd': 'Page.setDownloadBehavior', 'params': behavior})


def dl_robintrack_data(chromedriverpath = "../chromedriver",dir_str = 'popularity_export', timeout = None):
    """Download and unpack the Robintrack popularity export into ``dir_str``.

    Any existing ``dir_str`` directory is deleted and recreated, a headless
    Chrome session clicks the download link on the Robintrack data page, and
    the resulting ``tar.gz`` is extracted so that the data ends up in
    ``<dir_str>/popularity_export``.

    Parameters
    ----------
    chromedriverpath : str
        Path to the chromedriver executable.
    dir_str : str
        Target directory; its previous contents are removed.
    timeout : float or None
        Maximum number of seconds to wait for the archive to appear;
        ``None`` waits indefinitely.

    Returns
    -------
    int
        Always ``1`` on success.

    Raises
    ------
    TimeoutError
        If ``timeout`` elapses before a ``tar.gz`` file appears in ``dir_str``.
    """
    # Start from an empty directory, whether or not one existed before.
    if os.path.isdir(dir_str):
        shutil.rmtree(dir_str)
    os.mkdir(dir_str)

    chrome_options = Options() # instantiate a chrome options object so you can set the size and headless preference

    chrome_options.headless = True
    #chrome_options.add_argument("--headless")
    # some of these chrome options might be unnecessary; they come from boilerplate
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_options.add_argument("--disable-notifications")
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--verbose')
    chrome_options.add_experimental_option("prefs", {
            "download.default_directory": dir_str,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing_for_trusted_sources_enabled": False,
            "safebrowsing.enabled": False
    })
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--disable-software-rasterizer')

    driver = webdriver.Chrome(options=chrome_options, executable_path= chromedriverpath)
    try:
        # Headless Chrome refuses downloads until explicitly allowed.
        enable_download_headless(driver, dir_str)
        driver.get("http://robintrack.net/data-download")
        # Locate the download link on the page and click it.
        search_input = driver.find_element_by_xpath('//*[@id="root"]/div/div[2]/div/a')
        search_input.click()
        fname = _wait_for_archive(dir_str, timeout)
    finally:
        # Always shut the browser down so no Chrome process is left behind.
        driver.quit()

    archive_path = dir_str + '/' + fname
    # NOTE(review): the archive comes from the network and extractall() trusts
    # the member paths inside it; consider validating members before extracting.
    with tarfile.open(archive_path, "r:gz") as tar:
        tar.extractall(path = dir_str)
    os.remove(archive_path)

    # The archive unpacks to <dir_str>/tmp/popularity_export; move the data up
    # and drop the now-empty tmp folder (relative to dir_str, not a fixed path).
    shutil.move(dir_str + '/tmp/popularity_export',dir_str)
    shutil.rmtree(dir_str + '/tmp')
    print('done')
    return 1


def _wait_for_archive(dir_str, timeout=None, poll_interval=1):
    """Poll ``dir_str`` until a file ending in ``tar.gz`` exists and return its name."""
    import time  # not imported at the top of this notebook

    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        # Look at every entry, not just the first one, so an unrelated or
        # still-downloading file cannot hide the finished archive.
        archives = [name for name in os.listdir(dir_str) if name.endswith('tar.gz')]
        if archives:
            return archives[0]
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError('no tar.gz archive appeared in {} within {} seconds'.format(dir_str, timeout))
        time.sleep(poll_interval)


In [5]:
# Refresh the local Robintrack dump with the default chromedriver path and
# target directory; the existing 'popularity_export' directory is replaced.
dl_robintrack_data()

done


1

In [13]:
# Preview the raw per-ticker export for AAPL to check the file layout.
pd.read_csv('popularity_export/popularity_export/AAPL.csv')

Unnamed: 0,timestamp,users_holding
0,2018-05-02 04:53:46,150785
1,2018-05-02 06:38:58,150785
2,2018-05-03 00:35:25,145510
3,2018-05-03 06:33:53,145510
4,2018-05-03 06:48:56,145510
...,...,...
17777,2020-06-17 03:47:53,436450
17778,2020-06-17 04:47:24,436450
17779,2020-06-17 05:47:21,436450
17780,2020-06-17 06:47:42,436395
