Commit

⚖️ Feature: Volume Weighted Average ⚖️ (#61)
* resolved

* add mergers to planned data collection in readme

* add get_intraday_path

* comments

* add institutional sentiment to readme data checklist

* to compile

* limit polygon save timeframe to prevent retroactive weirdness

* add multiprocessing to speed up data updates

* delete local data files after each file update to preserve disk space

* constants tests done

* conflict

* conflict

* polygon test_intraday

* iex test_intraday

* test_save_intraday + repo rename

* readme

* git cleanup local branches

* package name change

* remove package upload in dev pipeline

* fix bugs and add volume weighted avg price to ohlc

* tests

* lint

* lint again

* pandas warning

* fix date polygon
suchak1 committed Nov 8, 2020
1 parent 7b2df8f commit 9c338b7
Showing 8 changed files with 36 additions and 47 deletions.
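
The headline change maps Polygon's volume weighted average price into the OHLC pipeline. As a refresher, VWAP over a window of bars is the volume-weighted mean price, VWAP = Σ(pᵢ·vᵢ) / Σ(vᵢ); Polygon's aggregate bars ship it precomputed in a 'vw' field, so the diff below threads that field through rather than computing it. A minimal sketch of the formula itself, using the typical price (H + L + C) / 3 as each bar's representative price (column names here are illustrative stand-ins for the repo's C.HIGH, C.LOW, C.CLOSE, C.VOL constants):

import pandas as pd

def vwap(df):
    # Typical price (H + L + C) / 3 stands in for each bar's trade price.
    typical = (df['High'] + df['Low'] + df['Close']) / 3
    return (typical * df['Vol']).sum() / df['Vol'].sum()

bars = pd.DataFrame({
    'High': [11.0, 12.0],
    'Low': [9.0, 10.0],
    'Close': [10.0, 11.0],
    'Vol': [100, 300],
})
print(vwap(bars))  # 10.75 == (10.0 * 100 + 11.0 * 300) / 400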
10 changes: 0 additions & 10 deletions .github/workflows/sandbox.yml
@@ -79,13 +79,3 @@ jobs:
 
       - name: Upload repo to S3
         run: python scripts/update_repo.py
-
-      - name: Publish package
-        env:
-          GITHUB: ${{ secrets.GITHUB }}
-          PYPI_TEST: ${{ secrets.PYPI_TEST }}
-        run: |
-          pip install twine python-dotenv setuptools wheel
-          python setup.py sdist bdist_wheel
-          python util/pypi.py
-          python -m twine upload --repository testpypi dist/*
2 changes: 1 addition & 1 deletion scripts/update_hist_ohlc.py
@@ -10,7 +10,7 @@
 iex = IEXCloud()
 poly_stocks = Polygon()
 poly_crypto = Polygon(os.environ['POLYGON'])
-stock_symbols = iex.get_symbols()[140:]
+stock_symbols = iex.get_symbols()[250:]
 crypto_symbols = POLY_CRYPTO_SYMBOLS
 # Double redundancy
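
The bumped slice offset ([140:] to [250:]) reads like a resume marker for the historical backfill: symbols below the index were already fetched by earlier runs, so a rerun picks up where the last one stopped.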
2 changes: 1 addition & 1 deletion setup.py
@@ -28,7 +28,7 @@ def get_readme():
 
 
 setup(
-    name='hyperdrive-quant',
+    name='hyperdrive',
     version=get_version(),
     description='An algorithmic trading platform',
     long_description=get_readme(),
1 change: 1 addition & 0 deletions src/Constants.py
@@ -58,6 +58,7 @@ def get_env_bool(var_name):
 LOW = 'Low'
 CLOSE = 'Close'
 VOL = 'Vol'
+AVG = 'Avg'
 
 # Sentiment
 POS = 'Pos'
32 changes: 9 additions & 23 deletions src/DataSource.py
@@ -3,8 +3,7 @@
 from time import sleep
 import pandas as pd
 from pytz import timezone
-from operator import attrgetter
-from datetime import datetime, timedelta
+from datetime import datetime
 from polygon import RESTClient
 from dotenv import load_dotenv
 from FileOps import FileReader, FileWriter
@@ -124,8 +123,8 @@ def save_splits(self, **kwargs):
     def standardize_ohlc(self, symbol, df):
         full_mapping = dict(
             zip(
-                ['date', 'open', 'high', 'low', 'close', 'volume'],
-                [C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE, C.VOL]
+                ['date', 'open', 'high', 'low', 'close', 'volume', 'average'],
+                [C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE, C.VOL, C.AVG]
             )
         )
         df = self.standardize(
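
The two extra entries ('average' → C.AVG) let standardize_ohlc carry Polygon's VWAP column through the same renaming path as the other OHLCV fields. A rough sketch of the effect, with a stand-in standardize that just renames and reorders known columns (the repo's real standardize is not shown in this diff):

import pandas as pd

full_mapping = dict(zip(
    ['date', 'open', 'high', 'low', 'close', 'volume', 'average'],
    ['Time', 'Open', 'High', 'Low', 'Close', 'Vol', 'Avg'],  # C.TIME ... C.AVG
))

def standardize(df, mapping):
    # Rename the vendor columns we recognize and keep only those.
    cols = {old: new for old, new in mapping.items() if old in df.columns}
    return df.rename(columns=cols)[list(cols.values())]

raw = pd.DataFrame([{
    'date': '2020-11-06', 'open': 118.3, 'high': 119.2, 'low': 116.9,
    'close': 118.7, 'volume': 1.1e8, 'average': 118.1,
}])
print(standardize(raw, full_mapping).columns.tolist())
# ['Time', 'Open', 'High', 'Low', 'Close', 'Vol', 'Avg']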
@@ -413,25 +412,11 @@ def _get_splits(symbol, timeframe='max'):
             return self.try_again(func=_get_splits, **kwargs)
 
     def get_ohlc(self, **kwargs):
-        def _get_prev_ohlc(symbol):
-            today = datetime.now(timezone('US/Eastern'))
-            one_day = timedelta(days=1)
-            yesterday = today - one_day
-            formatted_date = yesterday.strftime('%Y-%m-%d')
-            response = self.client.stocks_equities_daily_open_close(
-                symbol, formatted_date, unadjusted=False)
-            raw = attrgetter('from_', 'open', 'high', 'low',
-                             'close', 'volume')(response)
-            labels = ['date', 'open', 'high', 'low', 'close', 'volume']
-            data = dict(zip(labels, raw))
-            df = pd.DataFrame([data])
-            return self.standardize_ohlc(symbol, df)
-
         def _get_ohlc(symbol, timeframe='max'):
-            if timeframe == '1d':
-                return _get_prev_ohlc(symbol)
-            end = datetime.now(timezone('US/Eastern'))
-            delta = self.reader.convert_delta(timeframe)
+            end = datetime.now(timezone('US/Eastern')) - \
+                self.reader.convert_delta('1d')
+            delta = self.reader.convert_delta(
+                timeframe) - self.reader.convert_delta('1d')
             start = end - delta
             formatted_start = start.strftime('%Y-%m-%d')
             formatted_end = end.strftime('%Y-%m-%d')
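
The rewritten window math drops the special-cased _get_prev_ohlc path and instead shifts both endpoints: end = now − 1d and delta = timeframe − 1d, so start = end − delta = now − timeframe. The request still spans the full timeframe but ends yesterday, which keeps today's still-forming bar (the "retroactive weirdness" from the commit messages) out of saved data. A sketch of the arithmetic in isolation, assuming convert_delta reduces to whole-day timedeltas:

from datetime import datetime, timedelta
from pytz import timezone

def ohlc_window(timeframe_days):
    # Stand-in for self.reader.convert_delta: timeframes become day counts.
    one_day = timedelta(days=1)
    end = datetime.now(timezone('US/Eastern')) - one_day
    delta = timedelta(days=timeframe_days) - one_day
    start = end - delta  # == now - timeframe_days: full span, ending yesterday
    return start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')

print(ohlc_window(30))  # e.g. ('2020-10-09', '2020-11-07') on 2020-11-08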
@@ -440,7 +425,8 @@ def _get_ohlc(symbol, timeframe='max'):
                 from_=formatted_start, to=formatted_end, unadjusted=False
             ).results
             columns = {'t': 'date', 'o': 'open', 'h': 'high',
-                       'l': 'low', 'c': 'close', 'v': 'volume'}
+                       'l': 'low', 'c': 'close', 'v': 'volume',
+                       'vw': 'average'}
             df = pd.DataFrame(response).rename(columns=columns)
             df['date'] = df['date'].apply(
                 lambda x: datetime.fromtimestamp(int(x)/1000))
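
Polygon's aggregate results key their fields by single letters, and 'vw' is the bar's volume weighted average price, which is why the new column mapping gets VWAP for free; 't' is a Unix timestamp in milliseconds, hence the division by 1000 before datetime.fromtimestamp.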
19 changes: 11 additions & 8 deletions src/FileOps.py
@@ -1,7 +1,8 @@
 import os
 import json
 import time
-from datetime import date, datetime, timedelta
+from pytz import timezone
+from datetime import datetime, timedelta
 import pandas as pd
 from Storage import Store
 # consider combining fileoperations into one class
@@ -83,16 +84,18 @@ def convert_delta(self, timeframe):
 
         return delta
 
-    def data_in_timeframe(self, df, col, timeframe='max', tolerance='0d'):
+    def data_in_timeframe(self, df, col, timeframe='max'):  # noqa , tolerance='0d'):
         if col not in df:
             return df
         delta = self.convert_delta(timeframe)
-        tol = self.convert_delta(tolerance)
-        df[col] = pd.to_datetime(df[col])
-        filtered = df[df[col] > pd.to_datetime(date.today() - delta)]
-        if filtered.empty:
-            filtered = df[df[col] > pd.to_datetime(
-                date.today() - (delta + tol))]
+        # tol = self.convert_delta(tolerance)
+        tz = timezone('US/Eastern')
+        df[col] = pd.to_datetime(df[col]).dt.tz_localize(tz)
+        today = datetime.now(tz)
+        filtered = df[df[col] > pd.to_datetime(today - delta)].copy(deep=True)
+        # if filtered.empty:
+        #     filtered = df[df[col] > pd.to_datetime(today - (delta + tol))]
+        filtered[col] = filtered[col].dt.tz_localize(None)
         return filtered
 
     # def convert_dates(self, timeframe):
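
The net effect in data_in_timeframe: timestamps are localized to US/Eastern so they can be compared against an aware "now", and the tz info is stripped again before returning. The same pattern in isolation (column name and cutoff are illustrative):

import pandas as pd
from pytz import timezone
from datetime import datetime, timedelta

tz = timezone('US/Eastern')
df = pd.DataFrame({'Time': ['2020-11-02', '2020-11-06']})

# Localize the naive timestamps so comparing against an aware "now" is valid.
df['Time'] = pd.to_datetime(df['Time']).dt.tz_localize(tz)
cutoff = datetime.now(tz) - timedelta(days=7)
recent = df[df['Time'] > pd.to_datetime(cutoff)].copy(deep=True)

# Strip tz info again so callers keep getting naive timestamps.
recent['Time'] = recent['Time'].dt.tz_localize(None)
print(recent)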
13 changes: 11 additions & 2 deletions src/Storage.py
@@ -34,7 +34,7 @@ def upload_dir(self, **kwargs):
 
     def delete_objects(self, keys):
         if keys:
-            objects = [{'Key': key} for key in keys]
+            objects = [{'Key': key.replace('\\', '/')} for key in keys]
             bucket = self.get_bucket()
             bucket.delete_objects(Delete={'Objects': objects})
 
@@ -44,6 +44,7 @@ def get_all_keys(self):
         return keys
 
     def key_exists(self, key, download=False):
+        key = key.replace('\\', '/')
         try:
             if download:
                 self.download_file(key)
@@ -60,13 +61,17 @@ def download_file(self, key):
             self.finder.make_path(key)
             with open(key, 'wb') as file:
                 bucket = self.get_bucket()
-                bucket.download_fileobj(key, file)
+                s3_key = key.replace('\\', '/')
+                bucket.download_fileobj(s3_key, file)
         except ClientError as e:
             print(f'{key} does not exist in S3.')
             os.remove(key)
             print(e)
+            raise e
 
     def copy_object(self, src, dst):
+        src = src.replace('\\', '/')
+        dst = dst.replace('\\', '/')
         bucket = self.get_bucket()
         copy_source = {
             'Bucket': self.bucket_name,
@@ -75,16 +80,20 @@
         bucket.copy(copy_source, dst)
 
     def rename_key(self, old_key, new_key):
+        old_key = old_key.replace('\\', '/')
+        new_key = new_key.replace('\\', '/')
         self.copy_object(old_key, new_key)
         self.delete_objects([old_key])
 
     def last_modified(self, key):
+        key = key.replace('\\', '/')
         bucket = self.get_bucket()
         obj = bucket.Object(key)
         then = obj.last_modified.replace(tzinfo=None)
         return then
 
     def modified_delta(self, key):
+        key = key.replace('\\', '/')
         then = self.last_modified(key)
         now = datetime.utcnow()
         return now - then
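
All of the Storage changes are one idea: S3 keys always use forward slashes, while paths built with os.path.join use backslashes on Windows, so every public method now normalizes keys on the way in. A sketch of the rationale (the helper name is mine, not the repo's):

import os

def to_s3_key(path):
    # S3 keys use '/'; os.path.join produces '\\' separators on Windows.
    return path.replace('\\', '/')

local = os.path.join('data', 'ohlc', 'AAPL.csv')  # 'data\\ohlc\\AAPL.csv' on Windows
print(to_s3_key(local))  # 'data/ohlc/AAPL.csv' on any platform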
4 changes: 2 additions & 2 deletions test/test_DataSource.py
@@ -261,7 +261,7 @@ def test_save_ohlc(self):
 
         # assert md.reader.check_file_exists(intra_path)
         # assert md.reader.store.modified_delta(
-        #     intra_path).total_seconds() < 60)
+        #     intra_path).total_seconds() < 60
         # df = md.reader.load_csv(intra_path)
         # assert {C.TIME, C.OPEN, C.HIGH, C.LOW,
         #         C.CLOSE, C.VOL}.issubset(df.columns)
@@ -362,7 +362,7 @@ def test_get_splits(self):
     def test_get_ohlc(self):
         df = poly.get_ohlc(symbol='AAPL', timeframe='1m')
         assert {C.TIME, C.OPEN, C.HIGH, C.LOW,
-                C.CLOSE, C.VOL}.issubset(df.columns)
+                C.CLOSE, C.VOL, C.AVG}.issubset(df.columns)
         assert len(df) > 10
 
     # def test_get_intraday(self):
