In [15]:
import time
import math
import warnings
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import talib as ta
from model import Company, StockLog, IndexLog, PerformanceLog
from model import start_engine
from datetime import date, datetime
from sqlalchemy import desc

%matplotlib inline
warnings.filterwarnings("ignore")

In [2]:
engine = start_engine()

In [6]:
# get all companies
# engine = start_engine()
# start = datetime.strptime('2008-07-01', '%Y-%m-%d').date()
# start = np.datetime64(start)
# companies = Company().query().all()

In [101]:
def update_all_stocks(engine=engine):
    CSV = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED'\
          '&outputsize=full'\
          '&datatype=csv'\
          '&apikey=OW4NZBLAQU5EBFEV'\
          '&symbol='
   
    if not engine:
        engine = start_engine()
    companies = Company().query().all()
    # companies = [Company().query().get('ABC'), Company().query().get('CBA')]

    final1 = pd.DataFrame()
    final2 = pd.DataFrame()
    for company in companies:
        print('Updating', company.code)
        sls = StockLog().query().filter(StockLog.company == company).order_by(desc(StockLog.date)).limit(200)
        sl = pd.read_sql(sls.statement, engine, parse_dates=['date'])
        sl.sort_values(by='date', inplace=True)
        sl.set_index('date', inplace=True)
        cut_off = sl.index[-1]
        df = pd.read_csv(CSV + company.code + '.AX', usecols=[0, 1, 2, 3, 4, 5, 6], parse_dates=['timestamp'])
        df.rename(columns={'timestamp': 'date', 'adjusted_close': 'adjusted', 'close': 'closing', 'open' : 'opening'}, inplace=True)
        df.sort_values(by='date', inplace=True)
        df.set_index('date', inplace=True)
        df.drop(df[df.index <= cut_off].index, inplace=True)
        sl = sl.append(df)
        sl['code'] = company.code
        # calculate techincal indicators
        sl['change'] = sl['closing'].diff(1)
        sl['change_pct'] = sl['closing'].pct_change(1)
        for period in [15, 50, 200]:
            sl['sma%d' % period] = ta.SMA(sl['closing'], timeperiod=period)
        for period in [15, 50, 200]:
            sl['ema%d' % period] = ta.EMA(sl['closing'], timeperiod=period)  
        sl['macd'], sl['macd_sig'], sl['macd_hist']  = ta.MACD(sl['closing'], fastperiod=12, slowperiod=26, signalperiod=9)
        sl['bb_hi'], sl['bb_mid'], sl['bb_lo'] = ta.BBANDS(sl['closing'], timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
        sl['slowk'], sl['slowd'] = ta.STOCH(sl['high'], sl['low'], sl['closing'], fastk_period=14, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
        sl['rsi'] = ta.RSI(sl['closing'], timeperiod=14)
        sl['adx'] = ta.ADX(sl['high'], sl['low'], sl['closing'], timeperiod=14)
        sl['cci'] = ta.CCI(sl['high'], sl['low'], sl['closing'], timeperiod=14)
        sl['aroon_dn'], sl['aroon_up'] = ta.AROON(sl['high'], sl['low'], timeperiod=25)
        sl['chaikin'] = ta.AD(sl['high'], sl['low'], sl['closing'], sl['volume'])
        sl['obv'] = ta.OBV(sl['closing'], sl['volume'])
        sl['mom'] = ta.MOM(sl['closing'], timeperiod=10)
        # read annual reports
        perf_log = PerformanceLog().query().filter(PerformanceLog.company == company)
        fi = pd.read_sql(perf_log.statement, engine)
        fi.set_index('date', inplace=True)
        # Price to EPS Ratio
        eps = fi['eps']
        sl = sl.merge(eps, how='outer', left_index=True, right_index=True)
        sl['eps'].interpolate(method='pad', inplace=True)
        sl['pe_ratio'] = sl.apply(lambda x: x['closing'] / x['eps'], axis=1)
        sl.drop(['eps'], axis=1, inplace=True)
        # Dividend to Price Ratio (Dividend Yield)
        dividend = fi['net_dividend']
        sl = sl.merge(dividend, how='outer', left_index=True, right_index=True)
        sl['net_dividend'].interpolate(method='pad', inplace=True)
        sl['dp_ratio'] = sl.apply(lambda x: x['net_dividend'] / x['closing'], axis=1)
        sl.drop(['net_dividend'], axis=1, inplace=True)    
        # Price to Book Ratio
        bvps = fi['bv_ps']
        sl = sl.merge(bvps, how='outer', left_index=True, right_index=True)
        sl['bv_ps'].interpolate(method='pad', inplace=True)
        sl['pb_ratio'] = sl.apply(lambda x: x['closing'] / x['bv_ps'], axis=1)
        sl.drop(['bv_ps'], axis=1, inplace=True)    
        # clean up
        sl.dropna(subset=['opening'], inplace=True)
        # drop all old records
        sl.drop(sl[sl.index <= cut_off].index, inplace=True)
        sl.reset_index(inplace=True)
        final1 = final1.append(sl)
        time.sleep(15)
    
    for date in sorted(set(final1['date'])):
        tmp = final1[final1['date'] == date]
        tmp['rank'] = tmp['change_pct'].rank(ascending=False)
        final2 = final2.append(tmp)
    
    print('Writing databse')
    for idx, row in final2.iterrows():
        stock_log = StockLog()
        stock_log.date = row['date']
        stock_log.adjusted = row['adjusted']
        stock_log.adx = row['adx']        
        stock_log.aroon_dn = row['aroon_dn']
        stock_log.aroon_up = row['aroon_up']
        stock_log.bb_hi = row['bb_hi'] 
        stock_log.bb_lo = row['bb_lo']
        stock_log.bb_mid = row['bb_mid']
        stock_log.cci = row['cci'] 
        stock_log.chaikin = row['chaikin']
        stock_log.change = row['change']
        stock_log.change_pct = row['change_pct'] 
        stock_log.closing = row['closing']
        stock_log.code = row['code']
        stock_log.dp_ratio = row['dp_ratio'] 
        stock_log.ema15 = row['ema15']
        stock_log.ema200 = row['ema200']
        stock_log.ema50 = row['ema50'] 
        stock_log.high = row['high']
        stock_log.low = row['low']
        stock_log.macd = row['macd'] 
        stock_log.macd_hist = row['macd_hist']
        stock_log.macd_sig = row['macd_sig']
        stock_log.mom = row['mom']
        stock_log.obv = row['obv']
        stock_log.opening = row['opening']
        stock_log.pb_ratio = row['pb_ratio']
        stock_log.pe_ratio = row['pe_ratio']
        stock_log.prediction = row['prediction']
        stock_log.rank = row['rank'] 
        stock_log.rsi = row['rsi']
        stock_log.slowd = row['slowd']
        stock_log.slowk = row['slowk'] 
        stock_log.sma15 = row['sma15']
        stock_log.sma200 = row['sma200']
        stock_log.sma50 = row['sma50']         
        stock_log.volume = row['volume']
        stock_log.save()

In [None]:
update_all_stocks()

Updating ALQ
Updating SYD
Updating ABC
Updating WOW
Updating AGL
Updating OZL
Updating AWC
Updating ALU
Updating NAB
Updating AMC
Updating AMP
Updating ANN
Updating IAG
Updating ANZ
Updating ALL
Updating ASX
Updating BSL
Updating BOQ
Updating BHP
Updating BLD
Updating BXB
Updating CTX
Updating CGF
Updating CIM
Updating SHL
Updating DMP
Updating CWY
Updating CCL
Updating COH
Updating ILU
Updating CPU
Updating CWN
Updating CSL
Updating DXS
Updating DOW
Updating FLT
Updating GMG
Updating EVN
Updating FMG
Updating GPT
Updating IPL
Updating JHG
Updating JHX
Updating JBH
Updating MQG
Updating MFG
Updating MGR
Updating NCM
Updating NST
Updating OSH
Updating AST
Updating ORI
Updating ORG
Updating PDL
Updating QAN
Updating CBA
Updating QUB
Updating RHC
Updating RIO


In [82]:
def update_last_ticks(companies='all', engine=engine):
    CSV = 'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY'\
          '&interval=1min'\
          '&outputsize=full'\
          '&datatype=csv'\
          '&apikey=B2S8XKGQNA9PKVS0'\
          '&symbol='
    
    if not engine:
        engine = start_engine()
    
    if companies == 'all':
        companies = Company().query().all()
    else:
        company_list = []
        for code in companies:
            company_list.append(Company().query().get(code))
        companies = company_list

    for company in companies:
        stock = company.code
        print('Updating', stock)
        df = pd.read_csv(CSV + stock + '.AX', parse_dates=['timestamp'])
        df['timestamp'] = df['timestamp'].dt.tz_localize('Australia/Sydney')
        df['timestamp'] = df['timestamp'].dt.tz_convert(None)
        df['timestamp'] = df['timestamp'] + pd.Timedelta(days=1)
        df = df.loc[df['timestamp'].idxmax()]
        company.last_dt = df['timestamp'].to_pydatetime()
        company.last_tick = df['close']
        company.save()
        time.sleep(15)

In [10]:
update_last_ticks(companies='all')

Updating ALU
Updating AWC
Updating AMC
Updating AMP
Updating ANN
Updating ANZ
Updating APA
Updating ALL
Updating ASX
Updating AST
Updating BOQ
Updating BEN
Updating BHP
Updating BSL
Updating BLD
Updating BXB
Updating CTX
Updating CGF
Updating CHC
Updating CIM
Updating CWY
Updating CCL
Updating COH
Updating CBA
Updating CPU
Updating CWN
Updating CSL
Updating DXS
Updating DMP
Updating DOW
Updating EVN
Updating FLT
Updating FMG
Updating GMG
Updating GPT
Updating ILU
Updating IPL
Updating IAG
Updating JHX
Updating JHG
Updating JBH
Updating MQG
Updating MFG
Updating MGR
Updating NAB
Updating NCM
Updating NST
Updating OSH
Updating ORI
Updating ORG
Updating OZL
Updating PDL
Updating QAN
Updating QBE
Updating QUB
Updating RHC
Updating REA
Updating RMD
Updating RIO
Updating STO
Updating SEK
Updating SHL
Updating SOL
Updating SGP
Updating SYD
Updating TAH
Updating TLS
Updating TPM
Updating TCL
Updating WES
Updating WBC
Updating WHC
Updating WPL
Updating WOW
Updating WOR
Updating ABC
Updating AGL

In [17]:
table = pd.DataFrame()
for company in companies:
    print('Processing', company.code)
    # get stock data
    CSV = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED'\
          '&outputsize=full'\
          '&datatype=csv'\
          '&apikey=OW4NZBLAQU5EBFEV'\
          '&symbol=%s' % company.code
    df = pd.read_csv(CSV + '.AX', usecols=[0, 1, 2, 3, 4, 5, 6], parse_dates=['timestamp'])
    df.rename(columns={'timestamp': 'date', 'adjusted_close': 'adj_close', 'close': 'closing', 'open' : 'opening'}, inplace=True)
    df.sort_values(by='date', inplace=True)
    df.set_index('date', inplace=True)
    df.drop(df[df.index < start].index, inplace=True)
    df.drop(df[df.closing == 0].index, inplace=True)
    df['code'] = company.code
    # calculate techincal indicators
    df['change'] = df['closing'].diff(1)
    df['change_pct'] = df['closing'].pct_change(1)
    for period in [15, 50, 200]:
        df['sma%d' % period] = ta.SMA(df['closing'], timeperiod=period)
    for period in [15, 50, 200]:
        df['ema%d' % period] = ta.EMA(df['closing'], timeperiod=period)  
    df['macd'], df['macd_sig'], df['macd_hist']  = ta.MACD(df['closing'], fastperiod=12, slowperiod=26, signalperiod=9)
    df['bb_hi'], df['bb_mid'], df['bb_lo'] = ta.BBANDS(df['closing'], timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    df['slowk'], df['slowd'] = ta.STOCH(df['high'], df['low'], df['closing'], fastk_period=14, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
    df['rsi'] = ta.RSI(df['closing'], timeperiod=14)
    df['adx'] = ta.ADX(df['high'], df['low'], df['closing'], timeperiod=14)
    df['cci'] = ta.CCI(df['high'], df['low'], df['closing'], timeperiod=14)
    df['aroon_dn'], df['aroon_up'] = ta.AROON(df['high'], df['low'], timeperiod=25)
    df['chaikin'] = ta.AD(df['high'], df['low'], df['closing'], df['volume'])
    df['obv'] = ta.OBV(df['closing'], df['volume'])
    df['mom'] = ta.MOM(df['closing'], timeperiod=10)
    # read annual reports
    perf_log = PerformanceLog().query().filter(PerformanceLog.company==company)
    fi = pd.read_sql(perf_log.statement, engine)
    fi.set_index('date', inplace=True)
    # Price to EPS Ratio
    eps = fi['eps']
    df = df.merge(eps, how='outer', left_index=True, right_index=True)
    df['eps'].interpolate(method='pad', inplace=True)
    df['pe_ratio'] = df.apply(lambda x: x['closing'] / x['eps'], axis=1)
    df.drop(['eps'], axis=1, inplace=True)
    # Dividend to Price Ratio (Dividend Yield)
    dividend = fi['net_dividend']
    df = df.merge(dividend, how='outer', left_index=True, right_index=True)
    df['net_dividend'].interpolate(method='pad', inplace=True)
    df['dp_ratio'] = df.apply(lambda x: x['net_dividend'] / x['closing'], axis=1)
    df.drop(['net_dividend'], axis=1, inplace=True)    
    # Price to Book Ratio
    bvps = fi['bv_ps']
    df = df.merge(bvps, how='outer', left_index=True, right_index=True)
    df['bv_ps'].interpolate(method='pad', inplace=True)
    df['pb_ratio'] = df.apply(lambda x: x['closing'] / x['bv_ps'], axis=1)
    df.drop(['bv_ps'], axis=1, inplace=True)    
    # clean up
    df.dropna(subset=['opening'], inplace=True)
    table = table.append(df)
    time.sleep(12)
    # write to DB
    # df.set_index('code', append=True, inplace=True) 
    # df.to_sql('test_table', engine, if_exists='append')

Processing WOW
Processing ABC
Processing SYD
Processing IAG
Processing AGL
Processing AWC
Processing ALU
Processing NAB
Processing AMC
Processing AMP
Processing ANN
Processing ANZ
Processing APA
Processing ALL
Processing ASX
Processing AST
Processing BOQ
Processing BEN
Processing BHP
Processing BSL
Processing BLD
Processing BXB
Processing CTX
Processing CGF
Processing CHC
Processing CIM
Processing CWY
Processing CCL
Processing COH
Processing CBA
Processing CPU
Processing CWN
Processing CSL
Processing DXS
Processing DMP
Processing DOW
Processing EVN
Processing FLT
Processing FMG
Processing GMG
Processing GPT
Processing ILU
Processing IPL
Processing JHX
Processing JHG
Processing JBH
Processing MQG
Processing MFG
Processing MGR
Processing NCM
Processing NST
Processing OSH
Processing ORI
Processing ORG
Processing OZL
Processing PDL
Processing QAN
Processing QUB
Processing RHC
Processing REA
Processing RMD
Processing RIO
Processing STO
Processing SEK
Processing SHL
Processing SOL
Processing

In [19]:
table['code'] = table['code'].astype(str)
table['volume'] = table['volume'].astype(np.int64)
table['prediction'] = np.NaN
table['adjusted'] = table['adj_close']

In [21]:
table.to_csv('all_stock_logs.csv')

In [22]:
final_table = pd.DataFrame()
for idx in sorted(set(table.index)):
    print(idx)
    tmp = table[table.index == idx]
    tmp['rank'] = tmp['change_pct'].rank(ascending=False)
    final_table = final_table.append(tmp)

2008-07-01 00:00:00
2008-07-02 00:00:00
2008-07-03 00:00:00
2008-07-04 00:00:00
2008-07-07 00:00:00
2008-07-08 00:00:00
2008-07-09 00:00:00
2008-07-10 00:00:00
2008-07-11 00:00:00
2008-07-14 00:00:00
2008-07-15 00:00:00
2008-07-16 00:00:00
2008-07-17 00:00:00
2008-07-18 00:00:00
2008-07-21 00:00:00
2008-07-22 00:00:00
2008-07-23 00:00:00
2008-07-24 00:00:00
2008-07-25 00:00:00
2008-07-28 00:00:00
2008-07-29 00:00:00
2008-07-30 00:00:00
2008-07-31 00:00:00
2008-08-01 00:00:00
2008-08-04 00:00:00
2008-08-05 00:00:00
2008-08-06 00:00:00
2008-08-07 00:00:00
2008-08-08 00:00:00
2008-08-11 00:00:00
2008-08-12 00:00:00
2008-08-13 00:00:00
2008-08-14 00:00:00
2008-08-15 00:00:00
2008-08-18 00:00:00
2008-08-19 00:00:00
2008-08-20 00:00:00
2008-08-21 00:00:00
2008-08-22 00:00:00
2008-08-25 00:00:00
2008-08-26 00:00:00
2008-08-27 00:00:00
2008-08-28 00:00:00
2008-08-29 00:00:00
2008-09-01 00:00:00
2008-09-02 00:00:00
2008-09-03 00:00:00
2008-09-04 00:00:00
2008-09-05 00:00:00
2008-09-08 00:00:00


In [35]:
final_table = final_table[['code', 'opening', 'high', 'low', 'closing', 'adjusted', 'volume',
                           'sma15', 'sma50', 'sma200', 'ema15', 'ema50', 'ema200', 'macd', 'macd_sig', 'macd_hist', 'bb_hi', 'bb_mid', 'bb_lo',
                           'slowk', 'slowd', 'rsi', 'adx', 'cci', 'aroon_dn', 'aroon_up', 'chaikin', 'obv', 'mom',
                           'pe_ratio', 'dp_ratio', 'pb_ratio', 'prediction', 'rank', 'change', 'change_pct']]

In [41]:
final_table.to_csv('final_stock_logs.csv')

In [21]:
final_table.set_index('code', append=True, inplace=True) 