In [1]:
import datetime as dt
import pymysql, talib, sys, os, tempfile
import pandas as pd , numpy as np

#############################################
class HKta:
    """Daily OHLCV history of one symbol plus weekly / monthly / yearly roll-ups.

    Constructing an instance immediately loads the symbol's rows from the
    `consolidated_daily` table (see `populate`) and fills these attributes:

    raw      -- frame exactly as read (indexed by 'Date'), restricted to
                Open/High/Low/Close/Volume
    daily    -- cleaned frame: NaNs zero-filled, zero-volume rows removed,
                DatetimeIndex sorted ascending, OHLC made self-consistent
    weekly   -- roll-up indexed by 'WeekStart' (the Monday of each week)
    monthly  -- roll-up indexed by 'yyyymm' (string, e.g. '201901')
    yearly   -- roll-up indexed by 'Year' (integer)

    Every roll-up has the columns
    Open, High, Low, Close, Volume, AvgDVol, Count
    where Count is the number of daily rows in the period and AvgDVol is
    Volume / Count truncated to an integer.
    """

    # Class-level defaults: `self.conn` (and the others) always resolve, even
    # in __del__ after a constructor that failed before connecting.
    conn = None
    symbol=None

    raw=None
    daily=None
    weekly=None
    monthly=None
    yearly=None

    _PRICE_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume']
    _ROLLUP_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume', 'AvgDVol', 'Count']

    ####################
    def __init__(self,symbol):
        """Remember `symbol` and load all of its data right away."""
        self.symbol = symbol
        self.populate()

    ####################
    def populate(self):
        """(Re)load `raw` and `daily` from the database and rebuild the roll-ups.

        Opens the MySQL connection on first use; the credentials come from the
        MYSQL_READUSER / MYSQL_READPASSWORD environment variables (KeyError if
        either is unset).
        """
        if self.conn is None:
            self.conn = pymysql.connect(host='tptcn.ddns.net', port=53306, db='p_master',
                                   user=os.environ['MYSQL_READUSER'], password=os.environ['MYSQL_READPASSWORD'],
                                   cursorclass=pymysql.cursors.DictCursor)

        # The symbol is bound as a query parameter (never interpolated into the
        # SQL text), so a hostile or malformed symbol cannot alter the query.
        sql = "SELECT  * FROM `consolidated_daily` WHERE `symbol`=%s "
        self.raw = pd.read_sql(sql, self.conn, index_col=['Date'],
                               params=(self.symbol,))[self._PRICE_COLUMNS]

        self.daily = self._clean_daily(self.raw)

        ### roll up
        bars = self.daily.reset_index()
        bars['yyyymm'] = bars['Date'].dt.strftime("%Y%m")
        bars['Year'] = bars['Date'].dt.year
        # Monday of the row's week: subtract weekday() days (Monday == 0).
        bars['WeekStart'] = bars['Date'] - pd.to_timedelta(bars['Date'].dt.weekday, unit='D')

        self.weekly = self._rollup(bars, ['WeekStart'])
        # 'yyyymm' is zero-padded, so grouping on it orders periods by (year, month).
        self.monthly = self._rollup(bars, ['yyyymm'])
        self.yearly = self._rollup(bars, ['Year'])

    ####################
    def _clean_daily(self, raw):
        """Return the cleaned, date-sorted daily frame derived from `raw`."""
        # Missing values become 0; rows that end up with no volume are dropped.
        daily = raw.fillna(0)
        daily['Volume'] = daily['Volume'].astype('int')
        daily = daily[daily['Volume'] > 0]

        daily = daily.reset_index()
        daily['Date'] = pd.to_datetime(daily['Date'])
        # The query has no ORDER BY, so sort explicitly: the 'first'/'last'
        # aggregations and date-range slicing both need chronological order.
        daily = daily.set_index('Date').sort_index()

        #Close should be official; so adjust High/Low if necessary
        daily['High'] = np.maximum(daily['High'], daily['Close'])
        daily['Low'] = np.minimum(daily['Low'], daily['Close'])
        #Open is calculated; so, adjust according to the High/Low if necessary
        daily['Open'] = np.minimum(daily['Open'], daily['High'])
        daily['Open'] = np.maximum(daily['Open'], daily['Low'])
        return daily

    ####################
    def _rollup(self, bars, keys):
        """Aggregate chronologically ordered daily `bars` into one row per `keys` group."""
        grouped = bars.groupby(keys)
        out = grouped.agg({'Open':'first', 'High':'max', 'Low':'min', 'Close':'last', 'Volume':'sum'})
        out['Count'] = grouped.size()
        out['Volume'] = out['Volume'].astype('int')
        out['AvgDVol'] = (out['Volume'] / out['Count']).astype('int')
        # Fix the column order explicitly (dict order is not guaranteed on old Pythons).
        return out[self._ROLLUP_COLUMNS]

    ####################
    def __del__(self):
        """Close the database connection when the object is garbage-collected."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None


In [2]:

data = HKta('2800.HK')
### daily done, calc TA
# Attach TA-Lib's weighted close price to the daily frame itself; later cells
# read data.daily['WCLPRICE'].
data.daily['WCLPRICE'] = talib.WCLPRICE(data.daily['High'], data.daily['Low'], data.daily['Close'])

# One summary line per calendar year, treating WCLPRICE as the price series.
for y in range(2008,2020):
    yyyyStart="%d-01-01"%(y)
    yyyyEnd  ="%d-12-31"%(y)
    df = data.daily.loc[yyyyStart:yyyyEnd]
    if df.empty:
        # No rows for this year: report it instead of failing on iloc[0].
        print("year=%4d, no data" % (y))
        continue
    wcl = df['WCLPRICE']
    wclHigh = wcl.max()
    wclLow = wcl.min()
    print("year=%4d, mean=%.3f, stddev=%.3f, open=%.3f, high=%.3f, low=%.3f, rng=%.3f, close=%.3f, count=%d" % (
        y, wcl.mean(), wcl.std()
        , wcl.iloc[0]       #Open  (first value in the year)
        , wclHigh           #High
        , wclLow            #Low
        , wclHigh - wclLow  #Range
        , wcl.iloc[-1]      #Close (last value in the year)
        , len(df.index)     # count
    ))

year=2008, mean=21.180, stddev=4.251, open=27.725, high=27.725, low=11.985, rng=15.740, close=14.595, count=245
year=2009, mean=18.336, stddev=3.501, open=15.025, high=23.000, low=11.840, rng=11.160, close=21.962, count=249
year=2010, mean=21.806, stddev=1.330, open=21.987, high=25.038, low=19.430, rng=5.608, close=23.225, count=248
year=2011, mean=21.687, stddev=2.144, open=23.500, high=24.587, low=16.960, rng=7.627, close=18.600, count=244
year=2012, mean=20.812, stddev=1.062, open=18.995, high=22.875, low=18.495, rng=4.380, close=22.837, count=246
year=2013, mean=22.973, stddev=0.825, open=23.375, high=24.212, low=20.317, rng=3.895, close=23.487, count=244
year=2014, mean=23.660, stddev=1.117, open=23.500, high=26.100, low=21.500, rng=4.600, close=23.837, count=236
year=2015, mean=24.738, stddev=2.126, open=24.013, high=28.863, low=21.375, rng=7.488, close=22.113, count=233
year=2016, mean=21.810, stddev=1.603, open=21.587, high=24.850, low=18.550, rng=6.300, close=22.100, count=245

In [3]:
# Summary over the trailing 365 days, from midnight a year ago to midnight today.
# Read the clock once so both bounds come from the same instant.
now = dt.datetime.now()
t = now - dt.timedelta(days=365)
yStart = t.replace(hour=0, minute=0, second=0, microsecond=0)
yEnd = now.replace(hour=0, minute=0, second=0, microsecond=0)

df = data.daily.loc[yStart:yEnd]
if df.empty:
    print("no data between %s and %s" % (yStart.date(), yEnd.date()))
else:
    wcl = df['WCLPRICE']
    wclHigh = wcl.max()
    wclLow = wcl.min()
    # Label the line with the year the window ends in, not a loop variable
    # left over from another cell.
    print("year=%4d, mean=%.3f, stddev=%.3f, open=%.3f, high=%.3f, low=%.3f, rng=%.3f, close=%.3f, count=%d" % (
        yEnd.year, wcl.mean(), wcl.std()
        , wcl.iloc[0]       #Open  (first value in the window)
        , wclHigh           #High
        , wclLow            #Low
        , wclHigh - wclLow  #Range
        , wcl.iloc[-1]      #Close (last value in the window)
        , len(df.index)     # count
    ))


year=2019, mean=28.764, stddev=1.978, open=30.750, high=31.950, low=25.025, rng=6.925, close=28.137, count=243


### Assumption:
### For 2019, assume a trading range of 23 - 30, with the mean at 26.5 and the standard deviation at 1.7.

In [4]:
from scipy import stats

# Assumed distribution of the price: normal with this mean and standard deviation.
ASSUMED_MEAN = 26.5
ASSUMED_STDDEV = 1.7
n = stats.norm( ASSUMED_MEAN, ASSUMED_STDDEV )

# Left column: probability (%) of finishing ABOVE the price in the right column.
for q in np.linspace(10.0,90.0, 9):
    print("%2.1f  %.1f " %(100 - q, (n.ppf(q/100)) ))
# print("") rather than print(): the latter would emit "()" on Python 2.
print("")
# Left column: price level; right column: probability (%) of finishing above it.
for p in np.linspace(30.5,22.5, 9):
    print("%2.2f  %.2f " %(p, 100-(n.cdf(p) * 100) ))


90.0  24.3 
80.0  25.1 
70.0  25.6 
60.0  26.1 
50.0  26.5 
40.0  26.9 
30.0  27.4 
20.0  27.9 
10.0  28.7 

30.50  0.93 
29.50  3.88 
28.50  11.97 
27.50  27.82 
26.50  50.00 
25.50  72.18 
24.50  88.03 
23.50  96.12 
22.50  99.07 
