In [26]:
import sqlite3
import numpy as np
import pandas as pd
import itertools
import time

# initiate time variable to determine how long it would take to run each block of script
st = time.time()

# connect to sqlite database and retrieve plain data - financial ratio and stock price
# close connection when it is done
conn = sqlite3.connect("stock.db")
ds_ratio = pd.read_sql_query('select STOCK_CODE, STOCK_FACTOR, STOCK_YEAR, STOCK_DATA from VCSC_FINANCIAL_RATIO', conn)
ds_price = pd.read_sql_query('select STOCK_CODE, STOCK_DATE, STOCK_CLOSE from VCSC_STOCK_PRICE', conn)
conn.close()

# conclude time variable for 1st block
# initiate time variable for 2nd block
print("1--- %s seconds ---" % (time.time() - st))
st = time.time()

# create weight dataframe table to hold combination of 2 values with the sum of them equals 1, shortest jump by 0.01
ds_weight = np.arange(0.0, 1.01, 0.01)
ds_weight = [x for x in itertools.permutations(ds_weight, 2) if sum(x) == 1]
ds_weight = pd.DataFrame(ds_weight,columns=('W1','W2'))

# retrieve all financial ratio required (eps, bvs etc.)
# clean the raw financial ratio data to ensure its credibility 
ds_ratio = ds_ratio.loc[ds_ratio['STOCK_FACTOR'].isin(['EPS (VND)', 'BVS'])]
ds_ratio = ds_ratio.loc[ds_ratio['STOCK_YEAR'].str.contains('TTM')==False]
ds_ratio['STOCK_DATA'] = ds_ratio['STOCK_DATA'].str.replace('^-$', '')
ds_ratio['STOCK_DATA'] = ds_ratio['STOCK_DATA'].str.replace(',', '')
ds_ratio['STOCK_DATA'] = pd.to_numeric(ds_ratio['STOCK_DATA'])
ds_ratio['STOCK_YEAR'] = pd.to_numeric(ds_ratio['STOCK_YEAR'])

# clean the raw stock price data to ensure its credibility 
ds_price['STOCK_DATE'] = pd.to_datetime(ds_price['STOCK_DATE'], format='%m/%d/%Y')
ds_price['STOCK_YEAR'] = ds_price['STOCK_DATE'].dt.year
ds_price['STOCK_QUARTER'] = ds_price['STOCK_YEAR'].astype('str') + ds_price['STOCK_DATE'].dt.quarter.astype('str')
ds_price['STOCK_CLOSE'] = ds_price['STOCK_CLOSE'].str.replace(',', '').astype(float)

# conclude time variable for 2nd block
# initiate time variable for 3rd block
print("2--- %s seconds ---" % (time.time() - st))
st = time.time()

# manipulate price table to get price of current year/quarter lined up with its respective next year/quarter
ds_price = ds_price.sort_values(['STOCK_DATE'], ascending=[False]).groupby(['STOCK_CODE', 'STOCK_QUARTER']).nth(0)
ds_price['NEXT-PRICE-Q'] = ds_price.groupby(level=0)['STOCK_CLOSE'].shift(-1)
ds_price = ds_price.sort_values(['STOCK_DATE'], ascending=[False]).groupby(['STOCK_CODE', 'STOCK_YEAR']).nth(0)
ds_price['NEXT-PRICE-Y'] = ds_price.groupby(level=0)['STOCK_CLOSE'].shift(-1)

# merge price table into ratio table, then calculate xp/return and rank for each financial ratio type
ds_ratio = ds_ratio.merge(ds_price, left_on=['STOCK_CODE', 'STOCK_YEAR'], right_on=['STOCK_CODE', 'STOCK_YEAR'], how='right')
ds_ratio['XP'] = ds_ratio['STOCK_DATA'].div(ds_ratio['NEXT-PRICE-Q'])
ds_ratio['RETURN'] = ds_ratio['NEXT-PRICE-Y'].div(ds_ratio['NEXT-PRICE-Q'])
ds_ratio['RANK'] = ds_ratio.groupby(['STOCK_FACTOR', 'STOCK_YEAR'])['XP'].rank('dense', ascending=True)

# conclude time variable for 3rd block
# initiate time variable for 4th block
print("3--- %s seconds ---" % (time.time() - st))
st = time.time()

# pivot ratio table and ensure that weight table and ratio table can be merged
# formulate data of rank and qcut(10) the results to acquire decile information
ds_p = ds_ratio.pivot_table(index=['STOCK_YEAR', 'STOCK_CODE'], columns='STOCK_FACTOR', values=['RANK', 'RETURN'])
ds_p.columns = [' '.join(col).strip() for col in ds_p.columns.values]
ds_p = ds_p.reset_index()

ds_p['TMP'] = 1
ds_weight['TMP'] = 1
ds_p = pd.merge(ds_p, ds_weight, on='TMP', how='outer')
ds_p['RANK'] = ds_p['RANK BVS'] * ds_p['W1'] + ds_p['RANK EPS (VND)'] * ds_p['W2']
ds_p['DECILE'] = (ds_p.groupby(['W1', 'W2', 'STOCK_YEAR'])['RANK']
                                .apply(lambda x: pd.qcut(x,10,duplicates='drop',labels=False)))
ds_p = ds_p.loc[ds_p['DECILE'] == 9.0]

# conclude time variable for 4th block
# initiate time variable for 5th block
print("4--- %s seconds ---" % (time.time() - st))
st = time.time()

# clean up result table of the above manipulation/calculation
ds_p['RETURN'] = ds_p['RETURN BVS']
ds_p = ds_p.drop(columns=['STOCK_CODE', 'RANK BVS', 'RANK EPS (VND)', 'RETURN BVS', 'RETURN EPS (VND)'])
ds_p = ds_p.drop(columns=['TMP', 'RANK', 'DECILE'])

# calculate average return then reset index to reverse to regular dataframe table
ds_p = ds_p.groupby(['W1', 'W2', 'STOCK_YEAR']).agg({'RETURN':'mean'})
ds_p = ds_p.reset_index()

# conclude time variable for 5th block
# initiate time variable for next block when applicable
print("5--- %s seconds ---" % (time.time() - st))
st = time.time()

# create s6 as table to hold final result of the strategy
# pivot the table and calculate average return across the years
# order data, highest average at top
s6 = ds_p.pivot_table(index=(['W1', 'W2']),columns='STOCK_YEAR', values='RETURN')
s6['AVERAGE'] = s6.mean(axis=1)
s6.sort_values(by='AVERAGE', ascending=False)

1--- 2.9471685886383057 seconds ---
2--- 7.740442752838135 seconds ---
3--- 1.3510770797729492 seconds ---
4--- 1.2210698127746582 seconds ---
5--- 0.049002885818481445 seconds ---


Unnamed: 0_level_0,STOCK_YEAR,2011,2012,2013,2014,2015,2016,2017,AVERAGE
W1,W2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.73,0.27,1.275562,1.684460,1.657596,1.707348,1.267876,1.320689,0.942905,1.408062
0.75,0.25,1.275562,1.664556,1.651472,1.700313,1.288300,1.321274,0.947452,1.406990
0.72,0.28,1.275562,1.682369,1.658699,1.707715,1.251891,1.319947,0.942905,1.405584
0.74,0.26,1.275562,1.664556,1.651472,1.701357,1.278132,1.320689,0.946332,1.405443
0.77,0.23,1.277020,1.673402,1.645527,1.693444,1.286612,1.311082,0.950653,1.405392
0.76,0.24,1.277020,1.661382,1.644172,1.700313,1.289541,1.316472,0.943500,1.404629
0.78,0.22,1.277020,1.673402,1.645527,1.693444,1.286612,1.307591,0.948178,1.404539
0.79,0.21,1.277020,1.673402,1.645527,1.690479,1.269210,1.298228,0.932305,1.398025
0.80,0.20,1.266653,1.676940,1.631482,1.703640,1.257984,1.301113,0.927451,1.395037
0.82,0.18,1.253232,1.691088,1.632008,1.712408,1.257290,1.290366,0.923018,1.394201
