### 第一部分：
- 基本統計量說明: Raw Data在當沖前/當沖後的各種基本統計量陳述（平均數、中位數、min、max、標準差），對Raw Data有個基本的概念

### 第二部分：
- 當沖前/當沖後的各種基本統計量的檢定（ex:現股當沖比重、日報酬率、週報酬率、月報酬率）
- 做平均數檢定、中位數檢定，看看當沖前/當沖後這些基本統計量有沒有顯著變化

### 第三部分：
- 政策面的研究。政府開放當沖是為了縮小spread、提高成交量、提高成交量週轉率。
- 以data做回歸，檢驗結果是否符合政府所宣稱的效果？當沖真的有穩定市場嗎？對資本市場有貢獻嗎？
- <span style="color:red">價量資料</span>

### 第四部分：
- 市場面研究。有沒有其他市場上的因素會影響我們的回歸式？要控制這些變數，並放入回歸式（很多x）

### $\Delta\,\text{上市公司日報酬率標準差}_i = a_0 + a_1 \times \text{上市公司現股當沖比重平均}_i$

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import statsmodels.stats.api as sms
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
import datetime as datetime
import json
import os
import csv
from datetime import timedelta
from openpyxl import load_workbook
warnings.filterwarnings("ignore")
%matplotlib inline

from sklearn.linear_model import LinearRegression
from scipy import stats

In [30]:
class tw_day_trade():
    def __init__(self):
        self.pos_1 = ""
        self.pos_2 = ""
        self.pos_3 = ""
        self.df_before = "" 
        self.df_after = ""  
        self.basic_stats_info_before = []
        self.basic_stats_info_after = []
        self.basic_stats_info_ttest = []
        self.basic_stock_stats_info_dict = {}
        self.x = "" 
        self.y_daily = "" 
        self.y_weekly = "" 
        self.y_monthly = "" 
        self.day_trade_data = ""
        self.excel_data = {}

        """
        當沖相關重要日期們
        
        第一階段：2014/1/6 開放單向當沖200檔
        「台灣50、台灣中型100、富櫃50」200檔。
        
        第二階段：2014/6/30 開放雙向當沖
        仍為「台灣50、台灣中型100、富櫃50」200檔。
        
        第三階段：2015/6/1 新增「得發行認購(售)權證標的」及「ETF」亦得為當沖標的
        此時有377檔。
        
        第四階段：2016/2/1 開放「所有可做融資融券」的股票
        上市櫃共1432檔。
        
        第五階段：2017/4/28 調降當沖稅率
        """

    def read_raw_csv(self, str_filename):
        """
        讀進我們需要的raw data
        """
        self.day_trade_data = pd.read_csv(str_filename)
        print("Read Data Successfully")
        print("data shape: {}".format(self.day_trade_data.shape))

    def day_trade_split(self, date_1, date_2, date_3):
        """
        根據date_1, date_2, date_3來切割dataframe
        """
        tmp_arr = np.array(list(self.day_trade_data))
        self.pos_1 = np.where(tmp_arr==date_1)[0][0]
        self.pos_2 = np.where(tmp_arr==date_2)[0][0]
        self.pos_3 = np.where(tmp_arr==date_3)[0][0]
        #print("date_1 pos: {}\ndate_2 pos: {}\ndate_3 pos: {}".format(self.pos_1,self.pos_2,self.pos_3))
        
        self.df_before = self.day_trade_data.iloc[:,self.pos_1:self.pos_2]
        self.df_after = self.day_trade_data.iloc[:,self.pos_2:(self.pos_3+1)]
        #print("df_before shape: {}".format(self.df_before.shape))
        #print("df after shape: {}".format(self.df_after.shape))
        #print(self.day_trade_data)
        #print(self.df_before)
        #print(self.df_after)
          
    
    def day_trade_stats(self,flag):
        """
        第一部分：計算當沖前/當沖後的基本統計量，對Raw Data有個基本概念
        """
        
        # 平均數 (axis=1橫條往右apply func., axis=0是直條往下apply func.)
        # 取axis=1, 每間公司在一段期間內為一個單位
        self.df_before['mean_before'] = self.df_before.mean(axis=1)
        self.df_after['mean_after'] = self.df_after.mean(axis=1)
        self.df_before['std_before'] = self.df_before.std(axis=1)
        self.df_after['std_after'] = self.df_after.std(axis=1)
        self.df_before['median_before'] = self.df_before.median(axis=1)
        self.df_after['median_after'] = self.df_after.median(axis=1)
        
        # count, mean, std, min, 25%, 50%, 75%, max
        # 日/週/月報酬率/現股當沖比重
        if flag==0:
            for i in range(0,4):
                self.basic_stats_info_before.append(self.df_before['mean_before'][i::4].describe())
                self.basic_stats_info_after.append(self.df_after['mean_after'][i::4].describe())

        # 成交量/成交量週轉率
        if flag==1:
            self.basic_stats_info_before.append(self.df_before['mean_before'].describe())
            self.basic_stats_info_after.append(self.df_after['mean_after'].describe())
        
        # 賣價-買價
        if flag==2:
            for i in range(0,2):
                self.basic_stats_info_before.append(self.df_before['mean_before'][i::2].describe())
                self.basic_stats_info_after.append(self.df_after['mean_after'][i::2].describe())
        
        #print(self.df_before['mean_before'])
        #print(self.basic_stats_info_before)
        #print(self.basic_stats_info_after)
        
        """
        第二部分：Raw Data各種基本統計量檢定
        """
        
        if flag==0:
            # 平均數檢定
            # 取p-value
            for i in range(0,4):
                p_value = stats.ttest_ind(self.df_before['mean_before'][i::4],self.df_after['mean_after'][i::4])[1]
                if p_value <= 0.05:
                    self.basic_stats_info_ttest.append("significant")
                else:
                    self.basic_stats_info_ttest.append("not significant")
                
        
        print(self.basic_stats_info_ttest)
        #print(json.dumps(self.basic_stock_stats_info_dict, indent=2))

    
    def write_excel_reports_prepro(self,flag):
        arr_weight = []
        arr_day = []
        arr_week = []
        arr_month = []
        arr_vol = []
        arr_vol_turn = []
        arr_spread = [] #賣價-買價
        arr_spread_p = [] #(賣價-買價)/p
        
        self.excel_data = {
            'weight': arr_weight,
            'day': arr_day,
            'week': arr_week,
            'month': arr_month,
            'vol':arr_vol,
            'vol_turn':arr_vol_turn,
            'spread': arr_spread,
            'spread_p':arr_spread_p
        }
        
        if flag==0:
            for outside_item in range(len(self.basic_stats_info_before)):
                if outside_item == 0:# day
                    for j1 in range(1,len(self.basic_stats_info_before[0])):
                        arr_day.append(self.basic_stats_info_before[outside_item][j1])
                if outside_item == 1:# week
                    for j2 in range(1,len(self.basic_stats_info_before[0])):
                        arr_week.append(self.basic_stats_info_before[outside_item][j2])
                if outside_item == 2:# month
                    for j3 in range(1,len(self.basic_stats_info_before[0])):
                        arr_month.append(self.basic_stats_info_before[outside_item][j3])
                if outside_item == 3:# weight
                    for j4 in range(1,len(self.basic_stats_info_before[0])):
                        arr_weight.append(self.basic_stats_info_before[outside_item][j4])
            for outside_item in range(len(self.basic_stats_info_after)):
                if outside_item == 0:# day
                    for j1 in range(1,len(self.basic_stats_info_after[0])):
                        arr_day.append(self.basic_stats_info_after[outside_item][j1])
                if outside_item == 1:# week
                    for j2 in range(1,len(self.basic_stats_info_after[0])):
                        arr_week.append(self.basic_stats_info_after[outside_item][j2])
                if outside_item == 2:# month
                    for j3 in range(1,len(self.basic_stats_info_after[0])):
                        arr_month.append(self.basic_stats_info_after[outside_item][j3])
                if outside_item == 3:# weight
                    for j4 in range(1,len(self.basic_stats_info_after[0])):
                        arr_weight.append(self.basic_stats_info_after[outside_item][j4])

        if flag==1:
            for i in range(1,len(self.basic_stats_info_before[0])):
                arr_vol.append(self.basic_stats_info_before[0][i])
            
            for i in range(1,len(self.basic_stats_info_after[0])):
                arr_vol.append(self.basic_stats_info_after[0][i])
                
        # spread
        if flag==2:
            for outside_item in range(len(self.basic_stats_info_before)):
                if outside_item == 0: # spread
                    for j1 in range(1,len(self.basic_stats_info_before[0])):
                        arr_spread.append(self.basic_stats_info_before[outside_item][j1])
                if outside_item == 1: # spread/p
                    for j2 in range(1,len(self.basic_stats_info_before[0])):
                        arr_spread_p.append(self.basic_stats_info_before[outside_item][j2])
            
            for outside_item in range(len(self.basic_stats_info_after)):
                if outside_item == 0: # spread
                    for j1 in range(1,len(self.basic_stats_info_after[0])):
                        arr_spread.append(self.basic_stats_info_after[outside_item][j1])
                if outside_item == 1: # spread/p
                    for j2 in range(1,len(self.basic_stats_info_before[0])):
                        arr_spread_p.append(self.basic_stats_info_after[outside_item][j2])



        #print(json.dumps(self.excel_data, indent=2))
    
    
    def write_excel_reports(self, sheetName, types, workbook_path='20191007.xlsx'):
        """Write ``self.excel_data`` into an existing Excel workbook and save it.

        Every key of ``self.excel_data`` owns one worksheet row. Its values
        are rounded to two decimals and written left to right starting at
        column B. Keys with an empty list write nothing.

        Args:
            sheetName: name of the worksheet to fill.
            types: market block to fill - 0 listed (TWSE), 1 OTC, 2 listed
                and OTC combined. Each block sits 15 rows below the
                previous one.
            workbook_path: workbook that is opened and then overwritten in
                place. Defaults to the file this method used to hard-code.

        Raises:
            IndexError: if a row holds more than 14 values (columns B..O).
        """
        data = self.excel_data
        print(data)
        wb = load_workbook(workbook_path)
        # Workbook.get_sheet_names() is deprecated; sheetnames returns the
        # same list of names.
        print(wb.sheetnames)

        column_letters = 'BCDEFGHIJKLMNO'
        # Row of each statistic inside the first (types == 0) block.
        base_rows = {
            'weight': 5,
            'day': 6,
            'week': 7,
            'month': 8,
            'vol': 10,
            'vol_turn': 11,
            'spread': 13,
            'spread_p': 14,
        }

        wb_sheet = wb[sheetName]
        print(wb_sheet)

        for key, values in data.items():
            if key not in base_rows:
                # Unknown key: stop writing; rows written so far are still saved.
                break
            row = base_rows[key] + types * 15
            for position, value in enumerate(values):
                wb_sheet[column_letters[position] + str(row)] = round(value, 2)

        wb.save(workbook_path)
    
    
    def day_trade_calculate(self):
        """
        準備OLS的x和y
        """
        self.x = self.df_after['mean_after'][3::4]-self.df_before['mean_before'][3::4]
        self.y_daily = self.df_after['std_after'][0::4]-self.df_before['std_before'][0::4]
        self.y_weekly = self.df_after['std_after'][1::4]-self.df_before['std_before'][1::4]
        self.y_monthly = self.df_after['std_after'][2::4]-self.df_before['std_before'][2::4]
    
    
    def day_trade_OLS(self, date_freq):
        """
        根據傳進的date_freq，計算OLS(日/週/月)
        date_freq's value: 'daily','weekly','monthly'
        """
        if date_freq=='daily':
            self.x = sm.add_constant(self.x)
            self.y_daily = list(self.y_daily)
            model_daily = sm.OLS(self.y_daily, self.x).fit()
            print(model_daily.summary())

        if date_freq=='weekly':
            self.x = sm.add_constant(self.x)
            self.y_weekly = list(self.y_weekly)
            model_weekly = sm.OLS(self.y_weekly, self.x).fit()
            print(model_weekly.summary())

        if date_freq=='monthly':
            self.x = sm.add_constant(self.x)
            self.y_monthly = list(self.y_monthly)
            model_monthly = sm.OLS(self.y_monthly, self.x).fit()
            print(model_monthly.summary())
            
    

In [31]:
# Stage 5 window around the 2017/4/28 tax cut:
# "before" = [2016/4/28, 2017/4/28), "after" = [2017/4/28, 2018/4/27].
RAW_CSV = "報酬率_pei/日報酬csv/上市上櫃公司20160428_20180428.csv"
SPLIT_DATES = ("2016/4/28", "2017/4/28", "2018/4/27")

data = tw_day_trade()
data.read_raw_csv(RAW_CSV)
data.day_trade_split(*SPLIT_DATES)
# flag=0: rows cycle through daily/weekly/monthly return and day-trade share.
data.day_trade_stats(flag=0)
data.day_trade_calculate()
#data.correlation(corr_a)

Read Data Successfully
data shape: (4004, 493)
      證券代碼 Data Field  2016/4/28  2016/4/29  2016/5/3  2016/5/4  2016/5/5  \
0  1101 台泥     日報酬率 %     0.7576    -1.0526   -0.7599   -5.0536   -0.6452   
1  1101 台泥     週報酬率 %    -0.7463    -0.6042   -0.9105   -6.0606   -7.3684   
2  1101 台泥     月報酬率 %     6.0607     4.4446    6.0066    0.6495    0.0001   
3  1101 台泥     現股當沖比重    10.4900     6.8500    2.2800    4.9200    7.9800   
4  1102 亞泥     日報酬率 %    -0.6838    -1.0327   -0.3478   -4.1885   -1.0929   

   2016/5/6  2016/5/9  2016/5/10  ...  2018/4/16  2018/4/17  2018/4/18  \
0   -0.3247   -2.2801     0.6667  ...     0.1309    -0.6536     0.5263   
1   -6.6870   -8.8146    -7.5038  ...     2.4097    -0.5235    -0.6502   
2    2.3334   -0.6621     0.0002  ...     1.7288     1.0639     1.5958   
3    8.3500    3.9700     5.2100  ...     9.5300     8.7300     4.4800   
4    0.1842   -1.4706    -0.9328  ...     0.3431    -0.3419     0.1715   

   2018/4/19  2018/4/20  2018/4/23  2018/4/24

['significant', 'significant', 'not significant', 'significant']


#### 計算Correlation

In [3]:
# Stage 5: the day-trading tax rate was cut on 2017/4/28.
# Total-assets table, indexed by company.
df_assets = pd.read_csv("資產總額/上市櫃20160428-20180428.csv", index_col="公司")
# Return table, indexed by security code. Keep every 4th row starting at
# row 3, i.e. the last of the four rows per security.
df_daytrade = pd.read_csv(
    "報酬率_pei/日報酬csv/上市上櫃公司20160428_20180428.csv",
    index_col="證券代碼",
).iloc[3::4]

In [26]:
# First value of the 2016/12/30 column. .iloc is explicit positional access;
# df_assets is indexed by company name, so a bare [0] relies on the
# deprecated integer-key positional fallback.
df_assets['2016/12/30'].iloc[0]

'0'

In [31]:
# Total assets on 2016/12/30 for every company whose entry is not the string
# '0'. The values are kept as the raw strings read from the CSV.
corr_a = [assets for assets in df_assets['2016/12/30'] if assets != '0']
corr_a

['496,304',
 '18,735,011',
 '3,034,012',
 '3,613,278',
 '2,005,151,504',
 '627,102,701',
 '17,649,615',
 '256,174,320',
 '304,636,779',
 '948,966,235',
 '892,197,509',
 '2,541,156,335',
 '8,093,377',
 '1,150,703',
 '285,011,594',
 '286,083,458',
 '9,605,668',
 '4,380,118',
 '62,171,843',
 '4,071,262',
 '301,026',
 '438,745',
 '4,492,717',
 '229,244',
 '4,553,289']

In [6]:
# Flat list alternating company name and its 2016/12/30 total assets, for
# every company whose entry is not the string '0'.
non_zero_comp = []
for company, assets in df_assets["2016/12/30"].items():
    if assets != '0':
        non_zero_comp.extend((company, assets))
# Even positions hold the company names; show the first one.
non_zero_comp[0::2][0]

'1210 大成'

<img src="https://i.imgur.com/YZ8f21o.jpg" height="500" width="500">
<img src="https://i.imgur.com/VRp7NCJ.jpg" height="500" width="500">
<img src="https://i.imgur.com/XmJpppE.jpg" height="500" width="500">
<img src="https://i.imgur.com/7H8eg1g.jpg" height="500" width="500">

<img src="https://i.imgur.com/1giFKNu.png" height="800" width="800">