<img src="teacher.jpg" height="400" width="400">
<img src="IMG_5317.JPG" height="400" width="400">

### 第一部分：
- 基本統計量說明: Raw Data在當沖前/當沖後的各種基本統計量陳述（平均數、中位數、min、max、標準差），對Raw Data有個基本的概念

### 第二部分：
- 當沖前/當沖後的各種基本統計量的檢定（ex:現股當沖比重、日報酬率、週報酬率、月報酬率），做平均數檢定、中位數檢定，看看當沖前/當沖後這些基本統計量有沒有顯著變化

### 第三部分：
- 政策面的研究。政府開放當沖是為了縮小spread、提高成交量、提高成交量週轉率。研究data做的回歸是不是符合政府所宣稱的?當沖真的有穩定市場嗎？對資本市場有貢獻嗎？

### 第四部分：
- 市場面研究。有沒有其他在市場上面的因素會影響我們的回歸式？要控制這些變數，放入回歸式（很多x)

### $ \Delta $ $上市公司日報酬率標準差_i$ = $a_0$ + $a_1$ * (上市公司現股當沖比重平均)

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.stats.api as sms
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
import datetime as datetime
import json
from datetime import timedelta
warnings.filterwarnings("ignore")
%matplotlib inline

from sklearn.linear_model import LinearRegression
from scipy import stats

### tw_day_trade class 

In [207]:
class tw_day_trade():
    def __init__(self):
        self.pos_1 = ""
        self.pos_2 = ""
        self.pos_3 = ""
        self.df_before = "" 
        self.df_after = ""  
        self.basic_stock_stats_info_dict = {}
        self.x = "" 
        self.y_daily = "" 
        self.y_weekly = "" 
        self.y_monthly = "" 

        """
        data basic info
        """
        self.day_trade_data = pd.read_csv("20130613_20190620_data_utf8.csv")
        self.company_num = len(self.day_trade_data)/4
        print("data 台灣上市公司總數: {} 間".format(self.company_num))
        print("data shape: {}".format(self.day_trade_data.shape))

        """
        當沖相關重要日期們
        """
        self.open_single_day_trade_date = '2014/1/6' # 開放單向當沖
        self.open_double_day_trade_date = '2014/6/30' # 開放雙向當沖
        self.tax_down_day_trade_date = '2017/4/28' # 調降當沖稅率
        
    
    def day_trade_split(self, date_1, date_2, date_3):
        """
        根據date_1, date_2, date_3來切割dataframe
        """
        tmp_arr = np.array(list(self.day_trade_data))
        self.pos_1 = np.where(tmp_arr==date_1)[0][0]
        self.pos_2 = np.where(tmp_arr==date_2)[0][0]
        self.pos_3 = np.where(tmp_arr==date_3)[0][0]
        print("date_1 pos: {}\ndate_2 pos: {}\ndate_3 pos: {}".format(self.pos_1,self.pos_2,self.pos_3))
        
        self.df_before = self.day_trade_data.iloc[:,self.pos_1:self.pos_2]
        self.df_after = self.day_trade_data.iloc[:,self.pos_2:(self.pos_3+1)]
        print("df_before shape: {}".format(self.df_before.shape))
        print("df after shape: {}".format(self.df_after.shape))
        
        
    def day_trade_stats(self):
        """
        第一部分：計算當沖前/當沖後的基本統計量，對Raw Data有個基本概念
        question: 銀華要的基本統計量的計算方式....???:)))???:)))
        """
        
        # 平均數 (axis=1橫條往右apply func., axis=0是直條往下apply func.)
        self.df_before['mean_before'] = self.df_before.mean(axis=1)
        self.df_after['mean_after'] = self.df_after.mean(axis=1)
        
        mean_before_daily = self.df_before['mean_before'][0::4].mean()
        mean_after_daily = self.df_after['mean_after'][0::4].mean()
        mean_before_weekly = self.df_before['mean_before'][1::4].mean()
        mean_after_weekly = self.df_after['mean_after'][1::4].mean()
        mean_before_monthly = self.df_before['mean_before'][2::4].mean()
        mean_after_monthly = self.df_after['mean_after'][2::4].mean()
        
        # 標準差 
        self.df_before['std_before'] = self.df_before.std(axis=1)
        self.df_after['std_after'] = self.df_after.std(axis=1)
        
        std_before_daily = self.df_before['std_before'][0::4].mean()
        std_after_daily = self.df_after['std_after'][0::4].mean()
        std_before_weekly = self.df_before['std_before'][1::4].mean()
        std_after_weekly = self.df_after['std_after'][1::4].mean()
        std_before_monthly = self.df_before['std_before'][2::4].mean()
        std_after_monthly = self.df_after['std_after'][2::4].mean()
        
        # 中位數
        self.df_before['median_before'] = self.df_before.median(axis=1)
        self.df_after['median_after'] = self.df_after.median(axis=1)
        
        median_before_daily = self.df_before['median_before'][0::4].mean()
        median_after_daily = self.df_after['median_after'][0::4].mean()
        median_before_weekly = self.df_before['median_before'][1::4].mean()
        median_after_weekly = self.df_after['median_after'][1::4].mean()
        median_before_monthly = self.df_before['median_before'][2::4].mean()
        median_after_monthly = self.df_after['median_after'][2::4].mean()
        
        # max
        max_before_daily = self.df_before['mean_before'][0::4].max()
        max_after_daily = self.df_after['mean_after'][0::4].max()
        max_before_weekly = self.df_before['mean_before'][1::4].max()
        max_after_weekly = self.df_after['mean_after'][1::4].max()
        max_before_monthly = self.df_before['mean_before'][2::4].max()
        max_after_monthly = self.df_after['mean_after'][2::4].max()
        
        # min
        min_before_daily = self.df_before['mean_before'][0::4].min()
        min_after_daily = self.df_after['mean_after'][0::4].min()
        min_before_weekly = self.df_before['mean_before'][1::4].min()
        min_after_weekly = self.df_after['mean_after'][1::4].min()
        min_before_monthly = self.df_before['mean_before'][2::4].min()
        min_after_monthly = self.df_after['mean_after'][2::4].min()
        
        
        self.basic_stock_stats_info_dict = {
            'mean':{'mean_before_daily':mean_before_daily,'mean_after_daily':mean_after_daily,
                   'mean_before_weekly':mean_before_weekly,'mean_after_weekly':mean_before_weekly,
                   'mean_before_monthly':mean_before_monthly,'mean_before_monthly':mean_before_monthly},
            
            'std':{'std_before_daily':std_before_daily, 'std_after_daily':std_after_daily,
                  'std_before_weekly':std_before_weekly, 'std_after_weekly':std_after_weekly,
                  'std_before_monthly':std_before_monthly, 'std_after_monthly':std_after_monthly},
            
            'median':{'median_before_daily':median_before_daily, 'median_after_daily':median_after_daily,
                  'median_before_weekly':median_before_weekly, 'median_after_weekly':median_after_weekly,
                  'median_before_monthly':median_before_monthly, 'median_after_monthly':median_after_monthly},
            
            'max':{'max_before_daily':max_before_daily, 'max_after_daily':max_after_daily,
                  'max_before_weekly':max_before_weekly, 'max_after_weekly':max_after_weekly,
                  'max_before_monthly':max_before_monthly, 'max_after_monthly':max_after_monthly},
            
            'min':{'min_before_daily':min_before_daily, 'min_after_daily':min_after_daily,
                  'min_before_weekly':min_before_weekly, 'min_after_weekly':min_after_weekly,
                  'min_before_monthly':min_before_monthly, 'min_after_monthly':min_after_monthly},

        }
        
        #print (json.dumps(self.basic_stock_stats_info_dict, indent=2))
        
    
    def day_trade_calculate(self):
        """
        準備OLS的x和y
        """
        self.x = self.df_after['mean_after'][3::4]-self.df_before['mean_before'][3::4]
        self.y_daily = self.df_after['std_after'][0::4]-self.df_before['std_before'][0::4]
        self.y_weekly = self.df_after['std_after'][1::4]-self.df_before['std_before'][1::4]
        self.y_monthly = self.df_after['std_after'][2::4]-self.df_before['std_before'][2::4]
    
    def day_trade_OLS(self, date_freq):
        """
        根據傳進的date_freq，計算OLS(日/週/月)
        date_freq's value: 'daily','weekly','monthly'
        """
        if date_freq=='daily':
            self.x = sm.add_constant(self.x)
            self.y_daily = list(self.y_daily)
            model_daily = sm.OLS(self.y_daily, self.x).fit()
            print(model_daily.summary())

        if date_freq=='weekly':
            self.x = sm.add_constant(self.x)
            self.y_weekly = list(self.y_weekly)
            model_weekly = sm.OLS(self.y_weekly, self.x).fit()
            print(model_weekly.summary())

        if date_freq=='monthly':
            self.x = sm.add_constant(self.x)
            self.y_monthly = list(self.y_monthly)
            model_monthly = sm.OLS(self.y_monthly, self.x).fit()
            print(model_monthly.summary())


### 2014/1/6開放單向當沖，前後各抓半年
#### **<span style="color:green">pos_1 = 2013/6/13, pos_2 = 2014/1/6, pos_3 = 2014/6/30</span>**

In [240]:
data_1=tw_day_trade()
data_1.day_trade_split('2013/6/13','2014/1/6','2014/6/30')
data_1.day_trade_stats()
data_1.day_trade_calculate()
data_1.day_trade_OLS('daily')
data_1.day_trade_OLS('weekly')
data_1.day_trade_OLS('monthly')

data 台灣上市公司總數: 949.0 間
data shape: (3796, 1481)
date_1 pos: 2
date_2 pos: 145
date_3 pos: 260
df_before shape: (3796, 143)
df after shape: (3796, 116)
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                  0.009
Method:                 Least Squares   F-statistic:                     9.564
Date:                Fri, 19 Jul 2019   Prob (F-statistic):            0.00204
Time:                        16:22:30   Log-Likelihood:                -1117.2
No. Observations:                 949   AIC:                             2238.
Df Residuals:                     947   BIC:                             2248.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|     

### 2014/6/30 開放雙向當沖，前後各抓半年
#### **<span style="color:green">pos_1 = 2013/12/30, pos_2 = 2014/6/30, pos_3 = 2014/12/30</span>**

In [238]:
data_2 = tw_day_trade()
data_2.day_trade_split('2013/12/30','2014/6/30','2014/12/30')
data_2.day_trade_stats()
data_2.day_trade_calculate()
data_2.day_trade_OLS('daily')
data_2.day_trade_OLS('weekly')
data_2.day_trade_OLS('monthly')

data 台灣上市公司總數: 949.0 間
data shape: (3796, 1481)
date_1 pos: 141
date_2 pos: 260
date_3 pos: 389
df_before shape: (3796, 119)
df after shape: (3796, 130)
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.008
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                     7.689
Date:                Fri, 19 Jul 2019   Prob (F-statistic):            0.00567
Time:                        16:21:22   Log-Likelihood:                -1120.7
No. Observations:                 949   AIC:                             2245.
Df Residuals:                     947   BIC:                             2255.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|   

### 2014/6/30 開放雙向當沖，前後各抓一年
#### **<span style="color:green">pos_1 = 2013/6/28, pos_2 = 2014/6/30, pos_3 = 2015/6/30</span>**

In [236]:
data_3 = tw_day_trade()
data_3.day_trade_split('2013/6/28','2014/6/30','2015/6/30')
data_3.day_trade_stats()
data_3.day_trade_calculate()
data_3.day_trade_OLS('daily')
data_3.day_trade_OLS('weekly')
data_3.day_trade_OLS('monthly')

data 台灣上市公司總數: 949.0 間
data shape: (3796, 1481)
date_1 pos: 13
date_2 pos: 260
date_3 pos: 506
df_before shape: (3796, 247)
df after shape: (3796, 247)
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.4533
Date:                Fri, 19 Jul 2019   Prob (F-statistic):              0.501
Time:                        16:20:14   Log-Likelihood:                -1049.4
No. Observations:                 949   AIC:                             2103.
Df Residuals:                     947   BIC:                             2113.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|    

### 2017/4/28 調降當沖稅率，前後各抓一年
#### **<span style="color:green">pos_1 = 2016/4/28, pos_2 = 2017/4/28, pos_3 = 2018/4/27</span>**

In [235]:
data_4 = tw_day_trade()
data_4.day_trade_split('2016/4/28','2017/4/28','2018/4/27')
data_4.day_trade_stats()
data_4.day_trade_calculate()
data_4.day_trade_OLS('daily')
data_4.day_trade_OLS('weekly')
data_4.day_trade_OLS('monthly')

data 台灣上市公司總數: 949.0 間
data shape: (3796, 1481)
date_1 pos: 709
date_2 pos: 953
date_3 pos: 1199
df_before shape: (3796, 244)
df after shape: (3796, 247)
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.050
Model:                            OLS   Adj. R-squared:                  0.049
Method:                 Least Squares   F-statistic:                     50.16
Date:                Fri, 19 Jul 2019   Prob (F-statistic):           2.77e-12
Time:                        16:19:19   Log-Likelihood:                -1420.3
No. Observations:                 949   AIC:                             2845.
Df Residuals:                     947   BIC:                             2854.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|  