## 一、获取并处理数据得到时间序列

In [5]:
import numpy as np
import pandas as pd
import tushare as ts

ts.set_token('094f15d71394516b730602faa77b1c708007b8d05df300590b4445ed')
pro = ts.pro_api()
# 多元化银行
diverse_bank_list = ['000001.SZ','600000.SH','600015.SH','600016.SH','600036.SH','601166.SH',
                     '601288.SH','601328.SH','601398.SH','601818.SH',
                     '601939.SH','601988.SH','601998.SH']
# 区域性银行
regional_bank_list = ['002142.SZ','002807.SZ','002839.SZ','002936.SZ','002948.SZ','002958.SZ',
                      '002966.SZ','600908.SH','600919.SH','600926.SH','600928.SH','601009.SH',
                      '601077.SH','601128.SH','601169.SH','601229.SH','601577.SH','601838.SH',
                      '601860.SH','601997.SH','603323.SH']
# df_1 = pro.daily(ts_code='000001.SZ', start_date='20190101', end_date='20191231')
# df_1.head()



def get_stock(name, start_date, end_date, N_amount=1):
    '''
    日交易额单位：千元（日成交额是否需要改变单位？）
    
    
    
    Args:
        name: stock name
        start_date: trade start date
        end_date: trade end date
        N_amount: 成交额的单位 
    
    '''
    df = pro.daily(ts_code=name, start_date=start_date, end_date=end_date)
    df['yield_rate'] = df['change']/df['pre_close']
    df['abs_yield_rate'] = abs(np.log(df['close']/df['pre_close']))
    df['amihud'] = df['abs_yield_rate']/df['amount']
    return df



def process_stock(name, df):
    df = df[['trade_date','yield_rate','amihud']]
    df.rename(columns={'yield_rate':'yield_rate_'+name, 'amihud':'amihud_'+name},inplace=True)
    return df
    
    
    
def create_time_series(stock_list, start_date, end_date):
    '''
    获取 stock_list 中每只 stock 的 yield rate 以及 amihud 指标并得到时间序列
    
    Args:
    
    '''
    # 先获取第一个
    df = get_stock(stock_list[0], start_date, end_date)
    df = process_stock(stock_list[0], df)
    # 将第一个和后面的合并
    for i in range(len(stock_list)-1):
        df_temp = get_stock(stock_list[i+1], start_date, end_date)
        df_temp = process_stock(stock_list[i+1], df_temp)
        df = pd.merge(df,df_temp)
    return df

def main():
    df = create_time_series(diverse_bank_list,'20190101','20191231')
    return df
    

if __name__ == '__main__':
    df = main()
    ## 改变顺序
    row_list = ['trade_date']
    for name in diverse_bank_list:
        row_list.append('yield_rate_'+name)
    for name in diverse_bank_list:
        row_list.append('amihud_'+name)
    df = df[row_list]
    df.to_csv('stock.csv', index=False)

In [99]:
df

Unnamed: 0,trade_date,yield_rate_000001.SZ,yield_rate_600000.SH,yield_rate_600015.SH,yield_rate_600016.SH,yield_rate_600036.SH,yield_rate_601166.SH,yield_rate_601288.SH,yield_rate_601328.SH,yield_rate_601398.SH,...,amihud_600016.SH,amihud_600036.SH,amihud_601166.SH,amihud_601288.SH,amihud_601328.SH,amihud_601398.SH,amihud_601818.SH,amihud_601939.SH,amihud_601988.SH,amihud_601998.SH
0,20191231,-0.007242,0.002431,0.002614,0.000000,-0.006609,0.002024,-0.008065,-0.001773,-0.001698,...,0.000000e+00,4.692472e-09,1.894025e-09,1.114428e-08,4.725278e-09,2.067840e-09,1.279284e-08,0.000000e+00,1.970576e-08,0.000000e+00
1,20191230,-0.003608,0.001623,0.002621,0.006380,-0.000792,-0.010020,-0.002681,0.003559,-0.001695,...,1.627543e-08,4.746218e-10,4.475719e-09,3.355331e-09,6.828854e-09,1.375220e-09,2.032792e-08,3.219215e-09,1.121564e-08,1.049884e-08
2,20191227,0.009715,0.002441,0.005270,0.003200,-0.006299,0.000000,0.005391,0.003571,0.006826,...,7.262913e-09,4.611380e-09,0.000000e+00,6.675341e-09,6.975019e-09,8.845601e-09,6.424002e-09,5.029993e-09,7.552942e-09,2.529905e-08
3,20191226,0.010429,0.004085,0.005298,0.003210,0.008203,0.010121,0.010899,0.005386,0.005146,...,1.246840e-08,1.122201e-08,1.471036e-08,2.305052e-08,1.943319e-08,9.803632e-09,4.665034e-08,2.437830e-08,2.462567e-08,1.132726e-07
4,20191225,-0.006098,-0.003257,-0.005270,-0.004792,-0.005788,-0.007035,-0.002717,-0.001792,-0.005119,...,2.345029e-08,6.526291e-09,9.272730e-09,7.595419e-09,8.495831e-09,1.123307e-08,1.159681e-08,9.227561e-09,1.247316e-08,5.852658e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,20190108,-0.008214,-0.002004,-0.005369,-0.001736,-0.011755,0.003955,-0.011204,0.003431,-0.007634,...,6.969557e-09,1.738262e-08,1.144433e-08,1.635076e-08,1.503317e-08,1.305467e-08,3.257479e-08,0.000000e+00,2.045644e-08,2.446039e-08
240,20190107,-0.001026,0.002008,-0.005340,-0.003460,0.000392,-0.001974,-0.005571,-0.005119,-0.005693,...,8.719839e-09,3.371224e-10,3.586035e-09,6.899026e-09,1.793903e-08,1.086643e-08,0.000000e+00,8.195316e-09,6.571231e-09,4.186489e-08
241,20190104,0.050647,0.015291,0.019048,0.019400,0.025322,0.023569,0.014124,0.013841,0.013462,...,3.951219e-08,1.479300e-08,3.298557e-08,1.924617e-08,3.107921e-08,2.089480e-08,6.917013e-08,3.456520e-08,1.914039e-08,1.665676e-07
242,20190103,0.009793,0.011340,0.005472,0.003540,0.012617,0.007463,0.005682,0.019400,0.000000,...,1.042149e-08,1.349580e-08,1.871443e-08,1.091775e-08,5.185990e-08,0.000000e+00,1.054602e-07,6.801562e-09,8.208403e-09,2.614935e-08


## 二、Var 向量值回归 amihud 和 yield rate

In [7]:
import numpy as np
import pandas as pd
import tushare as ts
from statsmodels.tsa import stattools
import matplotlib as mpl
import matplotlib.pyplot as plt


ts.set_token('094f15d71394516b730602faa77b1c708007b8d05df300590b4445ed')
pro = ts.pro_api()

# 计算自相关性
# stattools.acf(df.pct_chg, nlags=10)
# 计算偏自相关性
# stattools.pacf(df.pct_chg, nlags=10)



# df_1 = pro.daily(ts_code='000001.SZ', start_date='20190101')
# df_1.index = pd.to_datetime(df_1.trade_date)
# df_1 = df_1.sort_index(ascending=True)
# df_1

df = pd.read_csv('stock.csv', index_col=0)
df

Unnamed: 0_level_0,yield_rate_000001.SZ,yield_rate_600000.SH,yield_rate_600015.SH,yield_rate_600016.SH,yield_rate_600036.SH,yield_rate_601166.SH,yield_rate_601288.SH,yield_rate_601328.SH,yield_rate_601398.SH,yield_rate_601818.SH,...,amihud_600016.SH,amihud_600036.SH,amihud_601166.SH,amihud_601288.SH,amihud_601328.SH,amihud_601398.SH,amihud_601818.SH,amihud_601939.SH,amihud_601988.SH,amihud_601998.SH
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20191231,-0.007242,0.002431,0.002614,0.000000,-0.006609,0.002024,-0.008065,-0.001773,-0.001698,0.004556,...,0.000000e+00,4.692472e-09,1.894025e-09,1.114428e-08,4.725278e-09,2.067840e-09,1.279284e-08,0.000000e+00,1.970576e-08,0.000000e+00
20191230,-0.003608,0.001623,0.002621,0.006380,-0.000792,-0.010020,-0.002681,0.003559,-0.001695,-0.011261,...,1.627543e-08,4.746218e-10,4.475719e-09,3.355331e-09,6.828854e-09,1.375220e-09,2.032792e-08,3.219215e-09,1.121564e-08,1.049884e-08
20191227,0.009715,0.002441,0.005270,0.003200,-0.006299,0.000000,0.005391,0.003571,0.006826,0.002257,...,7.262913e-09,4.611380e-09,0.000000e+00,6.675341e-09,6.975019e-09,8.845601e-09,6.424002e-09,5.029993e-09,7.552942e-09,2.529905e-08
20191226,0.010429,0.004085,0.005298,0.003210,0.008203,0.010121,0.010899,0.005386,0.005146,0.013730,...,1.246840e-08,1.122201e-08,1.471036e-08,2.305052e-08,1.943319e-08,9.803632e-09,4.665034e-08,2.437830e-08,2.462567e-08,1.132726e-07
20191225,-0.006098,-0.003257,-0.005270,-0.004792,-0.005788,-0.007035,-0.002717,-0.001792,-0.005119,-0.002283,...,2.345029e-08,6.526291e-09,9.272730e-09,7.595419e-09,8.495831e-09,1.123307e-08,1.159681e-08,9.227561e-09,1.247316e-08,5.852658e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20190108,-0.008214,-0.002004,-0.005369,-0.001736,-0.011755,0.003955,-0.011204,0.003431,-0.007634,-0.005208,...,6.969557e-09,1.738262e-08,1.144433e-08,1.635076e-08,1.503317e-08,1.305467e-08,3.257479e-08,0.000000e+00,2.045644e-08,2.446039e-08
20190107,-0.001026,0.002008,-0.005340,-0.003460,0.000392,-0.001974,-0.005571,-0.005119,-0.005693,0.000000,...,8.719839e-09,3.371224e-10,3.586035e-09,6.899026e-09,1.793903e-08,1.086643e-08,0.000000e+00,8.195316e-09,6.571231e-09,4.186489e-08
20190104,0.050647,0.015291,0.019048,0.019400,0.025322,0.023569,0.014124,0.013841,0.013462,0.024000,...,3.951219e-08,1.479300e-08,3.298557e-08,1.924617e-08,3.107921e-08,2.089480e-08,6.917013e-08,3.456520e-08,1.914039e-08,1.665676e-07
20190103,0.009793,0.011340,0.005472,0.003540,0.012617,0.007463,0.005682,0.019400,0.000000,0.027397,...,1.042149e-08,1.349580e-08,1.871443e-08,1.091775e-08,5.185990e-08,0.000000e+00,1.054602e-07,6.801562e-09,8.208403e-09,2.614935e-08


In [8]:
import statsmodels.api as sm
import statsmodels.stats.diagnostic

sm.tsa.stattools.adfuller(df['yield_rate_000001.SZ'])

(-7.860543452445326,
 5.298747073166892e-12,
 3,
 240,
 {'1%': -3.4578942529658563,
  '5%': -2.8736593200231484,
  '10%': -2.573228767361111},
 -1124.5563972469988)

In [13]:
result = sm.tsa.stattools.adfuller(df['yield_rate_000001.SZ'])
print(result[0])
print(result[1])
print(result[4]['1%'])

-7.860543452445326
5.298747073166892e-12
-3.4578942529658563


In [14]:
result

(-7.860543452445326,
 5.298747073166892e-12,
 3,
 240,
 {'1%': -3.4578942529658563,
  '5%': -2.8736593200231484,
  '10%': -2.573228767361111},
 -1124.5563972469988)

### 2.1 检验变量序列的平稳性

### 2.2 模型的参数估计

### 2.3 确定最优的滞后阶数

### 2.4 Granger 因果检验

### 2.5 检验 Var 模型的平稳性

In [None]:
# 商业银行
# 多元化银行
000001.SZ	平安银行
600000.SH	浦发银行
600015.SH	华夏银行
600016.SH	民生银行
600036.SH	招商银行
601166.SH	兴业银行
601288.SH	农业银行
601328.SH	交通银行
601398.SH	工商银行
# 601658.SH	邮储银行 时间过晚
601818.SH	光大银行
# 601916.SH	浙商银行 时间过晚
601939.SH	建设银行
601988.SH	中国银行
601998.SH	中信银行


# 区域银行
002142.SZ	宁波银行
002807.SZ	江阴银行
002839.SZ	张家港行
002936.SZ	郑州银行
002948.SZ	青岛银行
002958.SZ	青农商行
002966.SZ	苏州银行
600908.SH	无锡银行
600919.SH	江苏银行
600926.SH	杭州银行
600928.SH	西安银行
601009.SH	南京银行
601077.SH	渝农商行
601128.SH	常熟银行
601169.SH	北京银行
601229.SH	上海银行
601577.SH	长沙银行
601838.SH	成都银行
601860.SH	紫金银行
601997.SH	贵阳银行
603323.SH	苏农银行