In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

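# pip install pandas_datareader
# pandas-datareader provides an API dedicated to fetching financial data from finance websites,
# which makes it one more way to obtain stock data for quantitative trading.
# It accesses the sites' stock data as an HTTP client (built on top of urllib3, per the original note).
# Note that this functionality has been moved out of pandas into the separate pandas-datareader package.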

import pandas_datareader.data as web

In [2]:
# Provenance (per the original author's note): the Yahoo Finance download used
# originally is no longer available. The original fetch code is kept below,
# commented out, for reference only -- it is not executed:
#
#   all_data = {ticker: web.get_data_yahoo(ticker) for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']}
#   price = pd.DataFrame({ticker: data['Adj Close'] for ticker, data in all_data.items()})
#   volume = pd.DataFrame({ticker: data['Volume'] for ticker, data in all_data.items()})
#
# Instead, load previously saved snapshots from disk (paths are relative to the
# notebook's working directory). Presumably these hold the 'Adj Close' and
# 'Volume' frames built by the code above -- TODO confirm.
# NOTE(security): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
price = pd.read_pickle('data/yahoo_price.pkl')
volume = pd.read_pickle('data/yahoo_volume.pkl')

In [3]:
# Fractional change of every price column relative to the immediately
# preceding row (0.01 means +1%); the first row has no predecessor and is NaN.
returns = price.pct_change(periods=1)
# Show the last five rows as a quick sanity check.
returns.tail(5)

Unnamed: 0_level_0,AAPL,GOOG,IBM,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-10-17,-0.00068,0.001837,0.002072,-0.003483
2016-10-18,-0.000681,0.019616,-0.026168,0.00769
2016-10-19,-0.002979,0.007846,0.003583,-0.002255
2016-10-20,-0.000512,-0.005652,0.001719,-0.004867
2016-10-21,-0.00393,0.003011,-0.012474,0.042096


In [4]:
# Series.corr computes the correlation of the overlapping, non-NA,
# index-aligned values of two Series.
# Attribute access is used here; it is equivalent to
# returns['MSFT'].corr(returns['IBM']).
returns.MSFT.corr(returns.IBM)

0.49976361144151166

In [5]:
# Series.cov computes the covariance between two Series
# (here: MSFT returns vs. IBM returns).
returns.MSFT.cov(returns.IBM)

8.870655479703549e-05

In [6]:
# DataFrame.corr returns the full column-by-column correlation matrix
# as a DataFrame ('pearson' is the default method, spelled out here).
returns.corr(method='pearson')

Unnamed: 0,AAPL,GOOG,IBM,MSFT
AAPL,1.0,0.407919,0.386817,0.389695
GOOG,0.407919,1.0,0.405099,0.465919
IBM,0.386817,0.405099,1.0,0.499764
MSFT,0.389695,0.465919,0.499764,1.0


In [7]:
# DataFrame.cov returns the full column-by-column covariance matrix
# as a DataFrame (min_periods=None is the default, spelled out here).
returns.cov(min_periods=None)

Unnamed: 0,AAPL,GOOG,IBM,MSFT
AAPL,0.000277,0.000107,7.8e-05,9.5e-05
GOOG,0.000107,0.000251,7.8e-05,0.000108
IBM,7.8e-05,7.8e-05,0.000146,8.9e-05
MSFT,9.5e-05,0.000108,8.9e-05,0.000215


In [8]:
# DataFrame.corrwith computes the correlation between the rows or columns of
# a DataFrame and another Series or DataFrame.
# Given a Series, it returns a Series holding one correlation value per
# column of the DataFrame.
returns.corrwith(returns['IBM'])

AAPL    0.386817
GOOG    0.405099
IBM     1.000000
MSFT    0.499764
dtype: float64

In [9]:
# Given a DataFrame, corrwith pairs columns by name and computes one
# correlation per matching pair. Here: the correlation between each ticker's
# percent change and its trading volume (axis=0 is the default, spelled out).
returns.corrwith(volume, axis=0)

AAPL   -0.075565
GOOG   -0.007067
IBM    -0.204849
MSFT   -0.092950
dtype: float64