# 平滑法

## 安装软件包

In [None]:
import sys

In [None]:
!{sys.executable} -m pip install -r requirements.txt

In [None]:
import cvxpy as cvx
import numpy as np
import pandas as pd
import time
import os
import quiz_helper
import matplotlib.pyplot as plt

In [None]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (14, 8)

### 数据包

In [None]:
import os
import quiz_helper
from zipline.data import bundles

In [None]:
# Point zipline at the end-of-day data directory, resolved relative to the
# current working directory (two levels up, then data/module_4_quizzes_eod).
os.environ['ZIPLINE_ROOT'] = os.path.join(os.getcwd(), '..', '..','data','module_4_quizzes_eod')
# Build a csvdir ingest function for the 'daily' data and register it under
# the bundle name provided by quiz_helper, so it can be loaded by name later.
ingest_func = bundles.csvdir.csvdir_equities(['daily'], quiz_helper.EOD_BUNDLE_NAME)
bundles.register(quiz_helper.EOD_BUNDLE_NAME, ingest_func)
print('Data Registered')

### 构建管道引擎

In [None]:
from zipline.pipeline import Pipeline
from zipline.pipeline.factors import AverageDollarVolume
from zipline.utils.calendars import get_calendar

# Universe filter: the top 500 assets by AverageDollarVolume computed over a
# window length of 120.
universe = AverageDollarVolume(window_length=120).top(500) 
trading_calendar = get_calendar('NYSE') 
# Load the bundle registered under quiz_helper.EOD_BUNDLE_NAME and build the
# engine that the cells below use to run pipelines.
bundle_data = bundles.load(quiz_helper.EOD_BUNDLE_NAME)
engine = quiz_helper.build_pipeline_engine(bundle_data, trading_calendar)

### 查看数据

构建管道引擎后，我们获取时段结束时股票池中的股票。我们将使用这些 ticker 生成风险模型的收益率数据。

In [None]:
# The single date on which the universe screen is evaluated.
universe_end_date = pd.Timestamp('2016-01-05', tz='UTC')

# Run a pipeline that contains only the universe screen, with start and end
# both set to universe_end_date, then take level 1 of the resulting index as a
# plain Python list.
# NOTE(review): level 1 is presumably the asset level of the pipeline output's
# MultiIndex -- confirm against the engine's output format.
universe_tickers = engine\
    .run_pipeline(
        Pipeline(screen=universe),
        universe_end_date,
        universe_end_date)\
    .index.get_level_values(1)\
    .values.tolist()
    
universe_tickers

# 获取收益率数据

In [None]:
from zipline.data.data_portal import DataPortal

# Data portal backed by the loaded bundle: daily bars and adjustments come
# from bundle_data; no minute-level reader is supplied.
data_portal = DataPortal(
    bundle_data.asset_finder,
    trading_calendar=trading_calendar,
    first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day,
    equity_minute_reader=None,
    equity_daily_reader=bundle_data.equity_daily_bar_reader,
    adjustment_reader=bundle_data.adjustment_reader)

## 获取股价数据的辅助函数

In [None]:
def get_pricing(data_portal, trading_calendar, assets, start_date, end_date, field='close'):
    """Fetch a daily history window of ``field`` for ``assets``.

    Both dates are reduced to their ``YYYY-MM-DD`` calendar date and re-stamped
    as midnight UTC before being looked up in ``trading_calendar.closes.index``.
    The number of bars requested is the distance between the two positions.

    Parameters
    ----------
    data_portal : object exposing ``get_history_window(...)``
        Source of the pricing data (a zipline ``DataPortal`` in this notebook).
    trading_calendar : object exposing ``closes.index``
        Calendar whose ``closes`` index is used to locate both dates.
    assets : sequence
        Assets passed through unchanged to ``get_history_window``.
    start_date, end_date : datetime-like with ``strftime``
        Bounds of the requested window. Both must be present in
        ``trading_calendar.closes.index``.
    field : str, default 'close'
        Pricing field to request.

    Returns
    -------
    Whatever ``data_portal.get_history_window`` returns for a window of
    ``end_loc - start_loc`` daily bars ending at ``end_date``.

    Raises
    ------
    KeyError
        Propagated from ``Index.get_loc`` when either date is not in the
        calendar index (for example, a day on which the market is closed).
    """
    # The Timestamp keyword ``offset`` was deprecated and later removed from
    # pandas, where passing it raises TypeError. It played no part in the
    # index lookups below, so the timestamps are built without it.
    end_dt = pd.Timestamp(end_date.strftime('%Y-%m-%d'), tz='UTC')
    start_dt = pd.Timestamp(start_date.strftime('%Y-%m-%d'), tz='UTC')

    # Positions of both dates in the calendar; their difference is the number
    # of bars requested, counted back from end_dt.
    end_loc = trading_calendar.closes.index.get_loc(end_dt)
    start_loc = trading_calendar.closes.index.get_loc(start_dt)

    return data_portal.get_history_window(
        assets=assets,
        end_dt=end_dt,
        bar_count=end_loc - start_loc,
        frequency='1d',
        field=field,
        data_frequency='daily')

## 将股价数据放入 dataframe 中

In [None]:
# Prices for the universe from five years before universe_end_date up to
# universe_end_date, converted to period-over-period returns. The first row is
# dropped (pct_change has no prior value for it) and remaining NaNs become 0.
returns_df = \
    get_pricing(
        data_portal,
        trading_calendar,
        universe_tickers,
        universe_end_date - pd.DateOffset(years=5),
        universe_end_date)\
    .pct_change()[1:].fillna(0) #convert prices into returns

returns_df

## 行业数据辅助函数

我们将为你创建一个对象，它会为每支股票定义一个行业。行业由整数表示。该对象继承自 Classifier 类，详情请参阅 [Classifier 文档](https://www.quantopian.com/posts/pipeline-classifiers-are-here)以及 [Classifier 的源代码](https://github.com/quantopian/zipline/blob/master/zipline/pipeline/classifiers/classifier.py)。

In [None]:
from zipline.pipeline.classifiers import Classifier
from zipline.utils.numpy_utils import int64_dtype
class Sector(Classifier):
    """Classifier that labels each asset with an integer sector code.

    The codes are loaded from a NumPy file when the object is created and are
    looked up by indexing that array with the ``assets`` passed to
    ``_compute``. Positions excluded by ``mask`` receive ``missing_value``.
    """

    # Class-level settings: no inputs, zero-length window, int64 output,
    # and -1 as the marker for a missing sector.
    inputs = ()
    window_length = 0
    dtype = int64_dtype
    missing_value = -1

    def __init__(self):
        # Sector lookup array, indexed by the values found in `assets`.
        self.data = np.load('../../data/project_4_sector/data.npy')

    def _compute(self, arrays, dates, assets, mask):
        """Return the sector code per asset where ``mask`` is true, else ``missing_value``."""
        sector_codes = self.data[assets]
        return np.where(mask, sector_codes, self.missing_value)

In [None]:
sector = Sector()

## 我们将使用 2 年的数据计算因子

**注意**：恰好 2 年前的那一天是休市日。管道引擎无法处理开始日期或结束日期为休市日的情况。为了解决这个问题，我们再往前推 2 天，那一天市场正常开市。

In [None]:
# Start of the factor window: two years and two days before universe_end_date.
factor_start_date = universe_end_date - pd.DateOffset(years=2, days=2)
factor_start_date

## 查看 SimpleMovingAverage 函数

[这是 SimpleMovingAverage 文档](https://www.zipline.io/appendix.html#zipline.pipeline.factors.SimpleMovingAverage)，并且摘抄在下面：

In [None]:
# class zipline.pipeline.factors.SimpleMovingAverage(*args, **kwargs)[source]
# Average Value of an arbitrary column
#
# Default Inputs: None
#
# Default Window Length: None

注意，说明部分没有演示 Inputs 和 Window Length 参数的语法。查看[源代码](https://www.zipline.io/_modules/zipline/pipeline/factors/basic.html#SimpleMovingAverage)后可以看出，SimpleMovingAverage 是继承自 CustomFactor 的类。

这是 [CustomFactor 的文档](https://www.zipline.io/appendix.html?highlight=customfactor#zipline.pipeline.CustomFactor)。注意，它包含参数 `inputs` 和 `window_length`。

## 小测验 1
请创建一个一年收益率因子，先按行业去均值，再进行排名，最后转换为 z 分数。

将此因子作为输入放入 `SimpleMovingAverage` 函数中，窗口长度为 1 周（5 个交易日）。然后对这个平滑后的因子进行排名并计算 z 分数。注意，不需要使其变成行业中性，因为原始因子已经按照行业去均值。

## 答案 1

In [None]:
#TODO: import Returns from zipline

# TODO: import SimpleMovingAverage from zipline


#TODO
# create a pipeline called p

# create a factor of one year returns, demean by sector, then rank and zscore


# TODO
# Use this factor as input into SimpleMovingAverage, with a window length of 5
# Also rank and zscore (no need to de-mean by sector, since the input factor is already sector-demeaned)

# add the unsmoothed factor to the pipeline

# add the smoothed factor to the pipeline too


## 可视化管道

注意，如果图片在 notebook 中加载很慢，请右击并在单独的标签页中查看图片。

In [None]:
p.show_graph(format='png')

## 运行管道并查看因子数据

In [None]:
# Run pipeline `p` from factor_start_date through universe_end_date; the
# result is kept in `df` for the cells below.
df = engine.run_pipeline(p, factor_start_date, universe_end_date)

In [None]:
df.head()

## 我们看看一支股票的一些数据

In [None]:
# these are the index values for all the stocks (index level 1)
df.index.get_level_values(1)[0:5]

## 小测验 2
获取 AAPL 股票的索引值

## 答案 2：

In [None]:
# TODO
# get the level value for AAPL (it's at row index 3)


print(type(stock_index_name))
print(stock_index_name)

### 关于如何使用 numpy.in1d 的 Stack overflow 示例

https://stackoverflow.com/questions/25224545/filtering-muliple-items-in-a-multi-index-python-panda-dataframe

In [None]:
# Keep only the rows whose level-1 index value equals stock_index_name.
# np.in1d tests membership against a sequence, so the single name is wrapped
# in a list.
single_stock_df = df[np.in1d(df.index.get_level_values(1), [stock_index_name])]
single_stock_df.head()

In [None]:
# Plot both columns for the selected stock; the smoothed factor uses a dashed
# line style so the two series can be told apart.
single_stock_df['Momentum_Factor'].plot()
single_stock_df['Smoothed_Momentum_Factor'].plot(style='--')

## 小测验 3
平滑后的因子值与未平滑的因子值相比，有何区别？

## 答案 3：

## 解答 notebook
[解答 notebook](smoothing_solution.ipynb)