# Japan challenge submission template

In [None]:
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.pipeline.data import EquityPricing, factset
from quantopian.pipeline.factors import Returns, SimpleMovingAverage
from quantopian.pipeline.data.factset.estimates import PeriodicConsensus
from quantopian.pipeline.data.factset.estimates import Actuals

from quantopian.pipeline.domain import (
    AT_EQUITIES, # Austria
    AU_EQUITIES, # Australia
    BE_EQUITIES, # Belgium
    BR_EQUITIES, # Brazil
    CA_EQUITIES, # Canada
    CH_EQUITIES, # Switzerland
    CN_EQUITIES, # China
    DE_EQUITIES, # Germany
    DK_EQUITIES, # Denmark
    ES_EQUITIES, # Spain
    FI_EQUITIES, # Finland
    FR_EQUITIES, # France
    GB_EQUITIES, # Great Britain
    HK_EQUITIES, # Hong Kong
    IE_EQUITIES, # Ireland
    IN_EQUITIES, # India
    IT_EQUITIES, # Italy
    JP_EQUITIES, # Japan
    KR_EQUITIES, # South Korea
    NL_EQUITIES, # Netherlands
    NO_EQUITIES, # Norway
    NZ_EQUITIES, # New Zealand
    PT_EQUITIES, # Portugal
    SE_EQUITIES, # Sweden
    SG_EQUITIES, # Singapore
    US_EQUITIES, # United States
)
from quantopian.research import run_pipeline

import pandas as pd
import numpy as np

import time

import matplotlib.pyplot as plt
import seaborn as sns

import empyrical as ep
import alphalens as al
import pyfolio as pf

Helper functions

In [None]:
def evaluate_factor(factor,
                    domain,
                    start_date,
                    end_date,
                    factor_screen=None,
                    quantiles=5,
                    returns_lengths=(1, 5, 10)):
    """Build an Alphalens-style frame of factor values and delayed returns.

    Two pipelines are run on ``domain``: one producing the factor and its
    quantile bucket, and one producing 1-day returns. The returns are then
    shifted back so that each row lines up with the factor value it follows.

    Parameters
    ----------
    factor : quantopian.pipeline.factors.Factor
        Factor producing the scores to evaluate.
    domain : quantopian.pipeline.domain.Domain
        Domain on which both pipelines are run.
    start_date : str or pd.Timestamp
        Start of the evaluation period.
    end_date : str or pd.Timestamp
        End of the evaluation period.
    factor_screen : quantopian.pipeline.filters.Filter, optional
        Filter restricting the assets ``factor`` is evaluated on. It is used
        both as the pipeline screen and as the mask for the quantiles.
        Default is ``factor.notnull()``.
    quantiles : int, optional
        Number of quantile buckets. Default is 5.
    returns_lengths : sequence[int]
        Delays, in rows of the date level, for the returns columns.
        Default is ``(1, 5, 10)``.

    Returns
    -------
    merged : pd.DataFrame
        A (date, asset)-indexed DataFrame with the following columns:
            'factor'
                Values produced by ``factor``.
            'factor_quantile'
                Quantile label produced by ``factor.quantiles``.
            '<N>D' (one per entry of ``returns_lengths``)
                The 1-day return (``Returns(window_length=2)``) taken from
                N rows later in the date level. These are delayed daily
                returns, not cumulative N-day returns.
        Rows containing any NaN are dropped.
    """
    # Map both input dates onto session labels of the domain's calendar.
    trading_calendar = domain.calendar
    first_session = trading_calendar.minute_to_session_label(
        pd.Timestamp(start_date, tz='UTC')
    )
    last_session = trading_calendar.minute_to_session_label(
        pd.Timestamp(end_date, tz='UTC')
    )

    if factor_screen is None:
        factor_screen = factor.notnull()

    # First pipeline: factor values plus their quantile bucket.
    factor_terms = {
        'factor': factor,
        'factor_quantile': factor.quantiles(quantiles, mask=factor_screen),
    }
    factor_results = run_pipeline(
        Pipeline(factor_terms, screen=factor_screen, domain=domain),
        first_session,
        last_session,
        chunksize=250,
    )

    # Second pipeline: one column per requested delay. Every column holds the
    # same plain 1-day return; the delay is applied afterwards by shifting.
    column_order = ['{}D'.format(length) for length in returns_lengths]
    returns_terms = {
        colname: Returns(window_length=2) for colname in column_order
    }

    # Run the returns pipeline far enough past the end date that the longest
    # delay still has data to shift back onto the last factor date.
    extended_end = last_session + domain.calendar.day * max(returns_lengths)
    raw_returns = run_pipeline(
        Pipeline(returns_terms, domain=domain),
        first_session,
        extended_end,
        chunksize=500,
    )

    # Shift the 'ND' column back by N dates so it aligns with the factor row.
    shifted_returns = {
        colname: backshift_returns_series(raw_returns[colname], length)
        for colname, length in zip(column_order, returns_lengths)
    }

    # Reindex the shifted returns like the factor output, then join them
    # column-wise onto the factor columns.
    merged_returns = pd.DataFrame(
        data=shifted_returns,
        index=factor_results.index,
        columns=column_order,
    )
    merged = pd.concat([factor_results, merged_returns], axis=1)
    merged.index.set_names(['date', 'asset'], inplace=True)

    # Keep only rows where the factor, its quantile and every return exist.
    merged = merged.dropna(how='any')

    # Tag the date level with a business-day frequency.
    merged.index.levels[0].freq = pd.tseries.offsets.BDay()

    return merged

def backshift_returns_series(series, N):
    """Shift a multi-indexed series backwards by N observations in the first level.

    This can be used to convert backward-looking returns into a forward-returns series.

    Parameters
    ----------
    series : pd.Series
        Series with a two-level MultiIndex. Rows must be sorted by the first
        level, because the cutoff is located with ``searchsorted`` on that
        level's integer codes.
    N : int
        Number of first-level positions to shift back by. Must be >= 0.

    Returns
    -------
    pd.Series
        A new series in which a value originally stored at first-level
        position ``p`` is stored at position ``p - N``. Values whose original
        position is below ``N`` are dropped, and the last ``N`` first-level
        entries no longer appear. The result is empty when ``N`` is at least
        the number of first-level entries. The series name is not carried
        over.

    Raises
    ------
    ValueError
        If ``N`` is negative.
    """
    if N < 0:
        raise ValueError("N must be non-negative, got {!r}".format(N))

    ix = series.index
    if N == 0:
        # ``dates[:-0]`` would be empty, so handle the no-op shift explicitly.
        return pd.Series(data=series.values, index=ix)

    dates, sids = ix.levels

    # pandas 0.24 renamed MultiIndex ``labels`` to ``codes`` and pandas 1.0
    # removed the old name; support both so this runs on either side.
    codes_name = 'codes' if hasattr(ix, 'codes') else 'labels'
    # Cast to int64: pandas may store codes in a narrow dtype (e.g. int8),
    # where subtracting a large N could overflow.
    date_codes, sid_codes = (
        np.asarray(level_codes, dtype=np.int64)
        for level_codes in getattr(ix, codes_name)
    )

    # Output date level contains all but the last N dates.
    new_dates = dates[:-N]

    # Drop every row belonging to one of the first N dates: ``cutoff`` is the
    # position of the first row whose date code is >= N.
    cutoff = date_codes.searchsorted(N)
    new_date_codes = date_codes[cutoff:] - N
    new_sid_codes = sid_codes[cutoff:]
    new_values = series.values[cutoff:]

    # Sanity check: the first surviving row must land on the first new date.
    # Skipped when nothing survives (N >= number of dates).
    if len(new_date_codes):
        assert new_date_codes[0] == 0

    index_kwargs = {codes_name: [new_date_codes, new_sid_codes]}
    new_index = pd.MultiIndex(
        levels=[new_dates, sids],
        sortorder=1,
        names=ix.names,
        **index_kwargs
    )
    return pd.Series(data=new_values, index=new_index)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import empyrical as ep
import alphalens as al
import pyfolio as pf

def compute_turnover(df):
    """Return the summed absolute row-to-row change of the unstacked input.

    Parameters
    ----------
    df : pd.Series or pd.DataFrame
        Multi-indexed data. In this file it is called with the (date,
        asset)-indexed factor column.

    Returns
    -------
    pd.Series
        One value per row of the unstacked frame: the sum over columns of
        ``|value - previous row's value|``. Missing entries are treated as 0.
        The first row has no predecessor, so its sum is 0.
    """
    # Pivot the innermost index level into columns, dropping missing values first.
    wide = df.dropna().unstack()
    # Remove rows with no data at all, then treat remaining gaps as a zero position.
    wide = wide.dropna(how='all').fillna(0)
    # Absolute change of every column from one row to the next.
    abs_change = wide.diff().abs()
    return abs_change.sum(axis=1)

def get_max_median_position_concentration(expos):
    """Return per-group quantiles of ``expos``, grouped by the first index level.

    Parameters
    ----------
    expos : pd.Series
        Multi-indexed series. In this file it is the (date, asset)-indexed
        factor column.

    Returns
    -------
    pd.DataFrame
        One row per value of the first index level and one column per
        quantile: 0.05, 0.25, 0.5, 0.75 and 0.95.
    """
    # The original also computed separate long and short subsets that were
    # never used; only the combined distribution is reported.
    return expos.groupby(level=0).quantile([.05, .25, .5, .75, .95]).unstack()

def compute_factor_stats(factor_data_total, periods=range(1, 15)):
    """Compute summary statistics for a factor frame.

    Parameters
    ----------
    factor_data_total : pd.DataFrame
        Frame passed to ``al.performance.factor_returns``; it must also
        expose a ``factor`` column.
    periods : iterable of int, optional
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    dict
        'factor_data_total'
            The input frame, unchanged.
        'portfolio_returns_total'
            Output of ``al.performance.factor_returns`` with column labels
            converted to ints (e.g. ``'5D'`` -> ``5``) and each column
            shifted forward by its own label.
        'delay_sharpes_total'
            ``ep.sharpe_ratio`` applied to each of those columns.
        'turnover'
            ``compute_turnover`` of the factor column.
        'n_holdings'
            Count of non-null factor values per first-level index value.
        'perc_holdings'
            Per-group quantiles of the factor column.
    """
    def _strip_unit(label):
        # Drop the trailing unit character, e.g. '5D' -> 5.
        return int(label[:-1])

    portfolio_returns_total = al.performance.factor_returns(factor_data_total)
    portfolio_returns_total.columns = portfolio_returns_total.columns.map(_strip_unit)

    # Shift the column for delay d by d rows.
    for delay in portfolio_returns_total.columns:
        shifted = portfolio_returns_total[delay].shift(delay)
        portfolio_returns_total[delay] = shifted

    weights = factor_data_total.factor

    return {
        'factor_data_total': factor_data_total,
        'portfolio_returns_total': portfolio_returns_total,
        'delay_sharpes_total': portfolio_returns_total.apply(ep.sharpe_ratio),
        'turnover': compute_turnover(weights),
        'n_holdings': weights.groupby(level=0).count(),
        'perc_holdings': get_max_median_position_concentration(weights),
    }

def plot_overview_tear_sheet(factor_data, periods=range(1, 15)):
    """Plot a four-panel overview of a factor and return its statistics.

    The ``factor`` column of ``factor_data`` is rescaled IN PLACE so that,
    for each date, the absolute values sum to 1 (i.e. the scores are treated
    as portfolio weights).

    Panels:
        top-left      bar chart of the per-delay Sharpe ratios
        top-right     cumulative returns for delays 1 through 4
        bottom-left   number of holdings (blue) and turnover (red, twin axis)
        bottom-right  per-date quantiles of the weights

    Parameters
    ----------
    factor_data : pd.DataFrame
        Frame with a ``factor`` column and an index level named ``'date'``.
    periods : iterable of int, optional
        Forwarded to ``compute_factor_stats``.

    Returns
    -------
    (matplotlib.figure.Figure, dict)
        The figure and the dict returned by ``compute_factor_stats``.
    """
    # Normalise scores so the absolute weights on each date sum to 1.
    abs_totals_by_date = factor_data.abs().groupby(level='date').sum()
    factor_data['factor'] = factor_data.factor.div(abs_totals_by_date['factor'])

    fig = plt.figure(figsize=(16, 16))
    grid = plt.GridSpec(3, 4)
    sharpe_ax = plt.subplot(grid[0:2, 0:2])

    factor_stats = compute_factor_stats(factor_data, periods=periods)

    # Top-left: Sharpe ratio per delay.
    sharpe_frame = pd.DataFrame({'total': factor_stats['delay_sharpes_total']})
    sharpe_frame.plot.bar(ax=sharpe_ax)
    sharpe_ax.set(xlabel='delay', ylabel='IR')

    # Top-right: cumulative returns of the first four delays.
    cum_returns_ax = plt.subplot(grid[0:2, 2:4])
    first_delays = list(range(1, 5))
    delayed_returns = factor_stats['portfolio_returns_total'][first_delays]
    delayed_returns.apply(ep.cum_returns).plot(ax=cum_returns_ax)
    cum_returns_ax.set(title='Total returns', ylabel='Cumulative returns')

    # Bottom-left: holdings count, with turnover on a secondary y-axis.
    holdings_ax = plt.subplot(grid[-1, 0:2])
    factor_stats['n_holdings'].plot(color='b', ax=holdings_ax)
    holdings_ax.set_ylabel('# holdings', color='b')
    holdings_ax.tick_params(axis='y', labelcolor='b')

    turnover_ax = holdings_ax.twinx()
    factor_stats['turnover'].plot(color='r', ax=turnover_ax)
    turnover_ax.set_ylabel('turnover', color='r')
    turnover_ax.tick_params(axis='y', labelcolor='r')

    # Bottom-right: distribution of position sizes over time.
    concentration_ax = plt.subplot(grid[-1, 2:4])
    factor_stats['perc_holdings'].plot(ax=concentration_ax)
    concentration_ax.set(ylabel='Long/short perc holdings')

    grid.tight_layout(fig)

    return fig, factor_stats

Universe definition

In [None]:
# Custom factor: the smallest traded volume within a 10-row lookback window.
class MinVolume(CustomFactor):
    """Minimum of ``EquityPricing.volume`` over the lookback window."""

    inputs = [EquityPricing.volume]
    window_length = 10

    def compute(self, today, asset_ids, out, values):
        # Minimum along axis 0 of the window, one result per column.
        # This is a plain minimum, so a NaN anywhere in a column yields NaN.
        # NOTE(review): presumably axis 0 is days and columns are assets - confirm.
        out[:] = values.min(axis=0)

# Universe: stocks in the top 30% (70th-100th percentile) of
# latest close x minimum recent volume. Weighting volume by price rules out
# penny stocks that trade in huge share counts.
volume_min = MinVolume()
price = EquityPricing.close.latest

# Only rank assets whose minimum volume over the window is strictly positive.
positive_min_volume = volume_min > 0
univ_filter = (price * volume_min).percentile_between(70, 100, mask=positive_min_volume)

Enter your alpha factor here. Make sure to delete the following cell before making your submission!

In [None]:
# Sample alpha: a size-based factor. The sign is flipped so that a smaller
# log(1 + market value) gives a higher score.
market_value = factset.Fundamentals.mkt_val.latest
alpha_factor = -market_value.log1p()

# Winsorize within the universe, passing 0.05 / 0.95 as the percentile bounds.
alpha_winsorized = alpha_factor.winsorize(
    min_percentile=0.05,
    max_percentile=0.95,
    mask=univ_filter,
)

# Z-score so the alphas are positive and negative (long and short) and can
# be used as weights.
alpha_zscore = alpha_winsorized.zscore()

In [None]:
# Trailing ratio: public market value divided by the net_inc_cf_qf field.
# NOTE(review): presumably quarterly net income - confirm against the
# FactSet Fundamentals field reference.
net_inc_cf_qf = factset.Fundamentals.net_inc_cf_qf.latest
mkt_val_public = factset.Fundamentals.mkt_val_public.latest
per = mkt_val_public / net_inc_cf_qf

# Estimate-based ratio: latest close divided by the mean consensus EPS
# estimate for quarterly period offset 1.
close = EquityPricing.close.latest
fq1_eps_cons_mean = PeriodicConsensus.slice('EPS', 'qf', 1).mean.latest
per_predict = close / fq1_eps_cons_mean

# Actual (reported) quarterly EPS at period offsets 0, -1, -2 and -3.
# The digit in each name counts offsets back from 0: fq1 is offset -1, etc.
fq0_eps_act = Actuals.slice('EPS', 'qf', 0).actual_value.latest
fq1_eps_act = Actuals.slice('EPS', 'qf', -1).actual_value.latest
fq2_eps_act = Actuals.slice('EPS', 'qf', -2).actual_value.latest
fq3_eps_act = Actuals.slice('EPS', 'qf', -3).actual_value.latest

# True where EPS at offset 0 exceeds EPS at offset -1.
# A stricter variant would also require fq1_eps_act > fq2_eps_act.
myfilter = fq0_eps_act > fq1_eps_act

# Alpha: ratio of EPS at offset 0 to EPS at offset -1.
alpha_factor = fq0_eps_act / fq1_eps_act

# Winsorize within the universe; ``myfilter`` is not applied to the mask.
alpha_winsorized = alpha_factor.winsorize(
    min_percentile=0.05,
    max_percentile=0.95,
    mask=univ_filter,
)

alpha_zscore = alpha_winsorized.zscore()


In [None]:
# Our alpha factor is a size-based factor.
# from https://www.quantopian.com/posts/arithmetic-on-pipeline-factors
# from https://www.quantopian.com/docs/data-reference/estimates_long_term
import quantopian.pipeline.data.factset.estimates as fe

# Slice the LongTermConsensus dataset family into datasets
# for price target and long term EPS growth.

# 6-12ヶ月の株価予測
price_tgt_cons = fe.LongTermConsensus.slice('PRICE_TGT')
# 3-5年のEPSの成長予測
eps_gr_cons = fe.LongTermConsensus.slice('EPS_LTG')

# Get the latest mean consensus price target and EPS growth.
price_tgt_mean = price_tgt_cons.mean.latest
price_tgt_low = price_tgt_cons.low.latest
mean_low = (price_tgt_mean + price_tgt_low) / 2 
eps_gr_mean = eps_gr_cons.mean.latest

# Define an estimated price growth factor by taking the relative
# difference between the 6-12 month price target and yesterday's
# close price.
# 終値
yesterday_close = EquityPricing.close.latest

fq1_eps_cons_mean = PeriodicConsensus.slice('EPS', 'qf', 1).mean.latest
fq0_eps_act = Actuals.slice('EPS', 'qf', 0).actual_value.latest

per_predict = yesterday_close / fq1_eps_cons_mean  


# 株価予測と終値の比較()
alpha_factor_est_price_growth = np.log1p((price_tgt_mean - yesterday_close) / yesterday_close)
# 時価総額
mkt_val = factset.Fundamentals.mkt_val.latest.log10() 
#
alpha_factor_roe = factset.Fundamentals.roe_af.latest.log1p()

# alpha_factor = 0.4 * alpha_factor_mkt_val + \
#                0.6 * (20 * alpha_factor_est_price_growth + alpha_factor_roe)
#バックテストしながら適当に重みづけ Searching appropriate weight with Backtesting

alpha_factor = price_tgt_mean / yesterday_close #* alpha_factor_roe
alpha_factor = mean_low / yesterday_close


alpha_winsorized = alpha_factor.winsorize(min_percentile=0.05,
                                          max_percentile=0.95,
                                          mask=univ_filter)

alpha_zscore = alpha_winsorized.zscore()




In [None]:
# Evaluate the z-scored alpha on Japanese equities, restricted to the
# universe filter, with one returns column for each delay from 1 to 14.
# (Pass alpha_factor instead of alpha_zscore to evaluate the raw factor.)
eval_start = '2015-06-1'
eval_end = '2018-10-1'
al_data = evaluate_factor(
    alpha_zscore,
    JP_EQUITIES,
    eval_start,
    eval_end,
    factor_screen=univ_filter,
    returns_lengths=range(1, 15),
)


In [None]:
# Draw the overview tear sheet for the evaluated factor and keep the stats.
# plot_overview_tear_sheet rescales al_data['factor'] in place.
# NOTE(review): the trailing semicolon presumably suppresses the notebook's
# echo of the cell result - confirm before removing it.
fig, factor_stats = plot_overview_tear_sheet(al_data);

In [None]:
# Show the factor frame stored in the stats dict (the same object that was
# passed to plot_overview_tear_sheet).
factor_stats["factor_data_total"]

In [None]:


# PER: public market value divided by the net_inc_cf_qf field.
net_inc_cf_qf = factset.Fundamentals.net_inc_cf_qf.latest
mkt_val_public = factset.Fundamentals.mkt_val_public.latest
per = mkt_val_public / net_inc_cf_qf

# Screen: keep assets whose public market value lies strictly between
# 10,000,000,000 and 30,000,000,000.
mini_mkt_val_public = (mkt_val_public > 10000000000) & (mkt_val_public < 30000000000)

# Forecast PER: latest close divided by the mean consensus EPS estimate for
# the next quarter (quarterly period offset 1).
close = EquityPricing.close.latest
fq1_eps_cons_mean = PeriodicConsensus.slice('EPS', 'qf', 1).mean.latest
per_predict = close / fq1_eps_cons_mean

# Actual quarterly EPS at period offsets 0, -1, -2 and -3.
fq0_eps_act = Actuals.slice('EPS', 'qf', 0).actual_value.latest
fq1_eps_act = Actuals.slice('EPS', 'qf', -1).actual_value.latest
fq2_eps_act = Actuals.slice('EPS', 'qf', -2).actual_value.latest
fq3_eps_act = Actuals.slice('EPS', 'qf', -3).actual_value.latest

# Ordinary income, and log10 of market value.
ordinary_inc_qf = factset.Fundamentals.ordinary_inc_qf.latest
alpha_factor_mkt_val = factset.Fundamentals.mkt_val.latest.log10()

# Collect every term above as a pipeline column so the raw values can be
# inspected side by side.
inspection_columns = {
    'net_inc_cf_qf': net_inc_cf_qf,
    'mkt_val_public': mkt_val_public,
    'alpha_factor_mkt_val': alpha_factor_mkt_val,
    'per': per,
    'fq1_eps_cons_mean': fq1_eps_cons_mean,
    'close': close,
    'per_predict': per_predict,
    'fq0_eps_act': fq0_eps_act,
    'fq1_eps_act': fq1_eps_act,
    'fq2_eps_act': fq2_eps_act,
    'fq3_eps_act': fq3_eps_act,
    'ordinary_inc_qf': ordinary_inc_qf,
}
factor_pipe = Pipeline(
    inspection_columns,
    domain=JP_EQUITIES,
    screen=mini_mkt_val_public,
)
factor_results = run_pipeline(factor_pipe, "2018-1-1", "2018-10-1", chunksize=250)
    

In [None]:
# Histogram (100 bins) of public market value divided by 100,000,000, for
# the second row (iloc[1]) of the unstacked results.
# NOTE(review): the log-market-value histogram in the next cell uses
# iloc[0] - confirm which row is intended.
(factor_results["mkt_val_public"].unstack().iloc[1] / 100000000).hist(bins=100)

In [None]:
# Histogram (100 bins) of log10 market value for the first row (iloc[0]) of
# the unstacked results.
(factor_results["alpha_factor_mkt_val"].unstack().iloc[0] ).hist(bins=100)