In [14]:
from quantopian.pipeline import Pipeline
from quantopian.pipeline.filters import Q500US
from quantopian.pipeline.factors import CustomFactor
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.research import run_pipeline

import alphalens

# These imports can be found in the store panel for each dataset
# (https://www.quantopian.com/data). Note that not all store datasets
# can be used in pipeline yet.
from quantopian.pipeline.data.alpha_vertex import (
    # Top 100 Securities
    precog_top_100 as dataset_100,
    # Top 500 Securities
    precog_top_500 as dataset_500
)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



class PredictionQuality(CustomFactor):
    """
    Rolling sign-accuracy score of the five-day return predictions.

    For every asset, each prediction in the window that is old enough to
    have a realized outcome (i.e. made at least 5 rows before the end of
    the window) is compared with the log return actually realized over
    the following 5 rows of closing prices.  A prediction scores an error
    of 0 when its sign matches the realized sign, 1 when the signs are
    opposite and 0.5 when exactly one of the two is zero.

    The factor value is ``1 - EWMA(error)`` evaluated at the most recent
    comparable prediction, so values near 1 mean the sign was almost
    always right.  With ``window_length = 15`` this yields 10 comparisons
    per asset.  Because ``min_periods`` equals the number of comparisons,
    an asset with any missing price or prediction in the window gets NaN.
    """
    # data used to create custom factor
    inputs = [dataset_500.predicted_five_day_log_return, USEquityPricing.close]

    # change this to what you want; it must be larger than the 5-row
    # horizon used in compute(), otherwise there is nothing to compare
    window_length = 15

    def compute(self, today, assets, out, pred_ret, px_close):
        """
        Write the per-asset prediction quality into ``out``.

        ``pred_ret`` and ``px_close`` are treated as 2-D arrays with one
        row per day of the window and one column per asset: returns are
        differenced along rows and the final row of the smoothed score is
        assigned to ``out``.
        """
        # horizon (in rows) of the forecast being evaluated
        horizon = 5

        pred_ret_df = pd.DataFrame(data=pred_ret)
        log_px_df = np.log(pd.DataFrame(data=px_close))

        # realized forward log return: row i holds log(close[i + horizon])
        # minus log(close[i]), which lines up with the prediction made on
        # row i.  The last `horizon` rows have no outcome yet and are cut.
        realized_df = (log_px_df.shift(-horizon) - log_px_df).iloc[:-horizon]
        n = len(realized_df)

        # keep only the predictions that have a realized outcome
        pred_ret_df = pred_ret_df.iloc[:n]

        # per-prediction sign error: 0 = same sign, 1 = opposite sign,
        # 0.5 = exactly one side is zero; NaN propagates from either input
        err_df = (np.sign(realized_df) - np.sign(pred_ret_df)).abs() / 2.0

        # exponentially weighted hit rate.  DataFrame.ewm(...).mean() is
        # the replacement for the removed pd.ewma(...) and uses the same
        # defaults (adjust=True, ignore_na=False).
        smoothed_err = err_df.ewm(com=n, min_periods=n).mean()
        pred_quality = (1 - smoothed_err).iloc[-1].values

        out[:] = pred_quality

        
        
class NormalizedReturn(CustomFactor):
    """
    Cross-sectional z-score of the latest predicted five-day return.

    Only the most recent row of the prediction window is used: the mean
    and standard deviation are taken across the assets passed to
    ``compute`` for that single row (NaNs ignored), and each asset's
    prediction is expressed as the number of standard deviations it lies
    from that mean.

    NOTE(review): no historical (time-series) volatility is involved, and
    neither the earlier rows of the window nor ``px_close`` are read.
    ``window_length`` and the close-price input are kept as they were so
    the factor's declaration is unchanged.
    """

    # data used to create custom factor
    inputs = [dataset_500.predicted_five_day_log_return, USEquityPricing.close]

    # change this to what you want (only the last row is currently used)
    window_length = 10

    def compute(self, today, assets, out, pred_ret, px_close):
        """Write the z-scored latest prediction of every asset into ``out``."""
        # most recent prediction for each asset
        latest_pred = pred_ret[-1]

        # cross-sectional mean and dispersion, skipping missing predictions
        avg_ret = np.nanmean(latest_pred, axis=0)
        std_ret = np.nanstd(latest_pred, axis=0)

        # When all predictions coincide (or only one asset is present) the
        # dispersion is zero and the score is undefined.  The division
        # still yields NaN as before; errstate only silences the
        # floating-point warning.
        with np.errstate(divide='ignore', invalid='ignore'):
            norm_ret = (latest_pred - avg_ret) / std_ret

        out[:] = norm_ret



# Date range handed to run_pipeline; start and end are the same day.
START = END = '2018-02-13'

# Morningstar sector code -> human-readable sector name.
MORNINGSTAR_SECTOR_CODES = {
    -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology',
}

Collecting quantopian
  Could not find a version that satisfies the requirement quantopian (from versions: )
No matching distribution found for quantopian


ModuleNotFoundError: No module named 'quantopian'

In [None]:
# Universe: Q500US members whose latest AlphaVertex five-day prediction
# is not null.
latest_prediction = dataset_500.predicted_five_day_log_return.latest
covered_stocks = Q500US() & latest_prediction.notnull()

# Score prediction quality over the covered universe, keep the names
# scoring above 0.65, and normalize predictions within that subset only.
prediction_quality = PredictionQuality(mask=covered_stocks)
quality = prediction_quality > 0.65
normalized_return = NormalizedReturn(mask=quality)

# Output columns of the pipeline.
pipeline_columns = {
    'predicted 5 day returns': latest_prediction,
    'normalized returns': normalized_return,
    'sector': Sector(mask=covered_stocks),
}

# Restrict the result rows to the covered universe.
pipe = Pipeline(columns=pipeline_columns, screen=covered_stocks)

# Evaluate the pipeline over the configured date range.
pipe_output = run_pipeline(pipe, start_date=START, end_date=END)

In [None]:
# Per-sector mappings of pipeline index entry -> predicted 5-day return.
finance = {}
technology = {}
utilities = {}
energy = {}

# Morningstar sector code -> dict collecting that sector's predictions.
# Sector 207 (Utilities) previously landed in `finance` because of a
# copy-paste slip, which left `utilities` permanently empty.
sector_buckets = {
    103: finance,
    311: technology,
    309: energy,
    207: utilities,
}

# Walk index, sector and prediction in lockstep instead of indexing the
# Series with `[i]`, which is ambiguous (label vs. position) for integers.
for row_key, sector_code, predicted_return in zip(
        pipe_output.index,
        pipe_output['sector'],
        pipe_output['predicted 5 day returns']):
    bucket = sector_buckets.get(sector_code)
    # rows from any other sector are ignored, as before
    if bucket is not None:
        bucket[row_key] = predicted_return

In [None]:
def _top_by_value(bucket, count=10):
    """
    Return the `count` entries of `bucket` with the largest values.

    The result is a list of (key, value) pairs in ascending order of
    value, with ties broken by key, so the best entry comes last.

    Written without tuple-parameter lambdas or `dict.iteritems`, which
    only exist on Python 2, so it runs on both Python 2 and Python 3.
    """
    if count <= 0:
        return []
    ranked = sorted(bucket.items(), key=lambda item: (item[1], item[0]))
    return ranked[-count:]


# Ten highest predicted 5-day returns per sector.  The results are bound
# to names so that none of them is thrown away.
top_finance = _top_by_value(finance)
top_technology = _top_by_value(technology)
top_utilities = _top_by_value(utilities)
top_energy = _top_by_value(energy)

# A notebook cell only displays its last bare expression; as before, that
# is the energy ranking.
top_energy