In [None]:
import quantopian.algorithm as algo
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.filters import QTradableStocksUS

from quantopian.algorithm import attach_pipeline, pipeline_output
from quantopian.pipeline.data.user_5e1782a618a716004cc8996f import tnic
from quantopian.pipeline.filters import Q1500US, Q500US
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.data import morningstar
import numpy as np
import pandas as pd
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
from operator import itemgetter
from quantopian.pipeline.data.morningstar import Fundamentals
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.domain import US_EQUITIES




def initialize(context):
    """Prepare algorithm state, the pipeline, the schedule and trading costs.

    Everything is stored as attributes on ``context`` so the other functions
    in this file can read and update it.
    """
    # --- Pair bookkeeping ---------------------------------------------------
    # Candidate pairs as (asset, asset) tuples, and the matching info tuples.
    context.pairs_stocks = []
    context.pairs_stocks_info = []

    # Scratch lists of pairs queued for removal.
    context.pairs_discard = []
    context.pairs_discard_per = []
    context.pairs_discard_old = []

    # Open positions for the current month, and those carried over from the
    # previous month.
    context.pairs_holding = {}
    context.pairs_holding_old = {}

    # --- Pipeline inputs ----------------------------------------------------
    # Ticker symbol of each company.
    context.ticker = Fundamentals.symbol.latest
    # Similarity score against the companies a company is linked to.
    context.score = tnic.score.latest
    # Tickers of the companies a company is linked to.
    context.closest_com = tnic.closest_com.latest
    # The attribute name is misspelled, but make_pipeline reads this exact
    # name, so it has to stay as it is here.
    context.unvierse = Q1500US()

    # Number of trading days of history used by the cointegration test.
    context.stationary_window = 250

    attach_pipeline(make_pipeline(context), 'pipeline')

    # --- Schedule -----------------------------------------------------------
    # Rebuild the candidate pairs at the first market open of every month.
    schedule_function(
        update_pairs,
        date_rules.month_start(),
        time_rules.market_open(),
    )
    # Evaluate signals and place orders every day, one hour after the open.
    schedule_function(
        check_status,
        date_rules.every_day(),
        time_rules.market_open(hours=1),
    )

    # --- Trading costs ------------------------------------------------------
    # Zero per-share commission with no minimum. A custom
    # VolumeShareSlippage(volume_limit=0.50, price_impact=0.0) model was left
    # disabled by the author, so no slippage model is set here.
    set_commission(commission.PerShare(cost=0.0, min_trade_cost=0))
 

    # Fetch the pipeline output and the historical prices
def hist_price(context, data):
    """Cache the pipeline output and a daily price history on ``context``.

    Writes two attributes:

    * ``context.pipe_results`` - the output of the 'pipeline' pipeline, with
      its index copied into a ``sid`` column.
    * ``context.price_hist`` - the last ``context.stationary_window`` daily
      "price" values for every entry of that ``sid`` column.

    Returns None.
    """
    context.pipe_results = pipeline_output('pipeline')
    frame = context.pipe_results
    # Mirror the index into a regular column so it can be read like the
    # other pipeline columns.
    frame.loc[:, "sid"] = frame.index

    context.price_hist = data.history(
        frame['sid'],
        "price",
        context.stationary_window,
        '1d',
    )

    # Update pairs of stocks to trade
def update_pairs(context, data):
    """Rebuild the candidate pairs and roll open positions over one month.

    Steps, in order:

    1. Refresh ``context.pipe_results`` / ``context.price_hist`` through
       ``hist_price`` and clear the candidate lists.
    2. Liquidate every position still present in
       ``context.pairs_holding_old``.
    3. For each company, test it against the tickers listed in its
       ``closest_com`` column and keep the pairs whose cointegration p-value
       is below 0.01.
    4. Keep the 20 accepted pairs with the highest similarity score in
       ``context.pairs_stocks_info``.
    5. Move ``context.pairs_holding`` into ``context.pairs_holding_old`` and
       start the month with no current holdings.

    Each entry of ``context.pairs_stocks_info`` is the tuple
    ``(asset_1, asset_2, score, 1/(1+w), w/(1+w), spread_mean, spread_std, w)``
    where ``w`` is the OLS coefficient of asset_1's prices on asset_2's
    prices and the spread is ``price_1 - w * price_2``.
    """
    hist_price(context, data)
    context.pairs_stocks = []
    context.pairs_stocks_info = []

    # Liquidate what is left of the positions carried over from the previous
    # month. Each leg is reversed by the quantity its opening order actually
    # filled, so a partially filled order is not over-closed, and a leg whose
    # opening order was never placed (order id None / unknown) is skipped
    # instead of raising on ``None.amount``.
    for position in context.pairs_holding_old.values():
        legs = ((position[0], position[5]), (position[2], position[6]))
        for asset, order_id in legs:
            if order_id is None:
                continue
            opening_order = get_order(order_id)
            if opening_order is not None and opening_order.filled:
                order(asset, -opening_order.filled)
    context.pairs_holding_old = {}

    results = context.pipe_results
    prices = context.price_hist

    # Ticker -> asset lookup built once instead of masking the whole frame
    # for every candidate; setdefault keeps the first row for a ticker, which
    # is the row the original positional lookup selected.
    sid_by_ticker = {}
    for ticker, sid in zip(results['ticker'], results['sid']):
        sid_by_ticker.setdefault(ticker, sid)

    accepted = set()   # (asset_1, asset_2) pairs kept so far, for O(1) checks
    candidates = []    # info tuples of the accepted pairs

    # Walk the rows positionally. Integer keys such as ``series[i]`` on an
    # asset-indexed Series are ambiguous between label and position.
    rows = zip(results['sid'], results['closest_com'], results['score'])
    for s1_id, closest_text, score_text in rows:
        if pd.isnull(closest_text):
            continue
        s1_prices = prices[s1_id]
        if s1_prices.isnull().any():
            # Incomplete history cannot be tested for cointegration.
            continue

        # closest_com is a stringified list of tickers, e.g. "['AAA', 'BBB']".
        for j, raw_ticker in enumerate(closest_text.strip('[]').split(', ')):
            s2_id = sid_by_ticker.get(raw_ticker.strip("' '"))
            if s2_id is None:
                # The linked company is not part of today's pipeline output.
                continue
            if (s2_id, s1_id) in accepted:
                # The mirrored pair was already accepted; skip the costly test.
                continue
            s2_prices = prices[s2_id]
            if s2_prices.isnull().any():
                continue

            pvalue = coint(s1_prices, s2_prices)[1]
            if not pvalue < 0.01:
                continue

            # Hedge ratio: regression of asset_1 on asset_2 without intercept,
            # so the fit has exactly one coefficient.
            hedge_fit = sm.OLS(s1_prices, s2_prices).fit()
            weight = hedge_fit.params.iloc[0]
            spread = s1_prices - weight * s2_prices

            # The j-th score in the row's stringified score list belongs to
            # the j-th linked ticker. Parsed only for accepted pairs.
            score = float(score_text.strip('[]').split(',')[j].strip("'"))

            context.pairs_stocks.append((s1_id, s2_id))
            accepted.add((s1_id, s2_id))
            candidates.append((
                s1_id,
                s2_id,
                score,
                1 / (1 + weight),
                weight / (1 + weight),
                spread.mean(),
                spread.std(),
                weight,
            ))

    # One stable sort by similarity score (highest first) gives the same
    # order as re-sorting after every insertion; then keep the top 20.
    candidates.sort(key=itemgetter(2), reverse=True)
    context.pairs_stocks_info = candidates[:20]

    # Positions opened during the month that just ended become the "old"
    # holdings; the new month starts with no current holdings.
    context.pairs_holding_old = dict(context.pairs_holding)
    context.pairs_holding = {}

def make_pipeline(context):
    """Return the Pipeline that feeds the pair search.

    The columns are the ticker, similarity-score and linked-company terms
    stored on ``context``, screened by ``context.unvierse`` (attribute name
    spelled as it is assigned in ``initialize``).
    """
    columns = {
        'ticker': context.ticker,
        'score': context.score,
        'closest_com': context.closest_com,
    }
    return Pipeline(columns=columns, screen=context.unvierse)

    
    
    # Entry signal: open a position when the spread between the two stocks is between 1.25 and 2.5 standard deviations away from its mean, on either side.
def sd_signal_open(context, pair, data):
    """Decide whether to open a position in ``pair`` and on which side.

    ``pair`` is read positionally: 0 and 1 are the two assets, 3 and 4 their
    weights, 5 and 6 the mean and standard deviation of the spread, and 7 the
    hedge ratio. The spread is ``price(pair[0]) - pair[7] * price(pair[1])``.

    Returns ``False`` when there is no entry. Otherwise returns the 5-tuple
    ``(asset_a, weight_a, asset_b, weight_b, side)``. ``side`` is 0 when the
    spread is between 1.25 and 2.5 standard deviations above its mean and 1
    when it is the same distance below. The caller in this file sells
    ``asset_a`` and buys ``asset_b``. An entry also requires a positive hedge
    ratio and that both assets can currently be traded.
    """
    first, second = pair[0], pair[1]
    mean, sd, ratio = pair[5], pair[6], pair[7]

    spread = data.current(first, 'price') - ratio * data.current(second, 'price')

    stretched_up = mean + 1.25 * sd < spread < mean + 2.5 * sd
    stretched_down = mean - 2.5 * sd < spread < mean - 1.25 * sd
    if not (stretched_up or stretched_down):
        return False

    if not (ratio > 0 and data.can_trade(first) and data.can_trade(second)):
        return False

    if stretched_up:
        return first, pair[3], second, pair[4], 0
    return second, pair[4], first, pair[3], 1
    
    
    # Exit signal: close the position once the spread has come back to within 0.25 standard deviations of its mean, or has crossed the mean.
def sd_signal_close(context, pair, pair_info, data):
    """Return True when an open position in ``pair`` should be closed.

    ``pair`` supplies the spread statistics (5: mean, 6: standard deviation,
    7: hedge ratio) and ``pair_info[4]`` the side the position was opened on.
    A side-0 position closes once the spread is below mean + 0.25 sd; a
    side-1 position closes once it is above mean - 0.25 sd. Any other side
    value yields False.
    """
    spread = data.current(pair[0], 'price') - pair[7] * data.current(pair[1], 'price')
    side = pair_info[4]

    if side == 0:
        return bool(spread < pair[5] + 0.25 * pair[6])
    if side == 1:
        return bool(spread > pair[5] - 0.25 * pair[6])
    return False
    
    
    # Stop-loss signal: cut the position when the spread is more than 1.5 standard deviations away from its mean on the side the position was opened on.
def cut_loss(context, pair, pair_info, data):
    """Return True when an open position in ``pair`` should be stopped out.

    Uses the spread ``price(pair[0]) - pair[7] * price(pair[1])`` with mean
    ``pair[5]`` and standard deviation ``pair[6]``. A side-0 position
    (``pair_info[4] == 0``) is stopped when the spread is above
    mean + 1.5 sd; a side-1 position when it is below mean - 1.5 sd.

    NOTE(review): sd_signal_open allows entries between 1.25 and 2.5 sd, so a
    position opened beyond 1.5 sd already meets this stop unless the spread
    narrows - confirm the thresholds are intended.
    """
    spread = data.current(pair[0], 'price') - pair[7] * data.current(pair[1], 'price')
    side = pair_info[4]

    if side == 0:
        return bool(spread > pair[5] + 1.5 * pair[6])
    if side == 1:
        return bool(spread < pair[5] - 1.5 * pair[6])
    return False
    
    
    # Discard signal: stop considering a pair for trading once its spread is more than 2.5 standard deviations away from its mean in either direction.
def cut_pair(context, pair, data):
    """Return True when ``pair`` should be dropped from the candidate list.

    That is the case when the spread
    ``price(pair[0]) - pair[7] * price(pair[1])`` is more than 2.5 standard
    deviations (``pair[6]``) above or below its mean (``pair[5]``).
    """
    spread = data.current(pair[0], 'price') - pair[7] * data.current(pair[1], 'price')
    band = 2.5 * pair[6]
    return bool(spread > pair[5] + band or spread < pair[5] - band)
    
    
    # Order securities based on signals and rebalance
def check_status(context, data):
    """Evaluate every tracked pair and place the resulting orders.

    For each candidate in ``context.pairs_stocks_info``:

    * if ``cut_pair`` fires, the pair is queued for removal from the
      candidate lists;
    * if a position is open for it, the position is closed when either
      ``sd_signal_close`` or ``cut_loss`` fires;
    * otherwise a position is opened when ``sd_signal_open`` returns a
      signal.

    Positions carried over in ``context.pairs_holding_old`` are only checked
    for closing. Queued removals are applied after the loops so that no
    collection is modified while it is being iterated.

    A position is stored as
    ``(sold_asset, sold_target, bought_asset, bought_target, side,
    sold_order_id, bought_order_id)``.
    """
    # Each leg is sized as its pair weight divided by this number.
    # NOTE(review): nothing here caps how many pairs are open at once, so
    # total exposure can exceed what ten slots imply - confirm intended.
    num_holding = 10

    for pair in context.pairs_stocks_info:
        if cut_pair(context, pair, data):
            key = (pair[0], pair[1])
            # Guarded so a key that is already gone cannot raise ValueError.
            if key in context.pairs_stocks:
                context.pairs_stocks.remove(key)
            context.pairs_discard_per.append(pair)

        position = context.pairs_holding.get(pair)
        if position is not None:
            if (sd_signal_close(context, pair, position, data)
                    or cut_loss(context, pair, position, data)):
                _unwind_position(position)
                del context.pairs_holding[pair]
            continue

        signal = sd_signal_open(context, pair, data)
        if signal:
            sold_asset, sold_weight, bought_asset, bought_weight, side = signal
            sold_target = -sold_weight / num_holding
            bought_target = bought_weight / num_holding
            sold_order_id = order_percent(sold_asset, sold_target)
            bought_order_id = order_percent(bought_asset, bought_target)
            context.pairs_holding[pair] = (
                sold_asset,
                sold_target,
                bought_asset,
                bought_target,
                side,
                sold_order_id,
                bought_order_id,
            )

    for pair, position in context.pairs_holding_old.items():
        if (sd_signal_close(context, pair, position, data)
                or cut_loss(context, pair, position, data)):
            _unwind_position(position)
            context.pairs_discard_old.append(pair)

    # Apply the removals queued above.
    for pair in context.pairs_discard_per:
        context.pairs_stocks_info.remove(pair)
    context.pairs_discard_per = []

    for pair in context.pairs_discard_old:
        del context.pairs_holding_old[pair]
    context.pairs_discard_old = []


def _unwind_position(position):
    """Flatten both legs of a stored position.

    Each leg is reversed by the quantity its opening order actually filled,
    so a partially filled order is not over-closed. A leg whose opening
    order was never placed (order id None) or can no longer be looked up is
    skipped instead of raising on ``None.amount``.
    """
    legs = ((position[0], position[5]), (position[2], position[6]))
    for asset, order_id in legs:
        if order_id is None:
            continue
        opening_order = get_order(order_id)
        if opening_order is not None and opening_order.filled:
            order(asset, -opening_order.filled)