# In [None]:  (notebook cell marker left over from the export)
from datetime import timedelta
from pytz import timezone
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from collections import Counter
import talib
import statsmodels.api as sm
import numpy 

import quantopian.algorithm as algo
import quantopian.experimental.optimize as opt

from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import builtin, morningstar as mstar
from quantopian.pipeline.factors import AverageDollarVolume
from quantopian.pipeline.factors.morningstar import MarketCap
from quantopian.pipeline.classifiers.morningstar import Sector

from quantopian.pipeline.filters.morningstar import Q1500US
from quantopian.pipeline.data.sentdex import sentiment
from quantopian.pipeline.data.morningstar import operation_ratios

# Algorithm Parameters
# NOTE(review): UNIVERSE_SIZE, MIN_MARKET_CAP_PERCENTILE and
# LIQUIDITY_LOOKBACK_LENGTH are not referenced anywhere in the visible part
# of this file -- confirm whether they are still needed.

UNIVERSE_SIZE = 500
MIN_MARKET_CAP_PERCENTILE = 50
LIQUIDITY_LOOKBACK_LENGTH = 100

# Constraint Parameters
# These are read by do_portfolio_construction when it builds the optimizer
# constraints.
MAX_GROSS_LEVERAGE = 1.0
MAX_SHORT_POSITION_SIZE = 0.02  # 2%
MAX_LONG_POSITION_SIZE = 0.02   # 2%

# Scheduling Parameters
# MINUTES_AFTER_OPEN_TO_TRADE is the offset handed to
# time_rules.market_open() in initialize.
MINUTES_AFTER_OPEN_TO_TRADE = 60
# NOTE(review): not referenced in the visible part of this file.
BASE_UNIVERSE_RECALCULATE_FREQUENCY = 'month_start'  # {week,quarter,year}_start are also valid


def initialize(context):
    """Configure costs, build the alpha pipeline and schedule rebalancing.

    The pipeline ranks three fundamentals/sentiment factors over the
    Q1500US universe (restricted to stocks where all three are present),
    sums the ranks into a single 'alpha' column and also exposes each
    stock's sector.  The attached pipeline is named 'pipe'.

    Returns the Pipeline object that was attached.
    """
    # Trade with zero spread and zero commission.
    set_slippage(slippage.FixedSlippage(spread=0.00))
    set_commission(commission.PerShare(cost=0, min_trade_cost=0))

    raw_factors = (
        operation_ratios.revenue_growth.latest,
        operation_ratios.operation_margin.latest,
        sentiment.sentiment_signal.latest,
    )

    # Start from the Q1500US filter and keep only stocks for which every
    # raw factor has a value.
    universe = Q1500US()
    for raw_factor in raw_factors:
        universe = universe & raw_factor.notnull()

    # Rank each factor within the universe; ties share their average rank.
    growth_rank, margin_rank, sentiment_rank = [
        raw_factor.rank(mask=universe, method='average')
        for raw_factor in raw_factors
    ]
    combined_alpha = growth_rank + margin_rank + sentiment_rank

    sector = Sector()
    pipe = Pipeline(
        columns={
            'alpha': combined_alpha,
            'sector': sector,
        },
        # Per the original author's note, combined_alpha is NaN for every
        # stock outside the universe, so this screen also applies the
        # universe filter.
        screen=combined_alpha.notnull() & sector.notnull(),
    )
    algo.attach_pipeline(pipe, 'pipe')

    # Rebalance at the start of each week, MINUTES_AFTER_OPEN_TO_TRADE
    # minutes after the market opens, with half_days=False.
    algo.schedule_function(
        do_portfolio_construction,
        date_rule=algo.date_rules.week_start(),
        time_rule=algo.time_rules.market_open(
            minutes=MINUTES_AFTER_OPEN_TO_TRADE,
        ),
        half_days=False,
    )

    return pipe

def before_trading_start(context, data):
    """Fetch the output of the 'pipe' pipeline and stash it on `context`.

    The original author's rationale: before_trading_start has a 5 minute
    timeout, whereas handle_data and scheduled functions only get 1 minute,
    so the pipeline computation is triggered here.
    """
    todays_output = algo.pipeline_output('pipe')
    context.pipeline_data = todays_output
    
def do_portfolio_construction(context, data):
    """Order the portfolio that maximises alpha under the risk constraints.

    Reads the pipeline output stored in `context.pipeline_data` (its
    `alpha` and `sector` columns and its index) and hands the objective and
    constraints to `algo.order_optimal_portfolio`.
    """
    todays_output = context.pipeline_data

    # Per-position bounds: -MAX_SHORT_POSITION_SIZE .. MAX_LONG_POSITION_SIZE.
    position_limits = opt.PositionConcentration.with_equal_bounds(
        -MAX_SHORT_POSITION_SIZE,
        MAX_LONG_POSITION_SIZE,
    )

    # Constrain ourselves to (almost exactly) zero net exposure in every
    # sector; a tiny tolerance is used instead of a hard 0.0.
    sector_neutrality = opt.NetPartitionExposure.with_equal_bounds(
        labels=todays_output.sector,
        min=-0.0001,
        max=0.0001,
    )

    constraints = [
        opt.MaxGrossLeverage(MAX_GROSS_LEVERAGE),
        position_limits,
        opt.DollarNeutral(),
        sector_neutrality,
    ]

    # Run the optimization; this computes the new target weights and places
    # the orders that move the portfolio toward them.
    algo.order_optimal_portfolio(
        objective=opt.MaximizeAlpha(todays_output.alpha),
        constraints=constraints,
        universe=todays_output.index,
    )


# Layout of the split 'past_data' list as myfunc indexes it: the sample for
# bar b is the slice [(b - _FEATURE_BARS) * _FEATURES_PER_BAR,
# b * _FEATURES_PER_BAR), i.e. 12 values.
# NOTE(review): presumably 4 values per bar over the 3 preceding bars --
# confirm against whatever produces 'past_data'.
_FEATURE_BARS = 3
_FEATURES_PER_BAR = 4
# Number of the four classifiers that must agree before a vote counts.
_MIN_AGREEING_VOTES = 3


def _feature_window(pastlist, bar):
    """Return the feature slice that belongs to `bar`, or None if incomplete.

    None is returned when the slice would start before the beginning of
    `pastlist` (a negative start would silently wrap around to the end of
    the list) or when `pastlist` is too short to fill the whole window.
    """
    start = (bar - _FEATURE_BARS) * _FEATURES_PER_BAR
    stop = bar * _FEATURES_PER_BAR
    if start < 0:
        return None
    window = pastlist[start:stop]
    if len(window) != stop - start:
        return None
    return window


def _build_training_set(price_list, pastlist, start_bar):
    """Build the (X, y) training lists for one security.

    For every bar from `start_bar` up to, but excluding, the last bar of
    `price_list`, the sample is the feature window of that bar and the
    label is 1 when the bar's price is above the previous bar's price,
    otherwise -1.  Bars without a complete feature window are skipped so
    that every row of X has the same length.
    """
    X = []
    y = []
    for bar in range(start_bar, len(price_list) - 1):
        features = _feature_window(pastlist, bar)
        if features is None:
            continue
        X.append(features)
        y.append(1 if price_list[bar] > price_list[bar - 1] else -1)
    return X, y


def _majority_vote(predictions):
    """Return the label at least _MIN_AGREEING_VOTES predictions share, else 0."""
    label, votes = Counter(predictions).most_common(1)[0]
    if votes >= _MIN_AGREEING_VOTES:
        return label
    return 0


def myfunc(context, data):
    """Train four classifiers per security and trade on their vote and SMAs.

    For each security in `context.security_list` this:
      1. takes 100 daily prices and the security's 'past_data' value, split
         into a list with `custom_split`;
      2. builds a training set (see _build_training_set) and appends the
         feature window of the most recent bar as the row to predict;
      3. scales all rows together, fits RandomForest, LinearSVC, NuSVC and
         LogisticRegression, and takes a 3-of-4 majority vote (0 if there is
         no such majority);
      4. goes long `context.weight` (with a stop order) when the vote is 1
         or the 20-bar mean price is above the 50-bar mean, otherwise goes
         short `context.weight` when the vote is -1 or the 50-bar mean is
         above the 20-bar mean.  Nothing is ordered while the security has
         open orders.

    Any exception raised while processing a security is printed and that
    security is skipped; the remaining securities are still processed.

    NOTE(review): `context.security_list`, `context.feature_window`,
    `context.weight`, `context.stop_loss_pct` and `custom_split` are not set
    or defined in the visible part of this file, and this function is not
    scheduled by the visible `initialize` -- confirm before relying on it.
    """
    price_history = data.history(context.security_list, fields="price",
                                 bar_count=100, frequency="1d")

    for s in context.security_list:
        # Errors are handled per security so that one bad symbol does not
        # stop the remaining ones from being traded.
        try:
            price_list = price_history[s].tolist()
            past = data.current(s, 'past_data')
            pastlist = custom_split(past)

            print(pastlist)
            print(len(past))
            print(len(pastlist))
            print(len(price_list))

            X, y = _build_training_set(price_list, pastlist,
                                       context.feature_window)
            print(('len(X1)', len(X)))

            # The row to predict is the window of the most recent bar; with
            # 100 bars this is pastlist[384:396].
            current_features = _feature_window(pastlist, len(price_list) - 1)
            if current_features is None or not X:
                print(('feature creation',
                       'not enough past_data to train and predict'))
                continue

            X.append(current_features)
            print(('len(X2)', len(X)))

            # Scale the training rows and the prediction row together so
            # they share the same mean/variance, then split them again.
            X = preprocessing.scale(X)
            current_features = X[-1:]
            X = X[:-1]

            print(current_features)
            print(('len(X)', len(X)))
            print(('len(y)', len(y)))

            classifiers = [
                RandomForestClassifier(n_estimators=100),
                LinearSVC(),
                NuSVC(),
                LogisticRegression(),
            ]
            for clf in classifiers:
                clf.fit(X, y)
            votes = [clf.predict(current_features)[0] for clf in classifiers]

            p = _majority_vote(votes)
            print(('Prediction', p))

            current_price = data.current(s, 'price')

            # Second signal: short vs. long simple moving average.
            print(('price_list', price_list))
            sma_50 = numpy.mean(price_list[-50:])
            sma_20 = numpy.mean(price_list[-20:])
            print(('sma_20', sma_20))
            print(('sma_50', sma_50))

            open_orders = get_open_orders()

            # NOTE(review): the signals are OR-ed, so a bullish SMA
            # crossover goes long even when the vote is -1 -- confirm this
            # is intended.
            if (p == 1) or (sma_20 > sma_50):
                if s not in open_orders:
                    order_target_percent(
                        s, context.weight,
                        style=StopOrder(context.stop_loss_pct * current_price))
            elif (p == -1) or (sma_50 > sma_20):
                if s not in open_orders:
                    order_target_percent(s, -context.weight)

        except Exception as e:
            print(str(e))
    
def handle_data(context, data):
    """Record the number of long and short positions and account leverage.

    A position counts as long when its amount is positive and as short when
    it is negative; positions with a zero amount are counted as neither.
    """
    amounts = [
        position.amount
        for position in context.portfolio.positions.itervalues()
    ]
    n_long = sum(1 for amount in amounts if amount > 0)
    n_short = sum(1 for amount in amounts if amount < 0)

    record(
        num_long=n_long,
        num_short=n_short,
        leverage=context.account.leverage,
    )