In [15]:
%matplotlib inline

import pandas as pd
import numpy as np
import datetime as dt
import os
from util import get_data, plot_data
import matplotlib.pyplot as plt
import seaborn as sns

    

def compute_portvals2(orders_file="./orders/orders.csv", start_val=1000000):
    """Simulate a portfolio from an orders CSV and return its per-day total value.

    The orders file is read with 'Date' as a parsed index; the columns
    'Symbol', 'Order' ('BUY' is treated as positive, anything else as
    negative) and 'Shares' are used.  Prices come from ``get_data`` for every
    symbol that appears in the file, over the range spanned by the first and
    last order rows.

    Processing steps, in order:
      1. net two consecutive orders that share date and symbol,
      2. turn each order into a signed share count in a per-symbol column,
      3. collapse the orders frame to one row per date,
      4. compute each order day's cash impact,
      5. walk every day of the price index, carrying holdings and cash
         forward and applying that day's orders.  An order day whose
         leverage (sum of absolute position values / portfolio value) is
         above 1.5 is rolled back to the previous day's holdings and cash.

    :param orders_file: path of the orders CSV.
    :param start_val: starting cash.
    :return: the 'value' column of the per-day frame (cash + position value).

    NOTE(review): presumably ``get_data`` returns a price DataFrame indexed
    by trading day with one column per symbol -- confirm against util.
    """
    orders_df = pd.read_csv(orders_file, index_col='Date', parse_dates=True, na_values=['nan'])

    # GET PRICES OF ALL USED SYMBOLS
    # The date range is taken from the first and last rows of the file, so
    # the orders are assumed to be sorted by date -- TODO confirm.
    start_date = orders_df.index[0]
    end_date = orders_df.index[-1]
    list_symbols = [i for i in orders_df.Symbol.unique()]
    all_symbols = get_data(list_symbols, pd.date_range(start_date, end_date))
    orders_df_full = pd.DataFrame(index=all_symbols.index)  # SAVE ALL DAYS DATA

    # ADD ALL SYMBOLS IN orders_df
    # Writing row 0 creates one column per symbol in both frames; the other
    # rows stay NaN until the fillna(0) calls below.
    for sym in list_symbols:
        orders_df.ix[0, sym] = "0"
        orders_df_full.ix[0, sym] = "0"

    # POPULATE THE BOOKKEEPING COLUMNS
    # Only row 0 is seeded here so that the columns exist; fillna(0) below
    # fills every remaining cell.
    orders_df.ix[0, "cash"] = start_val
    orders_df.ix[0, "value"] = start_val
    orders_df_full.ix[0, "cash"] = start_val
    orders_df_full.ix[0, "Symbol"] = ''
    orders_df_full.ix[0, "Order"] = ''
    orders_df_full.ix[0, "Shares"] = ''
    orders_df_full.ix[0, "stock_price"] = ''
    orders_df_full.ix[0, "cash_used"] = ''
    orders_df_full.ix[0, "value"] = ''
    orders_df_full.ix[0, "leverage"] = ''
    orders_df_full = orders_df_full.fillna(0)
    orders_df = orders_df.fillna(0)

    # SAME DAY ORDER
    # When two consecutive rows share both date and symbol, the earlier
    # row's Shares is zeroed and the later row receives the net quantity
    # (BUY when the net is >= 0, SELL otherwise).  Only adjacent rows are
    # compared.
    for i in range(orders_df.shape[0]):
        if i > 0:
            if orders_df.index[i] == orders_df.index[i - 1] and orders_df.ix[i, "Symbol"] == orders_df.ix[
                        i - 1, "Symbol"]:
                # NOTE: sym is assigned here but not read in this branch.
                sym = orders_df.ix[i, "Symbol"]
                prev_add_amt = float(orders_df.ix[i - 1, "Shares"]) if orders_df.ix[
                                                                           i - 1, 'Order'] == 'BUY' else -float(
                    orders_df.ix[i - 1, "Shares"])
                add_amt = float(orders_df.ix[i, "Shares"]) if orders_df.ix[i, 'Order'] == 'BUY' else -float(
                    orders_df.ix[i, "Shares"])

                orders_df.ix[i - 1, "Shares"] = 0
                final_amt = add_amt + prev_add_amt
                orders_df.ix[i, "Shares"] = np.absolute(final_amt)
                orders_df.ix[i, "Order"] = 'BUY' if final_amt >= 0 else 'SELL'

    # COMPRESS ORDER
    # Store each order as a signed share count (+ for BUY, - otherwise) in
    # the column named after its symbol.  The row is addressed by its date
    # value (orders_df.index[i]) rather than by position i.
    for i in range(orders_df.shape[0]):
        sym = orders_df.ix[i, "Symbol"]
        add_amt = float(orders_df.ix[i, "Shares"]) if orders_df.ix[i, 'Order'] == 'BUY' else -float(
            orders_df.ix[i, "Shares"])
        orders_df.ix[orders_df.index[i], sym] = add_amt

    # REMOVE DUPLICATE DAYS
    # Keep one row per date.  'date1' duplicates the index as a regular
    # column so that get_order_values can read the date from a row.
    orders_df = orders_df.groupby(orders_df.index).first()
    orders_df.ix[:, 'date1'] = orders_df.index
    orders_df_full.ix[:, 'date1'] = orders_df_full.index

    # COUNT ORDER VALUE
    # For every order date:
    #   <sym>_p         = -(signed shares * price) for that symbol
    #   cash_impact     = sum of the per-symbol values above
    #   leverage_stocks = sum of |shares| * price
    for i in range(orders_df.shape[0]):
        stock_value = 0
        leverage_stocks = 0
        for sym in list_symbols:
            stock_value = stock_value + float(orders_df.ix[i, sym]) * float(
                all_symbols.ix[orders_df.index[i], sym]) * -1
            leverage_stocks = leverage_stocks + np.absolute(float(orders_df.ix[i, sym])) * float(
                all_symbols.ix[orders_df.index[i], sym])
            orders_df.ix[i, sym + '_p'] = float(orders_df.ix[i, sym]) * float(
                all_symbols.ix[orders_df.index[i], sym]) * -1

        orders_df.ix[i, 'cash_impact'] = stock_value
        orders_df.ix[i, 'leverage_stocks'] = leverage_stocks

    def get_order_values(df_row):
        """Price the holdings in df_row at the date stored in df_row['date1'].

        Returns a tuple (signed sum of shares * price, sum of
        |shares * price|) over all symbols whose cell is not null.
        """
        stock_value = 0
        leverage_stocks = 0
        for sym in list_symbols:
            if pd.isnull(df_row[sym]) == False:
                stock_value = stock_value + float(df_row[sym]) * float(all_symbols.ix[df_row.ix['date1'], sym])
                leverage_stocks = leverage_stocks + np.absolute(
                    float(df_row[sym]) * float(all_symbols.ix[df_row.ix['date1'], sym]))
        return stock_value, leverage_stocks

    # NOTE: these three locals are not read again in this function.
    leverage = 0
    stock_value = 0
    cash = start_val

    # init first row
    # The first order row is copied into the first daily row.
    # NOTE(review): this presumes the first day of the price index is the
    # first order date -- confirm.
    orders_df_full.ix[0, :] = orders_df.ix[0, :]
    current_stock_value, current_leverage = get_order_values(orders_df_full.loc[orders_df_full.index[0], :])
    orders_df_full.ix[0, "cash"] = orders_df_full.ix[0, "cash"] + orders_df.ix[0, 'cash_impact']
    orders_df_full.ix[0, "value"] = current_stock_value + orders_df_full.ix[0, "cash"]
    orders_df_full.ix[0, "leverage"] = current_leverage / orders_df_full.ix[0, "value"]
    # Over-leveraged first order: drop the holdings and restore the starting
    # cash.  The row's 'value' and 'leverage' cells are not recomputed here.
    if orders_df_full.ix[0, "leverage"] > 1.5:
        for sym in list_symbols:
            orders_df_full.ix[0, sym] = 0
        orders_df_full.ix[0, 'overleverage'] = orders_df_full.ix[0, "leverage"]
        orders_df_full.ix[0, "cash"] = start_val

    for i in range(1, orders_df_full.shape[0]):

        # copy down all symbol holdings
        # (the cash copy sits inside the symbol loop, so it is repeated once
        # per symbol with the same value)
        for sym in list_symbols:
            orders_df_full.ix[i, sym] = orders_df_full.ix[i - 1, sym]
            orders_df_full.ix[i, "cash"] = orders_df_full.ix[i - 1, "cash"]

        # ADD NEW ORDER STOCKS
        if orders_df_full.index[i] in orders_df.index:
            for sym in list_symbols:
                orders_df_full.ix[i, sym] = float(orders_df_full.ix[i, sym]) + float(
                    orders_df.ix[orders_df_full.index[i], sym])
            orders_df_full.ix[i, "cash"] = orders_df_full.ix[i, "cash"] + orders_df.ix[
                orders_df_full.index[i], 'cash_impact']
            # check overleverage
            current_stock_value, current_leverage = get_order_values(orders_df_full.loc[orders_df_full.index[i], :])
            order_total_value = current_stock_value + orders_df_full.ix[i, "cash"]
            order_total_leverage = current_leverage / order_total_value
            # Reject the whole day's orders: restore yesterday's holdings and
            # cash, and record the rejected leverage in 'overleverage'.
            if order_total_leverage > 1.5:
                for sym in list_symbols:
                    orders_df_full.ix[i, sym] = orders_df_full.ix[i - 1, sym]
                orders_df_full.ix[i, 'overleverage'] = order_total_leverage
                orders_df_full.ix[i, "cash"] = orders_df_full.ix[i - 1, "cash"]

        # Value the (possibly rolled-back) holdings at today's prices.
        current_stock_value, current_leverage = get_order_values(orders_df_full.loc[orders_df_full.index[i], :])
        orders_df_full.ix[i, "value"] = current_stock_value + orders_df_full.ix[i, "cash"]
        orders_df_full.ix[i, "leverage"] = current_leverage / orders_df_full.ix[i, "value"]

    #orders_df_full.to_csv("orders_df_full.csv")
    return orders_df_full.value


def test_code(DRAW=True, VERBOSE=False, YBUY=0.1, YSELL=-0.1, BAGS=20):
    """Simulate the orders in ``qqq.csv`` and print the normalised final value.

    The portfolio is simulated with ``compute_portvals2`` starting from
    100000 in cash, divided by its first value, and the last normalised
    value is printed.

    All five parameters are accepted for call compatibility but are not
    read by this function.
    """
    of = "qqq.csv"
    sv = 100000

    # Process orders
    portvals = compute_portvals2(orders_file=of, start_val=sv)

    # compute_portvals2 returns a single column, but a DataFrame is still
    # accepted: reduce it to its first column *before* normalising so the
    # indexing below is always plain 1-D positional indexing.
    if isinstance(portvals, pd.DataFrame):
        portvals = portvals[portvals.columns[0]]  # just get the first column

    # Positional access: the index holds dates, so the first and last
    # elements must not be looked up by integer label.
    portvals = portvals / portvals.iloc[0]

    # A single pre-formatted string prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('Cumulative return of the benchmark:  %s' % portvals.iloc[-1])

      

# Run the report when this cell executes.  Both keyword arguments are
# accepted by test_code but are not read by its body.
test_code(VERBOSE=True,DRAW=True)
    




Cumulative return of the benchmark:  0.0734463536454


In [4]:
# ML4T MC3P4 Testing Utility
# Christian Stober

import pandas as pd
import datetime as dt
import util
import StrategyLearner as sl
import marketsim as ms
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import traceback
import pdb
import time

# overwrite to allow testing vs. old project specs
# (also used by run_tests as the maximum allowed absolute position and as the
# number of shares held by the benchmark)
sl.TRADE_SIZE = 200

# how much do you have to beat another metric by to "significantly outperform" it?
# (added to the benchmark's cumulative return to form the target)
OUTPERFORM = 0.1  # 10%

# date windows referenced by the rows of TESTS
IN_SAMPLE_START_DATE = dt.datetime(2008, 1, 1)
IN_SAMPLE_END_DATE = dt.datetime(2009, 12, 31)
OUT_OF_SAMPLE_START_DATE = dt.datetime(2010, 1, 1)
OUT_OF_SAMPLE_END_DATE = dt.datetime(2011, 12, 31)
# starting cash handed to the learner (sv) and to the market simulator (start_val)
START_CASH = 100000

# orders file written by run_tests for each test and then read by ms.compute_portvals;
# it is overwritten on every test
ORDERS_FNAME = 'orders.csv'
# forwarded to sl.StrategyLearner(verbose=...)
VERBOSE = False

# launch debugger on exceptions
DEBUGGER = False

# plotting parameters
PLOT_RESULTS = False  # set to True to generate performance plots
PORTFOLIO_COLOR = 'b'  # blue
BENCHMARK_COLOR = 'k'  # black
TARGET_COLOR = '#ff0000'  # red
SAVE_PLOTS = False  # True = save; False = show plots one by one
PLOT_FNAME = 'test%02d.png'  # formatted with the test number

# Tests from rubric 4/14/17
# Test 1 - For ML4T-220, the trained policy should provide a cumulative return greater than 400% in sample (20 points)
# Test 2 - For ML4T-220, the trained policy should provide a cumulative return greater than 100% out of sample (20 points)
# Test 3 - For IBM, the trained policy should significantly outperform the benchmark in sample (20 points)
# Test 4 - For SINE_FAST_NOISE, the trained policy should provide a cumulative return greater than 200% in sample (20 points)
# Test 5 - For UNH, the trained policy should significantly outperform the benchmark in sample (20 points)
#
# NOTE(review): the table below does not match the rubric text above in two
# places -- Test 1 uses a target of 1.0 (i.e. 100%, since run_tests compares
# the target against "normalized final value - 1") rather than 400%, and
# Test 3 uses 'AAPL' rather than IBM.  Possibly deliberate (TRADE_SIZE is
# overridden for "old project specs") -- confirm which side is intended.

TESTS = [
    # test number, train symbol, train start, train end, test symbol, test start, test end, target
    # target = absolute growth goal; if none then goal is to significantly outperform benchmark
    # test numbers must be unique
    [1, 'ML4T-220', IN_SAMPLE_START_DATE, IN_SAMPLE_END_DATE, 'ML4T-220', IN_SAMPLE_START_DATE, IN_SAMPLE_END_DATE,
     1.0],
    [2, 'ML4T-220', IN_SAMPLE_START_DATE, IN_SAMPLE_END_DATE, 'ML4T-220', OUT_OF_SAMPLE_START_DATE,
     OUT_OF_SAMPLE_END_DATE, 1.0],
    [3, 'AAPL', IN_SAMPLE_START_DATE, IN_SAMPLE_END_DATE, 'AAPL', IN_SAMPLE_START_DATE, IN_SAMPLE_END_DATE, None],
    [4, 'SINE_FAST_NOISE', IN_SAMPLE_START_DATE, IN_SAMPLE_END_DATE, 'SINE_FAST_NOISE', IN_SAMPLE_START_DATE,
     IN_SAMPLE_END_DATE, 2.0],
    [5, 'UNH', IN_SAMPLE_START_DATE, IN_SAMPLE_END_DATE, 'UNH', IN_SAMPLE_START_DATE, IN_SAMPLE_END_DATE, None]]


def run_tests():
    results = pd.DataFrame(index=[test[0] for test in TESTS], columns=['return', 'target', 'status'])
    results.index.name = 'Test No.'


    for testno, train_symbol, train_start_date, train_end_date, test_symbol, test_start_date, test_end_date, target in TESTS:
        start = time.time()
        # instantiate a strategy learner
        # could probably do this just once beforehand, unless addEvidence is implemented to be truly additive (vs. destructive)
        learner = sl.StrategyLearner(verbose=VERBOSE)

        learner.addEvidence(symbol=train_symbol, sd=train_start_date, ed=train_end_date, sv=START_CASH)
        trades = learner.testPolicy(symbol=test_symbol, sd=test_start_date, ed=test_end_date, sv=START_CASH)

        # sanity check - this should be impossible
        maxposition = int(abs(trades.cumsum()).max())
        if maxposition > sl.TRADE_SIZE:
            raise ValueError("Maximum allowable position of %d exceeded in Test %d" % (sl.TRADE_SIZE, testno))


        # generate orders file
        with open(ORDERS_FNAME, 'w') as ofp:
            ofp.write('Date,Symbol,Order,Shares\n')  # header
            for date, delta in trades.itertuples():
                if delta == 0:
                    continue
                date = str(date).split()[0]
                if delta > 0:
                    action = 'BUY'
                else:
                    action = 'SELL'
                quantity = abs(delta)
                ofp.write('%s,%s,%s,%d\n' % (date, test_symbol, action, quantity))

        portfolio_results = ms.compute_portvals(orders_file=ORDERS_FNAME, start_val=START_CASH)
        portfolio_results /= portfolio_results.ix[0]  # normalize

        if target is None or PLOT_RESULTS:
            benchmark = util.get_data([test_symbol], pd.date_range(test_start_date, test_end_date))
            benchmark = benchmark[[test_symbol]]  # del benchmark['SPY']
            benchmark *= sl.TRADE_SIZE  # account for how many shares we have (scaling from one share)
            benchmark += START_CASH - benchmark.ix[0]  # add in the cash we didn't spend on the lone transaction
            benchmark /= benchmark.ix[0]  # normalize

        cumulative_return = float(portfolio_results.ix[-1]) - 1
        if target is None:
            benchmark_cumulative_return = float(benchmark.ix[-1]) - 1
            target = benchmark_cumulative_return + OUTPERFORM

        if cumulative_return > target:
            status = "passed"
        else:
            status = "failed"

        results.loc[testno] = cumulative_return, target, status
        print("total time", str(time.time() - start))

        if PLOT_RESULTS:
            plt.plot(benchmark, BENCHMARK_COLOR)
            plt.plot(portfolio_results, PORTFOLIO_COLOR)
            plt.axhline(target + 1.0, color=TARGET_COLOR)

            # labels
            plt.title('Test %d: Strategy vs. Benchmark' % testno)
            plt.xlabel('Date')
            plt.ylabel('Normalized Performance')

            # create legend
            benchmark_line = mlines.Line2D([], [], color=BENCHMARK_COLOR, label='Benchmark')
            portfolio_line = mlines.Line2D([], [], color=PORTFOLIO_COLOR, label='Strategy')
            target_line = mlines.Line2D([], [], color=TARGET_COLOR, label='Target Returns')
            plt.legend(handles=[portfolio_line, benchmark_line, target_line], loc='upper left', fancybox=True)

            if SAVE_PLOTS:
                plt.savefig(PLOT_FNAME % testno)
                plt.close()
            else:
                plt.show()
                plt.close()


    print results


# Script entry point: run every test; with DEBUGGER enabled, a failure prints
# the traceback and opens a post-mortem pdb session instead of propagating.
if __name__ == '__main__':
    try:
        run_tests()
    except Exception:
        # Only ordinary errors are intercepted.  KeyboardInterrupt and
        # SystemExit are not subclasses of Exception, so Ctrl-C or an explicit
        # exit is never turned into a debugger session.
        if not DEBUGGER:
            raise
        traceback.print_exc()
        pdb.post_mortem()


('total time', '8.7132461071')
('total time', '10.3126292229')
('total time', '12.2725241184')
('total time', '12.5147309303')
('total time', '19.4685709476')
            return   target  status
Test No.                           
1          1.43775        1  passed
2          1.43211        1  passed
3          0.18582  0.13164  passed
4        -0.933898        2  failed
5          0.08646  0.04952  passed


In [None]:
('total time', '10.174672842')
('total time', '11.1741588116')
('total time', '10.0817780495')
('total time', '9.97241401672')
('total time', '15.1485102177')
            return   target  status
Test No.                           
1          1.17273        1  passed
2         0.866322        1  failed
3          0.19786  0.13164  passed
4        -0.909362        2  failed
5           0.0593  0.04952  passed