In [59]:
from datetime import datetime
import numpy as np
import pandas as pd
import string

In [60]:
df = pd.read_csv('data_subset.csv')
df = df.drop('Unnamed: 0', axis=1)

In [61]:
df['year'] = df['year'].map(lambda x: str(x))
df['month'] = df['month'].map(lambda x: str(x))
df['-'] = '-'
df['timestemp'] = df['year'] + df['-'] + df['month']
df['timestemp'] = pd.to_datetime(df['timestemp'])
df = df.set_index('timestemp')
df = df.drop(['active', 'year', 'month', '-', 'date'], axis=1)
df['volume_monthly'] = df['csho_1yr_avg']/4
df['EBIT'] = df['saleq_ttm'] - df['cogsq_ttm'] - df['xsgaq_ttm']
df['EBIT/EV'] = df['EBIT'] / df['entval']

df['indicator'] = df['EBIT/EV'].shift(periods = -12)

df.drop(['gics-sector', 'mom1m', 'mom3m', 'mom6m', 'mom9m', 'mrkcap',
       'entval', 'saleq_ttm', 'cogsq_ttm', 'xsgaq_ttm', 'oiadpq_ttm',
       'niq_ttm', 'cheq_mrq', 'rectq_mrq', 'invtq_mrq', 'acoq_mrq',
       'ppentq_mrq', 'aoq_mrq', 'dlcq_mrq', 'apq_mrq', 'txpq_mrq', 'lcoq_mrq',
       'ltq_mrq', 'seqq_mrq', 'atq_mrq', 'csho_1yr_avg',
       'prccm', 'ajexm', 'EBIT', 'EBIT/EV'], axis=1, inplace=True)

In [62]:
df = df[df.index >datetime(2001,1,1)]

In [63]:
N = len(df['gvkey'].unique())

In [64]:
df = df.pivot(columns = 'gvkey', values = ['adjusted_price','indicator','volume_monthly'])

In [65]:
tickers = df.columns.get_level_values(1).unique().values

In [66]:
n = 10  # top 10 stocks
periods = 100
prices = df['adjusted_price'].values
indicators = df['indicator'].values
volume_monthly = df['volume_monthly'].values

In [67]:
class algo(object):
    def __init__(self, initial_prices, initial_indicators, initial_volume): 
                     # intial_prices, initial_indicators, initial_volume are N-d arrays
        self.months = 0
        self.avail_capital = 100000000
        
        self.price_arr = initial_prices
        self.indicator_arr = initial_indicators
        self.volume_arr  = initial_volume
        # Initialize portfolio
        self.top_indices = np.argsort(-self.indicator_arr)[:n] # argsort is ascending so negate indicator_arr
        
        self.entry_months = np.array([-1]*N) # -1 indicates there is no position so no entry month
        self.entry_months[self.top_indices] = 0
        
        self.shares = np.array([0]*N)
        self.shares[self.top_indices] = (self.avail_capital/n/self.price_arr[self.top_indices]).astype(int)
        self.avail_capital = self.avail_capital - np.sum(self.price_arr*self.shares)
        # Add portfolio value variable
        #self.port_value = []
        
    def process_new_data(self, new_prices, new_indicators, new_volumes):
        self.months += 1
        self.price_arr = np.vstack((self.price_arr, new_prices))
        self.indicator_arr = np.vstack((self.indicator_arr, new_indicators))
        self.volume_arr = np.vstack((self.volume_arr, new_volumes))
        
        self.update_portfolio()
        
        # Update portf_value, prices * positions + avail_capital
        #self.port_value = 
        
    def update_portfolio(self):
        self.new_top_indices = np.argsort(-self.indicator_arr[-1])[:n]
        sell_indices = []
        buy_indices = []
        for x in self.top_indices:# Try to add new indices not currently in portfolio to buy_indices
            if (self.months-self.entry_months[x] >= 12) and (x not in self.new_top_indices): # if held for >=1yr and not in new top indices
                sell_indices += [x]
        
        if sell_indices:
            for y in self.new_top_indices: # Add k best new indices to portfolio, k = number of stocks we will sell
                if y not in self.top_indices:
                    buy_indices += [y]
                    if len(buy_indices) == len(sell_indices): # Stop once we have same number of buy as sell
                        break
        
            self.execute_sell(sell_indices)
            self.execute_buy(buy_indices)
    
    def execute_sell(self, sell_indices):
        print('selling', tickers[sell_indices])
        for i in sell_indices:
            sell_gain = self.price_arr[-1, i] *self.shares[i]
            transaction_cost = 0.01 * self.shares[i]
            slip_cost = 0
            if self.shares[i] >= 0.1 * self.volume_arr[-1, i]:
                slip_cost = self.price_arr[-1, i] * 0.01 * self.shares[i]
            
            self.avail_capital += sell_gain - transaction_cost - slip_cost # Update capital
            self.shares[i] = 0 # Reset shares
            self.entry_months[i] = -1 # Reset entry_mont to -1 to denote no position
            
        
    def execute_buy(self, buy_indices):
        print('buying', tickers[buy_indices])
        for i in buy_indices:
            buy_money = self.avail_capital/len(buy_indices) # Allocate avail_capital equally 
            buy_price = self.price_arr[-1, i] + 0.01
            n_shares = int(buy_money/buy_price)
            if n_shares >= 0.1 * self.volume_arr[-1, i]:
                buy_price = self.price_arr[-1, i] + 0.01 + 0.1*self.price_arr[-1, i]
                n_shares = int(buy_money/buy_price)
                
            self.shares[i] = n_shares
            self.entry_months[i] = self.months
            self.avail_capital = self.avail_capital - n_shares*buy_price
        

In [68]:
m1 = algo(prices[0, :], indicators[0, :], volume_monthly[0, :])



In [69]:
for i in range(1, periods):
    print(i)
    m1.process_new_data(prices[i, :], indicators[i, :], volume_monthly[i, :])

1
2
3
4
5
6
7
8
9
10
11
12
selling [145049 146171   7762  12250  10816  13709  11670   5250]
buying [ 29127 125276 138126 124355  60902  62217 130498  25331]
13
selling [145049 146171   7762  12250  10816  13709  11670   5250]
buying [125276 133468 130498 124355  25331  60902  62217   5342]
14
selling [145049 146171   7762  12250  10816  13709  11670   5250]
buying [125276 138122  25430   5342  10795  27914 136186   5050]
15
selling [145049 146171   7762  12250  10816  13709  11670   5250]
buying [138122 138126  17239  10795   5050  29127  62592  25331]
16
selling [145049 146171   7762  12250  10816  13709  11670   5250]
buying [10795  5050 24293 25331 61490 17239  7344 60902]
17
selling [145049 146171  12250  13709]
buying [25331 10795  5050 24293]
18
selling [145049 146171   7762  12250  13709  11670   5250]
buying [ 5050 10795 24293 60902 25331 61490 31424]
19
selling [145049 146171   7762  12250  13709  11670   5250]
buying [ 5050 10795 24293 61490 60902 24844 25331]
20
selling [14

selling [145049   3469 116104 146171   7762  12250  10816  13709  11670   5250]
buying [118122  62396  27914  14924   7881   2573   1076   2337   4194  13712]
