In [8]:

import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import QuantileTransformer
from joblib import load

from sqlalchemy import create_engine
import pymssql
from datetime import datetime

from alpaca.trading.client import TradingClient
from alpaca.data import StockHistoricalDataClient
from alpaca.data.requests import StockLatestQuoteRequest
from alpaca.trading.requests import MarketOrderRequest, GetAssetsRequest
from alpaca.trading.enums import OrderSide, TimeInForce, AssetClass

KEY = "PKX210XR6N17LF7WIWC8"
SECRET_KEY = "Cc6VXW58xRrsMQ98kbkZvw5dAOKKPMZaiRpLGraG"
numstocks = 200

qt = QuantileTransformer(output_distribution="normal")
pipe = load("files/linear_model_2023-01-20.joblib")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [2]:
server = 'fs.rice.edu'
database = 'stocks'
username = 'stocks'
password = '6LAZH1'
string = "mssql+pymssql://" + username + ":" + password + "@" + server + "/" + database 
conn = create_engine(string).connect()

unused code to calculate nonsingular covariance matrix when N>T

    def mycov(R):
        U, S, Vh = np.linalg.svd(R-R.mean(), full_matrices=False)
        B = pd.DataFrame(Vh.T, index=rets.columns)
        numdays = R.shape[0]
        C = B @ np.diag(S*S/numdays) @ B.T
        return C + np.diag(R.var() - np.diag(C))

## Get tickers and returns

In [3]:


ticks = pd.read_sql("select ticker from today", conn)
ticks = ticks["ticker"].to_list()

prices = pd.read_sql("select ticker, date, close_ from sep order by ticker, date", conn)
prices = prices[(prices.ticker.isin(ticks)) & (prices.date.astype(str)>="2022-03-14")]
prices = prices.set_index(["ticker", "date"]).unstack()
prices = prices.dropna()

ticks = prices.index.to_list()

rets = prices.T.pct_change().iloc[1:]

## Rank stocks

#### Get features

In [4]:
df = pd.read_sql(
    """
    select ticker, date, bm, mom12m, roeq, mve, famaindustry
    from today
    where price > 5
    """, 
    conn
)
conn.close()


df = df.dropna()
df = df[df.ticker.isin(ticks)]
df = df.set_index("ticker")
df = df.sort_values(by="mve")
df = df.iloc[:-500]

features = ["bm", "mom12m", "roeq"]

#### Predict and rank

In [5]:
trans_features = qt.fit_transform(df[features])
trans_features = pd.DataFrame(trans_features, columns=features)
df["predict"] = pipe.predict(trans_features)
df["rnk"] = df.predict.rank(method="first")

for f in features:
    df["t"+f] = qt.fit_transform(df[f].to_numpy().reshape(-1,1))

## Get data from Alpaca

#### Get account equity

In [9]:
trading_client = TradingClient(KEY, SECRET_KEY, paper=True)
account = trading_client.get_account()
equity = float(account.equity)

#### Get tradeable and shortable stocks

In [10]:
assets = trading_client.get_all_assets()

assets = [
    x for x in assets 
    if (x.asset_class[:]=='us_equity') 
    and (x.symbol in df.index) 
    and (x.status[:]=='active')
]
symbols = [x.symbol for x in assets]
tradable = [x.tradable for x in assets]
shortable = [x.shortable for x in assets]

df["tradable"]= pd.Series(tradable, index=symbols)
df["shortable"] = pd.Series(shortable, index=symbols)

#### Get quotes

In [11]:
data_client = StockHistoricalDataClient(KEY, SECRET_KEY)
params = StockLatestQuoteRequest(symbol_or_symbols=df.index.to_list())
quotes = data_client.get_stock_latest_quote(params)

df["ask"] = [quotes[x].ask_price for x in df.index]
df["bid"] = [quotes[x].bid_price for x in df.index]

#### Get positions

In [12]:
positions = trading_client.get_all_positions()
if len(positions) > 0:
    positions = {x.symbol: int(x.qty) for x in positions}
    positions = pd.Series(positions)
    df["current"] = positions
    df["current"] = df.current.fillna(0)
else:
    df["current"] = 0

### Decide longs and shorts

In [97]:
numlong = 5
numshort = 5
df = df.sort_values(by="rnk")

short_cutoff = df[df.shortable & (df.bid>0)].rnk.iloc[numshort-1]
long_cutoff = df[df.tradable & (df.ask>0)].rnk.iloc[-numlong]
df["short"] = df.shortable & (df.bid>0) & (df.rnk<=short_cutoff)
df["long"] = df.tradable & (df.ask>0) & (df.rnk>=long_cutoff) 

shorts = df[df.short].index.to_list()
longs = df[df.long].index.to_list()

In [98]:
longs

['SBOW', 'GLRE', 'TPH', 'BRY', 'IMMR']

In [99]:
from cvxopt import matrix
from cvxopt.solvers import qp as Solver, options as SolverOptions
SolverOptions['show_progress'] = True



In [112]:
SolverOptions['show_progress'] = True

def gmv(cov, shorts=True):
    n = cov.shape[0]
    Q = matrix(cov, tc="d")
    p = matrix(np.zeros((n, 1)), tc="d")
    G = matrix(np.zeros((n,n)), tc="d") if shorts else matrix(-np.ones((n,n)), tc="d")
    h = matrix(np.zeros((n, 1)), tc="d")
    A = matrix(np.ones((1, n)), tc="d")
    b = matrix(np.ones((1, 1)), tc="d")
    return Solver(Q, p, G, h, A, b)

cov = 100*100*rets[longs+shorts].cov().to_numpy()

gmv(cov, shorts=False)

     pcost       dcost       gap    pres   dres
 0:  5.1285e-04 -9.6504e-04  3e-03  1e+00  3e+01
 1:  7.1510e-05  1.0726e-03  3e-05  1e+00  3e-01
 2:  7.1455e-05  1.0971e-03  5e-07  1e+00  6e-02
 3:  1.7149e-03  5.9360e-03  4e-07  1e+00  2e-01
 4:  4.5827e+00 -4.7314e+00  1e+01  9e-01  2e+00
 5:  3.8652e-01 -1.7238e+00  1e+00  5e-01  6e-01
 6:  1.2458e-02 -1.4264e+00  2e-01  9e-01  1e-01
 7:  4.6785e-05 -1.4149e+00  4e-03  1e+00  3e-02
 8:  1.8117e-06 -1.4149e+00  4e-05  1e+00  9e-03
 9:  1.8168e-06 -1.4149e+00  4e-07  1e+00  9e-03
10:  1.8169e-06 -1.4149e+00  4e-09  1e+00  9e-03
11:  1.8169e-06 -1.4149e+00  4e-11  1e+00  9e-03
12:  1.8169e-06 -1.4149e+00  4e-13  1e+00  9e-03
13:  1.8169e-06 -1.4149e+00  4e-15  1e+00  9e-03
14:  1.8169e-06 -1.4149e+00  4e-17  1e+00  9e-03
15:  1.8169e-06 -1.4149e+00  5e-19  1e+00  9e-03
Terminated (singular KKT matrix).


{'x': <10x1 matrix, tc='d'>,
 'y': <1x1 matrix, tc='d'>,
 's': <10x1 matrix, tc='d'>,
 'z': <10x1 matrix, tc='d'>,
 'status': 'unknown',
 'gap': 4.558597551933468e-19,
 'relative gap': None,
 'primal objective': 1.8169448269280287e-06,
 'dual objective': -1.4149026041905612,
 'primal infeasibility': 1.0,
 'dual infeasibility': 0.009029208167914748,
 'primal slack': 3.155326180177797e-19,
 'dual slack': 0.14143571390494783,
 'iterations': 15}

In [103]:
ratio = 0.5

numlongs = len(longs)
numshorts = len(shorts)
num = numlongs+numshorts
R = rets[longs+shorts]
if num <= R.shape[0]:
    cov = R.cov().to_numpy()
else:
    U, S, Vh = np.linalg.svd(R-R.mean(), full_matrices=False)
    C = Vh.T[:, :20] @ np.diag(S[:20]*S[:20]/R.shape[0]) @ Vh[:20, :]
    cov = C + np.diag(R.var() - np.diag(C))
Q = matrix(100*100*cov, tc="d")
p = matrix(np.zeros(num), (num, 1), tc="d")

# long wts >= 0, short wts <= 0
diag = np.concatenate((-np.ones(numlongs), np.ones(numshorts)))
G = matrix(np.diag(diag), tc="d")
h = matrix(np.zeros(num), (num, 1), tc="d")

# long wts sum to 1+ratio, short wts sum to -ratio
    
A = np.zeros((2, num))
A[0, :numlongs] = 1
A[1, -numshorts:] = 1
A = matrix(A, tc="d")
b = matrix([1+ratio, -ratio], (2, 1), tc="d")
sol = Solver(Q, p, G, h, A, b)

     pcost       dcost       gap    pres   dres
 0:  6.0617e-01  1.9232e+00  2e+01  4e+00  6e+00
 1:  1.9418e+00  3.6923e+00  4e+00  1e+00  9e+00
 2:  2.6114e+00  5.1028e+01  4e+00  1e+00  1e+02
 3:  3.3761e+00  1.7798e+05  5e+00  1e+00  5e+05
 4:  4.3422e+00  6.6345e+10  2e+03  1e+00  2e+11
 5:  5.5148e+00  2.4732e+18  7e+08  1e+00  7e+18
 6:  6.8924e+00  9.2196e+27  2e+16  1e+00  2e+28
 7:  8.4692e+00  3.4371e+39  9e+25  1e+00  9e+39
 8:  1.0234e+01  1.2795e+53  3e+37  1e+00  3e+53
 9:  1.1588e+01  3.3605e+68  9e+50  1e+00  9e+68
10:  4.2264e+01  1.2889e+87  2e+72  1e+00  3e+87
11:  4.5840e+01  7.6368e+101  1e+85  1e+00 2e+102
12:  4.5862e+02  7.6752e+101  1e+85  9e+00 2e+102
13:  1.6070e+04  8.5669e+101  1e+85  5e+01 2e+102
14:  4.8901e+04  1.3929e+121  2e+104  8e+01 4e+121
15:  2.0130e+06  1.3929e+121  2e+104  8e+02 4e+121
16:  1.1534e+13  1.3929e+121  2e+104  1e+06 4e+121
17:  1.5723e+13  2.1233e+144  4e+127  1e+06 6e+144
18:  6.9214e+14  2.1233e+144  4e+127  1e+07 6e+144
19:  2.9

ValueError: domain error

In [95]:
np.array(sol["x"]).flatten()

array([ 0.78839543,  0.71160457, -0.19494301, -0.30505699])

In [30]:
sol

{'x': <400x1 matrix, tc='d'>,
 'y': <2x1 matrix, tc='d'>,
 's': <400x1 matrix, tc='d'>,
 'z': <400x1 matrix, tc='d'>,
 'status': 'unknown',
 'gap': 2.5762998102341293e+30,
 'relative gap': 5.507118002528709e-06,
 'primal objective': 1.968716444471006e+30,
 'dual objective': 4.678127123935182e+35,
 'primal infeasibility': 1.74134513748151e+17,
 'dual infeasibility': 7.970446636185377e+35,
 'primal slack': 3.0079283894484595e-17,
 'dual slack': 1110890116732.6858,
 'iterations': 100}

## Trade

#### Calculate target positions

In [None]:
long_per_stock = 1.3*equity / numstocks
short_per_stock = 0.3*equity / numstocks

df = df.sort_values(by="rnk")

try:
    short_cutoff = df[df.shortable & (df.bid>0)].rnk.iloc[numstocks-1]
    long_cutoff = df[df.tradable & (df.ask>0)].rnk.iloc[-numstocks]
    df["target"] = np.where(
        df.shortable & (df.bid>0) & (df.rnk<=short_cutoff),
        -short_per_stock/df.bid, 
        0
    )
    df["target"] = np.where(
        df.tradable & (df.ask>0) & (df.rnk>=long_cutoff), 
        long_per_stock/df.ask, 
        df.target
    )
    df["target"] = df.target.astype(int)
except:
    df["target"] = 0

#### Calculate trades

Using a simple but suboptimal protocol: trade to target positions without trying to minimize the number of round trips we might eventually make.

In [None]:
df["trade"] = df.target - df.current

#### Make trades

In [None]:
for tick in df.index: 
    if df.loc[tick, "trade"]<0:
        try:
            market_order_data = MarketOrderRequest(
                symbol=tick,
                qty=-df.loc[tick, "trade"],
                side=OrderSide.SELL,
                time_in_force=TimeInForce.DAY
            )
            market_order = trading_client.submit_order(
                order_data=market_order_data
            )
        except:
            print(f"sell order for {tick} failed")
    elif df.loc[tick, "trade"]>0:
        try:
            market_order_data = MarketOrderRequest(
                symbol=tick,
                qty=df.loc[tick, "trade"],
                side=OrderSide.BUY,
                time_in_force=TimeInForce.DAY
            )
            market_order = trading_client.submit_order(
                order_data=market_order_data
            )
        except:
            print(f"buy order for {tick} failed")



## Save data

In [None]:
today = datetime.today().strftime("%Y-%m-%d")

df["date"] = today

try:
    d = pd.read_csv("files/trade_data.csv", index_col="ticker")
    d = d[d.date != today]
    df = pd.concat((d, df))
    df.to_csv("files/trade_data.csv")
except:
    df.to_csv("files/trade_data.csv")

In [None]:
account = trading_client.get_account()
account = dict(account)
account = pd.DataFrame(pd.Series(account)).T
account["date"] = today

try:
    d = pd.read_csv("files/account.csv")
    d = d[d.date != today]
    account = pd.concat((d, account))
    account.to_csv("files/account.csv")
except:
    account.to_csv("files/account.csv")


In [None]:
positions = trading_client.get_all_positions()
positions = {x.symbol: x.qty for x in positions}
positions = pd.DataFrame(pd.Series(positions))
positions["date"] = today

try:
    d = pd.read_csv("files/positions.csv")
    d = d[d.date != today]
    positions = pd.concat((d, positions))
    positions.to_csv("files/positions.csv")
except:
    positions.to_csv("files/positions.csv")

  positions = pd.DataFrame(pd.Series(positions))
