In [1]:
# Create the submission file from the file with per-stock return predictions.
# Ranking: quintile rank (1-5) of each stock's predicted return.
# Portfolio allocation: weights proportional to the predicted returns clipped to [-1, 1].

In [2]:
import numpy as np
import pandas as pd
from math import fsum

In [3]:
# Raise pandas' display limits so frames of up to 500 rows/columns print without truncation
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

In [4]:
# Relative folders: data_in is read for the asset universe, data_folder for the
# predictions file and the generated submission file
data_in = "../DataRaw/"
data_folder = "../DataWork/"

In [5]:
# Load the asset universe and keep its ticker symbols as a plain list
# NOTE(review): `symbols` does not appear to be used in the cells shown here - confirm before removing
assets = pd.read_csv(data_in + "M6_Universe.csv")
symbols = assets.symbol.to_list()

In [6]:
# Load the predicted return per stock, indexed by the 'Stocks' column
preds = pd.read_csv(data_folder + "return_predictions.csv").set_index('Stocks')
preds

Unnamed: 0_level_0,Prediction
Stocks,Unnamed: 1_level_1
ABBV,0.020246
ACN,0.01069
AEP,0.011774
AIZ,0.005801
ALLE,0.021437
AMAT,0.014777
AMP,0.01132
AMZN,0.030448
AVB,0.025454
AVY,0.008364


In [7]:
preds.describe()

Unnamed: 0,Prediction
count,100.0
mean,0.008928
std,0.013603
min,-0.043693
25%,0.003773
50%,0.007735
75%,0.013465
max,0.089452


In [8]:
def invest_decision(x):
    """Turn per-asset return predictions into portfolio weights in steps of 0.01.

    Predictions are clipped to [-1, 1], rounded to two decimals, divided by
    the sum of their absolute values and rounded to two decimals again.
    That rounding usually leaves the gross exposure (sum of absolute
    weights) slightly off 1, so single 0.01 steps are removed or added until
    it is exactly 1:

    * over-allocated: trim the largest long position; when no long position
      is left, trim the largest short position (move it toward zero);
    * under-allocated: grow the position with the smallest magnitude away
      from zero (a flat position becomes a 0.01 long).

    The adjustment runs on integer hundredths, so the stopping test is exact
    and every step is guaranteed to move the gross exposure toward 1.

    Parameters
    ----------
    x : pandas.Series or array-like
        Predicted return per asset.

    Returns
    -------
    pandas.Series or numpy.ndarray
        Weights that are multiples of 0.01 and whose absolute values add up
        to 1. A Series input yields a Series with the same index and name;
        any other input yields a NumPy array.

    Raises
    ------
    ValueError
        If the rounded predictions carry no usable signal: empty input, all
        values rounding to zero, or NaN present.
    """
    values = np.asarray(x, dtype=float)
    clipped_x = np.round(np.clip(values, -1, 1), 2)
    sumw = fsum(np.abs(clipped_x))
    if not sumw > 0:  # written this way so that a NaN total is rejected too
        raise ValueError(
            "cannot allocate: predictions are empty, contain NaN, or all round to zero"
        )

    # Work in whole hundredths (0.02 -> 2) so the bookkeeping below is exact.
    units = np.rint(np.round(clipped_x / sumw, 2) * 100).astype(np.int64)
    over1 = int(np.abs(units).sum()) - 100

    while over1 != 0:
        if over1 > 0:
            loc2change = int(units.argmax())
            if units[loc2change] <= 0:
                # No long position left to trim: shrink the largest short instead.
                loc2change = int(units.argmin())
            units[loc2change] -= np.sign(units[loc2change])  # one step toward zero
            over1 -= 1
        else:
            loc2change = int(np.abs(units).argmin())
            # One step away from zero; a flat position is opened as a long.
            units[loc2change] += 1 if units[loc2change] >= 0 else -1
            over1 += 1

    dec = units / 100
    if isinstance(x, pd.Series):
        return pd.Series(dec, index=x.index, name=x.name)
    return dec

In [9]:
# Portfolio weights derived from the predicted returns
decision = invest_decision(preds.Prediction)

In [10]:
# Builtin sum() adds the floats one at a time, so rounding error accumulates and the
# total is not exactly 1 (see the output of this cell). The assertion in the next cell
# is the actual check; the author also verified the output file in the competition.
sum(abs(decision.values))

1.0000000000000007

In [11]:
# The weights are multiples of 0.01, so check the gross exposure in whole hundredths.
# Exact equality on a float sum depends on the order in which the terms are added.
assert int(np.rint(np.abs(decision.values) * 100).sum()) == 100

In [12]:
# Build a new frame rather than aliasing `preds`, so `preds` keeps only the raw predictions
df = preds.assign(Decision=decision)

In [13]:
df

Unnamed: 0_level_0,Prediction,Decision
Stocks,Unnamed: 1_level_1,Unnamed: 2_level_1
ABBV,0.020246,0.02
ACN,0.01069,0.01
AEP,0.011774,0.01
AIZ,0.005801,0.01
ALLE,0.021437,0.02
AMAT,0.014777,0.01
AMP,0.01132,0.01
AMZN,0.030448,0.02
AVB,0.025454,0.02
AVY,0.008364,0.01


In [14]:
# Quintile rank per stock from its predicted return: 1 = lowest fifth, 5 = highest fifth
ranked = pd.qcut(df["Prediction"], q=5, labels=np.arange(1, 6))
ranked

Stocks
ABBV      5
ACN       4
AEP       4
AIZ       3
ALLE      5
AMAT      4
AMP       4
AMZN      5
AVB       5
AVY       3
AXP       3
BDX       3
BF-B      4
BMY       2
BR        5
CARR      5
CDW       5
CE        3
CHTR      5
CNC       2
CNP       5
COP       4
CTAS      5
CZR       5
DG        4
DPZ       5
DRE       1
DXC       4
EWA       4
EWC       5
EWG       2
EWH       1
EWJ       3
EWL       1
EWQ       1
EWT       3
EWU       1
EWY       2
EWZ       5
FB        4
FTV       2
GOOG      4
GPC       4
GSG       1
HIG       1
HIGH.L    2
HST       3
HYG       3
IAU       1
ICLN      1
IEAA.L    2
IEF       1
IEFM.L    2
IEMG      2
IEUS      2
IEVL.L    1
IGF       2
INDA      2
IUMO.L    4
IUVL.L    2
IVV       3
IWM       4
IXN       3
JPEA.L    1
JPM       2
KR        3
LQD       2
MCHI      3
MVEU.L    3
OGN       1
PG        3
PPL       5
PRU       4
PYPL      5
RE        2
REET      2
ROL       5
ROST      5
SEGA.L    1
SHY       1
SLV       1
SPMV.L    4
TLT      

In [15]:
# One indicator column per rank (Rank1..Rank5), joined on the stock index.
# dtype=int keeps the indicators as 0/1; without it, recent pandas versions
# produce boolean columns, which would be written to the CSV as True/False.
df = df.merge(
    pd.get_dummies(ranked, prefix="Rank", prefix_sep="", dtype=int),
    left_index=True,
    right_index=True,
)
df

Unnamed: 0_level_0,Prediction,Decision,Rank1,Rank2,Rank3,Rank4,Rank5
Stocks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ABBV,0.020246,0.02,0,0,0,0,1
ACN,0.01069,0.01,0,0,0,1,0
AEP,0.011774,0.01,0,0,0,1,0
AIZ,0.005801,0.01,0,0,1,0,0
ALLE,0.021437,0.02,0,0,0,0,1
AMAT,0.014777,0.01,0,0,0,1,0
AMP,0.01132,0.01,0,0,0,1,0
AMZN,0.030448,0.02,0,0,0,0,1
AVB,0.025454,0.02,0,0,0,0,1
AVY,0.008364,0.01,0,0,1,0,0


In [16]:
# Name the index "ID" on a new frame instead of mutating the Index object in place
df = df.rename_axis("ID")

In [17]:
# Submission column order: everything from position 2 onwards (the rank indicators)
# first, then the column at position 1 (Decision); the column at position 0 is dropped.
# The index is then turned into a regular column.
new_cols = df.columns[2:].append(df.columns[1:2])
df = df.loc[:, new_cols].reset_index()
df

Unnamed: 0,ID,Rank1,Rank2,Rank3,Rank4,Rank5,Decision
0,ABBV,0,0,0,0,1,0.02
1,ACN,0,0,0,1,0,0.01
2,AEP,0,0,0,1,0,0.01
3,AIZ,0,0,1,0,0,0.01
4,ALLE,0,0,0,0,1,0.02
5,AMAT,0,0,0,1,0,0.01
6,AMP,0,0,0,1,0,0.01
7,AMZN,0,0,0,0,1,0.02
8,AVB,0,0,0,0,1,0.02
9,AVY,0,0,1,0,0,0.01


In [18]:
# Sanity check: the five rank indicator columns (positions 1-5) should sum to 1 in every row
df.iloc[:,1:6].sum(axis=1).value_counts()

1    100
dtype: int64

In [19]:
# The weights are multiples of 0.01, so check the gross exposure in whole hundredths.
# Exact equality on a float sum depends on the order in which the terms are added.
assert int(np.rint(df.Decision.abs() * 100).sum()) == 100

In [20]:
df.describe()

Unnamed: 0,Rank1,Rank2,Rank3,Rank4,Rank5,Decision
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.2,0.2,0.2,0.2,0.2,0.008
std,0.402015,0.402015,0.402015,0.402015,0.402015,0.010249
min,0.0,0.0,0.0,0.0,0.0,-0.04
25%,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.01
75%,0.0,0.0,0.0,0.0,0.0,0.01
max,1.0,1.0,1.0,1.0,1.0,0.02


In [21]:
# Write the submission to the work folder as submit_<YYYY_MM_DD>.csv (today's date), without the row index
df.to_csv(f"{data_folder}submit_{pd.to_datetime('today').strftime('%Y_%m_%d')}.csv", index=False)

In [22]:
df

Unnamed: 0,ID,Rank1,Rank2,Rank3,Rank4,Rank5,Decision
0,ABBV,0,0,0,0,1,0.02
1,ACN,0,0,0,1,0,0.01
2,AEP,0,0,0,1,0,0.01
3,AIZ,0,0,1,0,0,0.01
4,ALLE,0,0,0,0,1,0.02
5,AMAT,0,0,0,1,0,0.01
6,AMP,0,0,0,1,0,0.01
7,AMZN,0,0,0,0,1,0.02
8,AVB,0,0,0,0,1,0.02
9,AVY,0,0,1,0,0,0.01


In [23]:
# Final look at the gross exposure (sum of absolute weights) of the written frame
df.Decision.abs().sum()

1.0