In [1]:
# Create the submission file from the file with return predictions for each stock.
# Ranking: derived from the predicted returns.
# Portfolio allocation: simple distribution based on the returns clipped to [-1, 1].

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Folder with the raw inputs (the asset universe CSV is read from here).
data_in = "../DataRaw/"
# Working folder: the predictions file is read from here and the submission file is written here.
data_folder = "../DataWork/"

In [4]:
# Read the asset universe and keep its ticker symbols as a plain list.
universe_path = data_in + "M6_Universe.csv"
assets = pd.read_csv(universe_path)
symbols = assets["symbol"].tolist()

In [5]:
# The predictions file has no header row; its first column is named "Return".
preds = (
    pd.read_csv(data_folder + "return_predictions.csv", header=None)
    .rename(columns={0: "Return"})
)

In [6]:
# Label each prediction row with its ticker symbol.
# NOTE(review): assumes the rows of return_predictions.csv are in the same
# order as the rows of M6_Universe.csv — TODO confirm against the producer.
preds.index = symbols
preds

Unnamed: 0,Return
ABBV,0.001077
ACN,0.010767
AEP,0.009152
AIZ,0.007286
ALLE,0.015186
...,...
XLC,0.007047
XLU,0.008295
XLP,0.008641
XLB,0.009209


In [7]:
# Summary statistics of the predicted returns.
preds.describe()

Unnamed: 0,Return
count,100.0
mean,0.007356
std,0.012269
min,-0.040782
25%,0.00248
50%,0.006933
75%,0.01044
max,0.077427


In [8]:
def invest_decision(x):
    """Turn predicted returns into portfolio weights whose absolute values sum to 1.

    The returns are clipped to [-1, 1] and rounded to 2 decimals, divided by
    the sum of their absolute values, and rounded to 2 decimals again. The
    last weight is then overwritten with the residual
    ``1 - sum(|all other weights|)`` so that rounding error does not leave the
    total absolute exposure different from 1.

    Parameters
    ----------
    x : pandas.Series
        Predicted returns, one entry per asset.

    Returns
    -------
    pandas.Series
        Weights rounded to 2 decimals, carrying the same index as ``x``.

    Notes
    -----
    * The last weight is the residual only; it does not keep the sign or the
      size of the last asset's own prediction.
    * If every clipped return rounds to 0, the normalising sum is 0 and the
      division produces NaN weights.
    """
    clipped_x = x.clip(-1, 1).round(2)
    sumw = clipped_x.abs().sum()
    dec = (clipped_x / sumw).round(2)
    # Use positional access: plain ``dec[-1]`` is label-based, so on an
    # integer-indexed Series it would append a new label -1 instead of
    # overwriting the last weight (and the positional fallback on other
    # indexes is deprecated in recent pandas).
    dec.iloc[-1] = round(1 - dec.iloc[:-1].abs().sum(), 2)
    return dec

In [9]:
# Portfolio weights derived from the predicted returns.
decision = invest_decision(preds["Return"])

In [10]:
# Build a new frame instead of aliasing `preds`: with `df = preds`, adding the
# column would also modify `preds`, since both names refer to the same object.
df = preds.assign(Decision=decision)

In [11]:
# Show the predicted returns next to the allocation decisions.
df

Unnamed: 0,Return,Decision
ABBV,0.001077,0.00
ACN,0.010767,0.01
AEP,0.009152,0.01
AIZ,0.007286,0.01
ALLE,0.015186,0.02
...,...,...
XLC,0.007047,0.01
XLU,0.008295,0.01
XLP,0.008641,0.01
XLB,0.009209,0.01


In [12]:
# Split the predicted returns into 5 equal-sized quantile bins labelled 1..5
# (1 = lowest predicted returns, 5 = highest).
quintile_labels = np.arange(1, 6)
ranked = pd.qcut(df["Return"], q=5, labels=quintile_labels)
ranked

ABBV    1
ACN     4
AEP     4
AIZ     3
ALLE    5
       ..
XLC     3
XLU     4
XLP     4
XLB     4
VXX     3
Name: Return, Length: 100, dtype: category
Categories (5, int64): [1 < 2 < 3 < 4 < 5]

In [13]:
# One-hot encode the quintile rank into columns Rank1..Rank5 and attach them
# to the frame by symbol (index-on-index merge).
# dtype=int pins the indicators to 0/1 integers: pandas >= 2.0 returns booleans
# by default, which would be written to the submission CSV as True/False.
rank_dummies = pd.get_dummies(ranked, prefix="Rank", prefix_sep="", dtype=int)
df = df.merge(rank_dummies, left_index=True, right_index=True)
df

Unnamed: 0,Return,Decision,Rank1,Rank2,Rank3,Rank4,Rank5
ABBV,0.001077,0.00,1,0,0,0,0
ACN,0.010767,0.01,0,0,0,1,0
AEP,0.009152,0.01,0,0,0,1,0
AIZ,0.007286,0.01,0,0,1,0,0
ALLE,0.015186,0.02,0,0,0,0,1
...,...,...,...,...,...,...,...
XLC,0.007047,0.01,0,0,1,0,0
XLU,0.008295,0.01,0,0,0,1,0
XLP,0.008641,0.01,0,0,0,1,0
XLB,0.009209,0.01,0,0,0,1,0


In [14]:
# Name the index "ID" so that it becomes the "ID" column once the index is reset.
df.index.name = "ID"

In [15]:
# Submission layout: ID, Rank1..Rank5, Decision (the Return column is dropped).
# Columns are picked by name rather than by position, so the result does not
# depend on the column order produced by earlier cells, and re-running this
# cell yields the same frame instead of shifting the layout.
new_cols = [f"Rank{i}" for i in range(1, 6)] + ["Decision"]
df = df.reset_index()[["ID"] + new_cols]
df

Unnamed: 0,ID,Rank1,Rank2,Rank3,Rank4,Rank5,Decision
0,ABBV,1,0,0,0,0,0.00
1,ACN,0,0,0,1,0,0.01
2,AEP,0,0,0,1,0,0.01
3,AIZ,0,0,1,0,0,0.01
4,ALLE,0,0,0,0,1,0.02
...,...,...,...,...,...,...,...
95,XLC,0,0,1,0,0,0.01
96,XLU,0,0,0,1,0,0.01
97,XLP,0,0,0,1,0,0.01
98,XLB,0,0,0,1,0,0.01


In [16]:
# Sanity check: columns 1-5 hold the rank indicators, so every row should sum
# to exactly 1 (each asset assigned to a single rank).
rank_flags = df.iloc[:, 1:6]
rank_flags.sum(axis="columns").value_counts()

1    100
dtype: int64

In [17]:
# Summary statistics of the numeric submission columns.
df.describe()

Unnamed: 0,Rank1,Rank2,Rank3,Rank4,Rank5,Decision
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.2,0.2,0.2,0.2,0.2,0.0078
std,0.402015,0.402015,0.402015,0.402015,0.402015,0.013527
min,0.0,0.0,0.0,0.0,0.0,-0.04
25%,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.01
75%,0.0,0.0,0.0,0.0,0.0,0.01
max,1.0,1.0,1.0,1.0,1.0,0.08


In [18]:
# Write the submission to the working folder, stamped with today's date.
submission_date = pd.to_datetime('today').strftime('%Y_%m_%d')
submission_path = data_folder + "submit_" + submission_date + ".csv"
df.to_csv(submission_path, index=False)