In [None]:
import pandas as pd
pd.set_option('display.max_rows',500)
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline

# For reading stock data from yahoo
import pandas_datareader.data as pdr
from pandas_datareader.data import DataReader
import yfinance as yf

# For time stamps
import datetime

In [None]:
# Evaluation inputs: the asset universe, the submission to score, and the date window.
submission_file = 'submission.csv'

file = pd.read_csv('M6_Universe.csv')
stocks_list = file.loc[:, 'symbol'].values.tolist()

# Evaluation window; prices are fetched from a few days earlier so that a
# closing price preceding `start` is available as the return baseline.
start = datetime.datetime(year=2022, month=9, day=18)
end = datetime.datetime(year=2022, month=10, day=9)
far_back = start + datetime.timedelta(days=-6)

## Download 100 assets

In [None]:
# Fetch the 'Adj Close' price table for every symbol in `stocks_list`,
# covering far_back..end, then show summary statistics per column.
# NOTE(review): the pandas_datareader 'yahoo' source has been reported as
# unreliable/broken in recent releases, and `yfinance` is imported above but
# never used -- confirm this call still works before relying on the results.
all_stocks = DataReader(stocks_list, 'yahoo', far_back, end)['Adj Close']
all_stocks.describe()

In [None]:
# get previous day closing price
# Walk backwards from `start` until we land on a date that is present in the
# price table (i.e. an actual trading day). That date's close is the baseline
# against which the first return of the window is measured.
current_date = start
current_col = current_date.strftime('%Y-%m-%d')
while current_col not in all_stocks.index:
    current_date = current_date - datetime.timedelta(days=1)
    if current_date < far_back:
        # We stepped past the earliest downloaded date: fail loudly with a
        # real exception instead of relying on an undefined name.
        raise RuntimeError(
            "Shouldn't have got this far: no trading day found between "
            f"{far_back:%Y-%m-%d} and {start:%Y-%m-%d}; widen the download window"
        )
    current_col = current_date.strftime('%Y-%m-%d')

# Keep only the rows from the baseline day through the end of the window.
in_window = (all_stocks.index >= current_date) & (all_stocks.index <= end)
all_stocks = all_stocks[in_window]
all_stocks

In [None]:
# Show the resolved baseline date alongside the end of the window.
current_date, end

In [None]:
# Re-anchor the window on the baseline trading day found above and derive the
# string labels used to address the first and last usable dates.
start_col = current_date.strftime('%Y-%m-%d')
start = current_date
# Last date on which at least 50 assets have a non-missing price.
end_col = all_stocks.dropna(axis='index', thresh=50).index[-1].strftime('%Y-%m-%d')
(start_col, end_col)

In [None]:
# extract total returns
# Percentage change between the baseline row and the final row, rounded to
# 4 decimals; result has one row per asset and a single column (the end date).
total_returns = (
    all_stocks.loc[[start_col, end_col]]
    .pct_change()
    .round(4)
    .T[[end_col]]
)
total_returns

In [None]:
# extract total ranks
def generate_rank(df):
    """Rank every column of ``df`` with ``generate_rank_from_values``.

    Returns a pair of frames shaped like ``df``: the first holds the
    (tie-adjusted) rank of each row, the second holds the per-row rank
    vector. Columns that are entirely null are skipped and left empty.
    """
    layout = dict(index=df.index, columns=df.columns)
    ranks = pd.DataFrame(**layout)
    vector_ranks = pd.DataFrame(**layout)

    for name in df.columns:
        column = df[name]
        if not column.isnull().all():
            ranks[name], vector_ranks[name] = generate_rank_from_values(column.values)

    return ranks, vector_ranks

def generate_rank_from_values(value_list):
    """Turn 100 values into quintile ranks and rank-membership vectors.

    The values are ordered from highest to lowest and split into five classes
    of 20: the top 20 get class 5, the bottom 20 get class 1. Values that are
    exactly tied *across* a class boundary share the outcome: each tied entry
    receives the mean class as its rank and, in its vector, the fraction of
    the tie group that fell into each class (both rounded to 2 decimals).

    Parameters
    ----------
    value_list : sequence of 100 numbers
        Missing values (NaN) sort last and therefore land in class 1.

    Returns
    -------
    ranks : list of float
        Tie-adjusted class (1..5) for each input, in input order.
    vector_ranks : list of list of float
        For each input, ``[w1, w2, w3, w4, w5]`` with the weight of classes
        1..5 (one-hot unless the value is part of a boundary tie).

    Raises
    ------
    ValueError
        If ``value_list`` does not contain exactly 100 entries.
    """
    if len(value_list) != 100:
        # Raising (rather than printing and returning None) gives the caller a
        # meaningful error instead of a failure when unpacking the result.
        raise ValueError(f'Values list not 100 in number (got {len(value_list)})')

    class_labels = [1, 2, 3, 4, 5]

    df = pd.DataFrame(value_list, columns=['data'])
    orig_index = df.index
    df = df.sort_values('data', ascending=False)
    df['my5rank'] = [5]*20 + [4]*20 + [3]*20 + [2]*20 + [1]*20
    # Float columns so fractional tie weights can be written without an
    # incompatible-dtype assignment.
    df['adjusted5rank'] = df['my5rank'].astype(float)
    df = df.join(pd.get_dummies(df['my5rank'], dtype=float))

    # ties on the margins of the classes: a group of equal values that was
    # split over more than one class by the positional 20/20/20/20/20 cut.
    # Collected up front so the frame is not modified while being grouped.
    boundary_ties = [
        group for _, group in df.groupby('data')['my5rank']
        if group.nunique() > 1
    ]
    for population in boundary_ties:
        members = population.index
        df.loc[members, 'adjusted5rank'] = round(population.mean(), 2)
        for label in population.unique():
            share = (population == label).sum() / len(population)
            df.loc[members, int(label)] = round(share, 2)

    # Report results in the caller's original order, not the sorted order.
    in_input_order = df.loc[orig_index]
    ranks = in_input_order['adjusted5rank'].tolist()
    vector_ranks = in_input_order[class_labels].values.tolist()
    return ranks, vector_ranks

# Rank the realised total returns: `ranks` holds the tie-adjusted class per
# asset, `vector_ranks` the per-asset class-membership vector.
ranks, vector_ranks = generate_rank(total_returns)
ranks

In [None]:
# Display the realised rank vectors produced by generate_rank.
vector_ranks

In [None]:
# Forecast Performance

def RPS(df, predictions):
    """Score every column of ``df`` against ``predictions`` via ``RPS_T``.

    Returns a frame shaped like ``df``; columns of ``df`` that are entirely
    null are not scored and stay empty in the result.
    """
    result = pd.DataFrame(index=df.index, columns=df.columns)

    scored_columns = [name for name in df.columns if not df[name].isnull().all()]
    for name in scored_columns:
        result[name] = RPS_T(df[name].values, predictions)

    return result

def RPS_T(actual, predictions):
    """Return the list of per-item scores for paired actual/predicted vectors.

    Items are paired positionally; pairing stops at the shorter input.
    """
    return [RPS_i_T(truth, forecast) for truth, forecast in zip(actual, predictions)]

def RPS_i_T(actual, predictions, cumulative=True):
    """Ranked Probability Score for one item.

    The RPS compares the *cumulative* distributions of the realised outcome
    and of the forecast, so that putting probability on a class far from the
    realised one is penalised more than putting it on a neighbouring class.

    Parameters
    ----------
    actual : sequence of float
        Realised class-membership vector (e.g. one-hot over the classes).
    predictions : sequence of float
        Forecast probability for each class, in the same class order.
    cumulative : bool, default True
        When True, both vectors are accumulated along the class axis before
        comparison (the RPS definition). Pass False to get the previous
        behaviour of this function: the mean squared difference of the raw
        vectors, which ignores class ordering.

    Returns
    -------
    float
        Mean of the squared differences between the two (cumulative) vectors.
    """
    actual = np.asarray(actual, dtype=float)
    predictions = np.asarray(predictions, dtype=float)
    if cumulative:
        actual = np.cumsum(actual, axis=-1)
        predictions = np.cumsum(predictions, axis=-1)
    return np.mean((predictions - actual) ** 2)

In [None]:
# load forecast ranks from file
file = pd.read_csv(submission_file)
file['Symbols'] = file[file.columns[0]]

# Start from a frame shaped like the realised vectors (transposed so that each
# asset is a column) and overwrite every asset with its forecast vector.
forecast_vranks = vector_ranks.copy().T

# Because the starting frame holds the *realised* vectors, any asset that the
# submission does not cover would be scored as a perfect forecast. Refuse to
# continue rather than silently understate the error.
missing_tickers = sorted(set(forecast_vranks.columns) - set(file['Symbols']))
if missing_tickers:
    raise ValueError(f'Submission has no forecast for: {missing_tickers}')

rank_columns = ['Rank 1', 'Rank 2', 'Rank 3', 'Rank 4', 'Rank 5']
for index, row in file.iterrows():
    ticker = row['Symbols']
    # Named so it does not overwrite the notebook-level `ranks` frame.
    forecast_probs = [row[column] for column in rank_columns]
    # Wrapped in a list so the whole vector is stored in the single row cell.
    forecast_vranks[ticker] = [forecast_probs]

forecast_vranks = forecast_vranks.T
forecast_vranks

In [None]:
# Overall forecast score: average of the per-asset scores between the realised
# vectors and the forecast vectors in the `end_col` column. The two lists are
# paired positionally by RPS_T, so both frames must list assets in the same order.
rps = np.mean(RPS_T(vector_ranks[end_col].values.tolist(), forecast_vranks[end_col].values.tolist()))
rps

### Portfolio performance

In [None]:
# load portfolio strategy from file
# The first column of the submission identifies the asset; keep only the
# 'Decision' column, indexed by that identifier.
strategy = pd.read_csv(submission_file)
strategy = (
    strategy
    .assign(Symbols=strategy.iloc[:, 0])
    .set_index('Symbols')[['Decision']]
)
strategy

In [None]:
# evaluate daily returns
# One row per asset, one column per date; the first column is the baseline day
# and is entirely NaN because it has no previous price to compare against.
daily_returns = all_stocks[all_stocks.index >= start].dropna(axis=0, how='all').pct_change().round(4).T

decision = strategy['Decision']

# Weight each asset's daily return by its portfolio decision. The baseline
# column is dropped first: summing its NaNs would yield a spurious 0.0 "return"
# that counts as an extra trading day and distorts the standard deviation.
# Decisions are reindexed to the price table so the result keeps its rows.
portfolio_returns = daily_returns.iloc[:, 1:].mul(
    decision.reindex(daily_returns.index), axis=0
)

# Portfolio return per day = sum of the weighted asset returns (NaNs skipped).
portfolio_returns.loc['Total'] = portfolio_returns.sum()
RET = portfolio_returns.loc[['Total'], :]

# Daily log returns, their total, their standard deviation, and the ratio.
ret = np.log(1 + RET).T
total_ret = np.sum(ret.values)
# NOTE(review): np.std defaults to the population formula (ddof=0); confirm
# whether the scoring rules call for the sample formula (ddof=1) instead.
sdp = np.std(ret.values)
ir = total_ret / sdp
(total_ret, sdp, ir)

In [None]:
# Display the per-day total portfolio returns.
RET

In [None]:
# Final summary: evaluation window, forecast score, and information ratio.
(start_col, end_col, rps, ir)