# Objective Function

In [2]:
import numpy as np
from typing import Tuple
def evalfunc(portfolio: np.ndarray, ret: np.ndarray, pi: float, theta: float) -> float:
    """
    Task 1: the objective function
    (Remember to vectorize as much as possible)


    Parameters
    --------------
    portfolio: np.ndarray: the portfolio vector i.e. x

    ret: np.ndarray: the (T, 3) numpy array containing all asset returns

    pi: float: the exponent parameter of the objective

    theta: the risk-aversion parameter of the objective


    Returns
    --------------
    float: the objective value.
    """
    # compute mean returns first. ret_mu shape should be (3,)
    ret_mu = ret.mean(axis=0)

    # first part
    drift = -ret_mu.dot(portfolio)

    # second part
    # weighed deviation from mean (part within []^pi)
    deviation = (ret - ret_mu).dot(portfolio)
    risk = theta * (
        (np.abs(deviation)**pi).mean()
    )
    return drift + risk

# Gradient

Following matrix notations, the objective function can be rewritten as
\begin{align*}
	\text{minimize} \ -\bar{\textbf{r}}'\textbf{x}
	+
	\frac{\theta}{T}
		||
			(\textbf{r} - \bar{\textbf{r}})'\textbf{x}
		||_{\pi}^{\pi}
\end{align*}
where $||\cdot||_\pi$ is the $L^\pi$ norm. Here we must investigate how to take the derivative of a $L^p$ norm with respect to the function argument. According to
[Wikipedia - Norm](https://en.wikipedia.org/wiki/Norm_(mathematics)),

\begin{align*}
\frac{\partial ||\textbf{x}||_p}{\partial \textbf{x}}&=\frac{\textbf{x}\circ |\textbf{x}|^{p-2}}{||\textbf{x}||_p^{p-1}}\\
\implies 
\frac{\partial ||\textbf{x}||_p^p}{\partial \textbf{x}}&=p\cdot||\textbf{x}||_p^{p-1}\cdot\frac{\textbf{x}\circ |\textbf{x}|^{p-2}}{||\textbf{x}||_p^{p-1}}\\
&=p\cdot \textbf{x}\circ |\textbf{x}|^{p-2}
\end{align*}
where $\circ$ is element-wise matrix multiplication, and $|\textbf{x}| = (|x_1|, \ldots, |x_n|)$ is the element-wise absolute value. In other words, 
\begin{align*}
\textbf{x}\circ |\textbf{x}|^{p-2}&=[x_1,\ldots, x_n]\circ [|x_1|^{p-2},\ldots, |x_n|^{p-2}]\\
&=[(x_1\cdot |x_1|^{p-2}),\ldots, (x_n\cdot |x_n|^{p-2})]
\end{align*}
Therefore, if we were to take derivative of the objective function with respect to $\textbf{x}$ can be obtained via chain rule:
\begin{align*}
\frac{\partial \text{obj}}{\partial \textbf{x}}
&=-\bar{\textbf{r}} + \frac{\pi\theta}{T}\cdot [(\textbf{r} - \bar{\textbf{r}})'\textbf{x}]\circ |(\textbf{r} - \bar{\textbf{r}})'\textbf{x}|^{\pi -2}\cdot (\textbf{r}-\bar{\textbf{r}})
\end{align*}

In [3]:
def evalgrad(portfolio: np.ndarray, ret: np.ndarray, pi: float, theta: float) -> np.ndarray:
    """
    Task 1: the objective function gradient


    Parameters
    --------------
    portfolio: np.ndarray: the portfolio vector i.e. x

    ret: np.ndarray: the (T, 3) numpy array containing all asset returns

    pi: float: the exponent parameter of the objective

    theta: the risk-aversion parameter of the objective


    Returns
    --------------
    float: the objective gradient vector
    """
    T = ret.shape[0]
    ret_mu = ret.mean(axis=0)
    delta = ret - ret_mu
    dev = delta.dot(portfolio)
    nom = dev * np.absolute(dev)**(pi-2)
    return -ret_mu + (
        (theta * pi/ T)*nom
    ).dot(delta)


# Data processing

In [6]:
pd.read_csv("../data/project_4_data/ABR.csv")

  pd.read_csv("../data/project_4_data/ABR.csv")


Unnamed: 0,BarTp,Trade,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,BarTp.1,...,Unnamed: 16,Unnamed: 17,BarTp.2,Ask,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
0,,,,,,,,,,,...,,,,,,,,,,
1,ABR US Equity,,,,,,,,,ABR US Equity,...,,,ABR US Equity,,,,,,,
2,Dates,Open,Close,High,Low,Value,Volume,Number Ticks,,Dates,...,Number Ticks,,Dates,Open,Close,High,Low,Value,Volume,Number Ticks
3,2021-02-01 09:30:00,14.39,14.3601,14.44,14.3601,253092,17587,5,,2021-02-01 03:59:01,...,2,,2021-02-01 03:59:01,15.73,15.73,15.73,15.73,31.46,2,1
4,2021-02-01 09:31:00,14.36,14.35,14.36,14.35,11743.3008,818,4,,2021-02-01 06:59:01,...,2,,2021-02-01 06:59:01,15.72,14.7,15.72,14.7,807.32,54,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58807,,,,,,,,,,2021-08-13 16:45:00,...,1,,,,,,,,,
58808,,,,,,,,,,2021-08-13 16:54:00,...,8,,,,,,,,,
58809,,,,,,,,,,2021-08-13 18:32:00,...,2,,,,,,,,,
58810,,,,,,,,,,2021-08-13 18:57:00,...,1,,,,,,,,,


In [34]:
import pandas as pd
import datetime

def is_open_or_noon(dt: datetime.datetime) -> bool:
    """Task 1: read helper function for datetime parsing"""
    t = dt.time()
    return t == datetime.time(9,30) or t == datetime.time(12,0)

def is_open(dt:datetime.datetime) -> bool:
    """Task 1: read helper function for datetime parsing"""
    return dt.time() == datetime.time(9,30)

def my_dt_parser(s: str) -> datetime.datetime:
    """Task 1: custom datetime parser. 5x faster than pd.to_datetime"""
    # 2021-02-01 09:30:00
    date, time = s.split()
    if "/" in date:
        m, d, y = date.split("/")
        year = 2000 + int(y)
    elif "-" in date:
        y, m, d = date.split("-")
        year = int(y)
    else:
        raise Exception("DATETIME PARSING ERROR!")

    HM = time.split(":")
    H, M = HM[0], HM[1]
    return datetime.datetime(
        year = year,
        month = int(m),
        day = int(d),
        hour = int(H),
        minute = int(M)
    )

def read_asset(asset:str, data_dir: str="../data/project_4_data/") -> pd.DataFrame:
    """
    Task 1: reads a single asset.


    Parameters
    --------------
    asset: str: asset name

    data_dir: str: local data directory


    Returns
    --------------
    pd.DataFrame: pandas dataframe containing asset returns and price
    """
    # static var to mark missing data
    missing = -999.
    # read csv
    csv_path = data_dir + asset + ".csv"
    df = pd.read_csv(csv_path, low_memory=False, header=3).loc[:, ["Dates", "Close"]]
    # read up to empty entries
    df = df.iloc[:df["Close"].isna().argmax()]

    # deal with first row missing date
    df.loc[0,"Dates"] = df.loc[1,"Dates"].replace("31", "30")

    # extract open or noon data
    df["dt"] = df["Dates"].apply(my_dt_parser)
    df["Date"] = df["dt"].apply(lambda dt: dt.date())
    open_or_noon = df["dt"].apply(is_open_or_noon)
    df = df.loc[open_or_noon]

    # compute daily return
    ret = df.loc[:, ["Close","Date"]].groupby("Date").apply(
        lambda x: x["Close"].iloc[1]/x["Close"].iloc[0]-1 if len(x) == 2 else missing
    ).values

    ret = ret[~np.isnan(ret)]

    # return along with daily open price
    df = df.loc[df["dt"].apply(is_open)]
    df["ret"] = ret

    # filter out bad dates with missing data (particularly at noon)
    df = df.loc[df.ret > missing + 1]
    df.set_index("Date", inplace=True)
    df = df[["ret", "Close"]]
    df.rename(columns = {"ret": f"{asset}_ret", "Close": f"{asset}_price"}, inplace = True)
    return df

assets = ["ABR", "AMZN", "GS", "NFLX", "NIO", "NVDA", "TSLA", "UBS"]

for a in assets:
    asset = read_asset(a)
    print("Name:", a)
    print(asset)
    print()

Name: ABR
             ABR_ret  ABR_price
Date                           
2021-02-02  0.014195    14.5830
2021-02-04  0.034797    14.8000
2021-02-05 -0.002571    15.5600
2021-02-08  0.006283    15.1200
2021-02-09  0.007262    15.1053
...              ...        ...
2021-08-09  0.007619    18.2311
2021-08-10  0.006014    18.2900
2021-08-11 -0.004968    18.6200
2021-08-12 -0.005870    18.7400
2021-08-13 -0.000545    18.3600

[128 rows x 2 columns]

Name: AMZN
            AMZN_ret  AMZN_price
Date                            
2021-01-04 -0.027139     3262.80
2021-01-05  0.006993     3174.80
2021-01-06  0.010673     3146.17
2021-01-07  0.010303     3162.20
2021-01-08 -0.005051     3173.77
...              ...         ...
2021-07-07 -0.004655     3729.24
2021-07-08  0.004955     3652.90
2021-07-09  0.002678     3718.54
2021-07-12 -0.011032     3746.21
2021-07-13  0.011858     3708.82

[130 rows x 2 columns]

Name: GS
              GS_ret  GS_price
Date                          
2021-01-11  0

In [33]:
def read_all(data_dir: str = "../data/project_4_data/", T: int = 100) -> Tuple[pd.DataFrame,pd.DataFrame]:
    """
    Task 1.5: reads all asset returns and sever into train and test.


    Parameters
    --------------
    data_dir: str: local data directory

    T: int: size of the traning period.


    Returns
    --------------
    Tuple[pd.DataFrame, pd.DataFrame]: train and test pandas dataframe containing returns and prices
    """
    assets = ["ABR", "AMZN", "GS", "NFLX", "NIO", "NVDA", "TSLA", "UBS"]

    dfs = [read_asset(asset, data_dir) for asset in assets]

    df = dfs[0]
    for i in range(1, len(assets)):
        df = df.join(dfs[i])

    print(df)
    df = df.dropna()
    return df.iloc[:T], df.iloc[T:]

train, test = read_all()
train

             ABR_ret  ABR_price  AMZN_ret  AMZN_price    GS_ret  GS_price  \
Date                                                                        
2021-02-02  0.014195    14.5830 -0.000326     3400.00  0.017026   279.580   
2021-02-04  0.034797    14.8000 -0.001582     3324.26  0.014105   290.325   
2021-02-05 -0.002571    15.5600  0.004461     3318.00 -0.005064   295.225   
2021-02-08  0.006283    15.1200 -0.012793     3358.78  0.007524   296.370   
2021-02-09  0.007262    15.1053 -0.000314     3314.70  0.004766   298.990   
...              ...        ...       ...         ...       ...       ...   
2021-08-09  0.007619    18.2311       NaN         NaN       NaN       NaN   
2021-08-10  0.006014    18.2900       NaN         NaN       NaN       NaN   
2021-08-11 -0.004968    18.6200       NaN         NaN       NaN       NaN   
2021-08-12 -0.005870    18.7400       NaN         NaN       NaN       NaN   
2021-08-13 -0.000545    18.3600       NaN         NaN       NaN       NaN   

Unnamed: 0_level_0,ABR_ret,ABR_price,AMZN_ret,AMZN_price,GS_ret,GS_price,NFLX_ret,NFLX_price,NIO_ret,NIO_price,NVDA_ret,NVDA_price,TSLA_ret,TSLA_price,UBS_ret,UBS_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2021-02-05,-0.002571,15.5600,0.004461,3318.00,-0.005064,295.225,-0.003134,551.388,-0.020202,57.8895,-0.005866,547.24,0.015920,846.400,-0.004330,15.0100
2021-02-08,0.006283,15.1200,-0.012793,3358.78,0.007524,296.370,-0.005190,552.065,0.033702,56.9700,0.033550,552.01,-0.006341,869.705,0.005302,15.0900
2021-02-09,0.007262,15.1053,-0.000314,3314.70,0.004766,298.990,0.013961,547.610,0.042395,58.8980,0.000912,575.50,-0.008471,857.000,-0.002064,15.3116
2021-02-10,0.007847,15.0370,-0.011969,3308.49,-0.002121,301.810,-0.009385,563.160,-0.001286,62.2300,0.010873,580.35,-0.042379,843.555,-0.007083,15.5300
2021-02-12,0.004912,15.2700,0.002014,3248.02,0.001596,303.326,-0.009429,560.000,0.010814,60.1000,0.008142,604.88,0.010654,795.000,0.005232,15.2900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-07,-0.000852,17.6100,-0.004655,3729.24,0.001377,366.725,-0.010433,542.510,-0.062784,50.2800,-0.012048,831.64,-0.023799,662.620,-0.000901,15.0900
2021-07-08,0.025225,17.2450,0.004955,3652.90,0.003137,360.270,0.000978,531.740,0.030331,43.8500,0.010348,796.29,0.027443,628.141,0.002033,14.7600
2021-07-09,0.020774,17.5700,0.002678,3718.54,0.011939,365.190,0.008598,530.350,-0.033952,46.8100,-0.000439,797.23,-0.006081,656.190,0.006040,14.9000
2021-07-12,0.002475,18.1800,-0.011032,3746.21,0.034291,369.190,-0.004737,538.320,-0.013590,46.2900,0.005378,811.61,0.024046,663.903,0.015304,14.8850
