In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
from scipy.stats import norm

In [4]:
# DEVELOP OUR OWN FUNCTION OF CALCULATING ANNUALIZED RETURN
# Make sure that the returns are in decimal formats
def r_ann(r, periods_per_year):
    """Annualize periodic returns by geometric compounding.

    Parameters
    ----------
    r : pandas Series or DataFrame
        Periodic returns in decimal format (0.01 means 1%).
    periods_per_year : int or float
        Number of return periods in one year (e.g. 12 for monthly data,
        4 for quarterly data).

    Returns
    -------
    The compound return per period, compounded over ``periods_per_year``
    periods.
    """
    obs = r.shape[0]
    hpr = (1 + r).prod()                # gross holding-period return; print this if we need to display HPR
    r_compound = hpr ** (1 / obs) - 1   # compound return per period; print this if we need to display it
    # Compound over the caller's frequency instead of a hard-coded 12 periods.
    return (1 + r_compound) ** periods_per_year - 1


# DEVELOP OUR OWN FUNCTION OF CALCULATING ANNUALIZED VOLATILITY
def std_ann(r, periods_per_year):
    """Annualize the volatility of periodic returns.

    Parameters
    ----------
    r : pandas Series or DataFrame
        Periodic returns in decimal format.
    periods_per_year : int or float
        Number of return periods in one year (e.g. 12 for monthly data).

    Returns
    -------
    ``r.std()`` scaled by the square root of ``periods_per_year``.
    """
    sd = r.std()                    # per-period standard deviation; print this if we need to display it
    # Scale by the caller's frequency instead of a hard-coded 12 periods.
    return sd * np.sqrt(periods_per_year)

In [6]:
# DEVELOP OUR OWN FUNCTION OF CALCULATING THE SHARPE RATIO
def sharpe_ratio(r, risk_free, periods_per_year):
    """Annualized Sharpe ratio of a set of periodic returns.

    Parameters
    ----------
    r : periodic returns, passed unchanged to ``r_ann`` and ``std_ann``.
    risk_free : rate subtracted from the annualized return
        (presumably an annual rate in decimal format - confirm with callers).
    periods_per_year : number of return periods in one year.

    Returns
    -------
    (annualized return - risk_free) / annualized volatility.
    """
    annual_return = r_ann(r, periods_per_year)
    annual_vol = std_ann(r, periods_per_year)
    return (annual_return - risk_free) / annual_vol

# DEVELOP OUR OWN FUNCTION OF CALCULATING DRAWDOWNS (the max drawdown is the minimum of the 'Drawdowns' column)
def drawdown(return_series: pd.Series):
    """Build the wealth index, running peaks and drawdowns of a return series.

    Parameters
    ----------
    return_series : pd.Series
        Periodic returns in decimal format.

    Returns
    -------
    pd.DataFrame with three columns:
        'Wealth'    - value of an initial 1000 compounded through the returns,
        'Peaks'     - running maximum of 'Wealth',
        'Drawdowns' - fractional distance of 'Wealth' below 'Peaks' (<= 0).
    """
    growth = (1 + return_series).cumprod()
    wealth = 1000 * growth                      # start from a notional 1000
    running_max = wealth.cummax()
    shortfall = (wealth - running_max) / running_max
    columns = {'Wealth': wealth, 'Peaks': running_max, 'Drawdowns': shortfall}
    return pd.DataFrame(columns)

In [8]:
# DEVELOP OUR OWN FUNCTION OF CALCULATING SKEWNESS AND KURTOSIS FOR HISTORICAL DATA
def skewness_his(r):
    """Skewness of historical returns: the third standardized moment.

    Computed as mean((r - mean(r))**3) / sigma**3, where sigma is the
    population standard deviation of ``r``.
    """
    deviations = r - r.mean()
    third_moment = (deviations ** 3).mean()   # numerator
    # ddof = 0 gives the population standard deviation (divide by n, not n - 1)
    pop_sd = r.std(ddof = 0)
    return third_moment / pop_sd ** 3


def kurtosis_his(r):
    """Kurtosis of historical returns: the fourth standardized moment.

    Computed as mean((r - mean(r))**4) / sigma**4, where sigma is the
    population standard deviation of ``r``. This is the raw kurtosis,
    so 3 is not subtracted.
    """
    deviations = r - r.mean()
    fourth_moment = (deviations ** 4).mean()  # numerator
    # ddof = 0 gives the population standard deviation (divide by n)
    pop_sd = r.std(ddof = 0)
    return fourth_moment / pop_sd ** 4


# DEVELOP THE JARQUE BERA TEST FOR NORMALITY
# The significance level for this test defaults to 1%
def is_normal(r, level = 0.01):
    """Jarque-Bera normality check.

    Parameters
    ----------
    r : data passed as-is to ``scipy.stats.jarque_bera``.
    level : significance level as a decimal (default 0.01, i.e. 1%).

    Returns
    -------
    True when the test's p-value exceeds ``level`` (normality is not
    rejected), False otherwise.
    """
    _, jb_p_value = scipy.stats.jarque_bera(r)
    return jb_p_value > level
# Example: mnp.is_normal(df) -> this will work on the whole data set; df.aggregate(is_normal) -> this will work by each column

In [10]:
# DEVELOP OUR OWN FUNCTION OF CALCULATING SEMI-DEVIATION
def semi_dev(r):
    """Semi-deviation: root mean square of the deviations below the mean.

    Only observations strictly below the mean of ``r`` contribute, and the
    sum of their squared deviations is divided by the number of such
    observations.
    """
    excess = r - r.mean()                    # demeaned returns
    below_mean = excess < 0                  # flags the returns under the mean
    downside_sq = excess[below_mean] ** 2    # squared deviations of those returns
    return (downside_sq.sum() / below_mean.sum()) ** 0.5

In [12]:
# DEVELOP OUR OWN FUNCTION OF CALCULATING HISTORIC VaR
# Here we branch with isinstance because np.percentile by default flattens all the data into a single array, so a DataFrame must be processed column by column
def var_his(r, level):
    """Historical Value at Risk.

    Parameters
    ----------
    r : pd.Series or pd.DataFrame
        Periodic returns. A DataFrame is processed column by column.
    level : number
        Percentile on the 0-100 scale (e.g. 5 for the 5% VaR).

    Returns
    -------
    The negated ``level``-th percentile of the returns, so that a loss is
    reported as a positive number. One value per column for a DataFrame.

    Raises
    ------
    TypeError
        If ``r`` is neither a Series nor a DataFrame.
    """
    if isinstance(r, pd.Series):
        # negate: VaR is commonly presented as a positive value
        return -np.percentile(r, level)
    if isinstance(r, pd.DataFrame):
        # re-enter this function once per column
        return r.aggregate(var_his, level = level)
    raise TypeError('Expected r to be a Series or DataFrame')


# DEVELOP OUR OWN FUNCTION OF CALCULATING PARAMETRIC VaR - GAUSSIAN
def var_gaussian(r, level):
    """Parametric (Gaussian) Value at Risk.

    Parameters
    ----------
    r : returns exposing ``.mean()`` and ``.std(ddof=...)``
        (pandas Series or DataFrame).
    level : tail probability as a decimal, e.g. 0.05.

    Returns
    -------
    -(mean + z * sigma), where z is the standard normal quantile at
    ``level`` and sigma is the population standard deviation (ddof = 0).
    """
    z_score = norm.ppf(level)
    mu = r.mean()
    sigma = r.std(ddof = 0)
    return -(mu + z_score * sigma)


# DEVELOP OUR OWN FUNCTION OF CALCULATING VaR - MODIFIED CORNISH-FISHER
def var_cofi(r, level):
    """Cornish-Fisher (modified) Value at Risk.

    The standard normal quantile at ``level`` is adjusted for the skewness
    and kurtosis of ``r`` (from ``skewness_his`` / ``kurtosis_his``) and then
    used in place of z in the Gaussian VaR formula.

    Parameters
    ----------
    r : returns (pandas Series or DataFrame).
    level : tail probability as a decimal, e.g. 0.01.

    Returns
    -------
    -(mean + z_adjusted * sigma), with sigma the population standard
    deviation (ddof = 0).
    """
    z = norm.ppf(level)
    s = skewness_his(r)
    k = kurtosis_his(r)
    skew_term = (z**2 - 1) * s/6
    kurt_term = (z**3 - 3*z) * (k - 3)/24        # k - 3 is the excess kurtosis
    skew_sq_term = (2*z**3 - 5*z) * (s**2)/36
    z_adjusted = z + skew_term + kurt_term - skew_sq_term
    return -(r.mean() + z_adjusted * r.std(ddof = 0))


# DEVELOP OUR OWN FUNCTION OF DISPLAYING ALL VaR APPROACHES:
def var_comparison(r, level):  # level inputs take the decimal values e.g. 0.01
    """Print and bar-plot the historical, Gaussian and Cornish-Fisher VaR.

    Parameters
    ----------
    r : pd.DataFrame or pd.Series
        Periodic returns in decimal format. A Series is treated as a
        one-column DataFrame.
    level : float
        Tail probability as a decimal, e.g. 0.01 for 1%.

    Returns
    -------
    None. The comparison table is printed and a bar chart is drawn.
    """
    if isinstance(r, pd.Series):
        # For a Series the estimators return scalars, which pd.concat cannot
        # combine; promote to a one-column frame so each one returns a Series.
        r = r.to_frame()
    # var_his takes a 0-100 percentile, the parametric estimators a decimal.
    var_list = [var_his(r, level = level * 100), var_gaussian(r, level), var_cofi(r, level)]
    var_tbl = pd.concat(var_list, axis = 1)
    var_tbl.columns = ['Historical VaR', 'Parametric Gaussian VaR', 'Cornish-Fisher VaR']
    print(var_tbl)
    var_tbl.plot.bar()
    # ':g' keeps float noise (e.g. 7.000000000000001) and a trailing '.0' out of the title.
    plt.title(f'VaR COMPARISON ACROSS APPROACHES AT {level * 100:g}%', fontsize = 16, color = 'red')
    plt.ylabel('VaR', fontsize = 14, color = 'navy')
    plt.xticks(rotation = 45, ha = 'right')


# DEVELOP OUR OWN FUNCTION OF CALCULATING CVaR:
# Basically CVaR is the average of the returns that are worse than the VaR.
# Here we need to use isinstance because the filter and .mean() must be applied to each column separately when the input is a DataFrame
def cvar_his(r, level):  # level takes decimal value e.g. 0.05
    """Historical Conditional VaR (expected shortfall).

    Parameters
    ----------
    r : pd.Series or pd.DataFrame
        Periodic returns. A DataFrame is processed column by column.
    level : float
        Tail probability as a decimal, e.g. 0.05.

    Returns
    -------
    The negated mean of the returns at or below the historical VaR
    threshold. One value per column for a DataFrame.

    Raises
    ------
    TypeError
        If ``r`` is neither a Series nor a DataFrame.
    """
    if isinstance(r, pd.DataFrame):
        # re-enter this function once per column
        return r.aggregate(cvar_his, level = level)
    if not isinstance(r, pd.Series):
        raise TypeError('Expected r to be a Series or DataFrame')
    # var_his works on the 0-100 scale and returns a positive loss, so negate it
    # to get the return threshold
    cutoff = -var_his(r, level = level*100)
    tail_returns = r[r <= cutoff]
    return -tail_returns.mean()