# Efficient frontier

In [None]:
import pandas as pd
import numpy as np

# 0. Load the Data and pre-process

In [None]:
# Read the industry returns CSV (first row = header, first column = index)
# and divide every value by 100.
ind=pd.read_csv('../input/edhec-data-for-portfolio-construction-with-python/ind30_m_ew_rets.csv',header=0,index_col=0,parse_dates=True)/100

In [None]:
# Parse the index labels as YYYYMM dates, then convert them to monthly periods
ind.index=pd.to_datetime(ind.index,format='%Y%m').to_period('M')
ind

In [None]:
# Remove the embedded leading/trailing whitespace from the column names
ind.columns=ind.columns.str.strip()
ind

In [None]:
# make it a function
def get_ind_returns(path='../input/edhec-data-for-portfolio-construction-with-python/ind30_m_ew_rets.csv'):
    """
    Load the monthly industry returns file and clean it up.

    path: location of the CSV file. Defaults to the Kaggle input path
          this notebook was written against, so existing calls with no
          argument behave as before.

    Returns a DataFrame whose values are the file's values divided by 100
    (the file presumably stores percentages), indexed by monthly Period,
    with whitespace stripped from the column names.
    """
    # The index labels are YYYYMM and are parsed explicitly below, so
    # read_csv is not asked to guess dates: generic date guessing does not
    # understand this format and is not needed here.
    ind = pd.read_csv(path, header=0, index_col=0) / 100
    # convert the YYYYMM index labels to monthly periods
    ind.index = pd.to_datetime(ind.index, format='%Y%m').to_period('M')
    # remove the embedded spaces in the column names
    ind.columns = ind.columns.str.strip()
    return ind

In [None]:
# Reload the data through the helper and check its (rows, columns) shape
ind=get_ind_returns()
ind.shape

# 1. Some pre-defined functions

In [None]:
def skewness(r):
    """
    Compute the skewness of the supplied Series or DataFrame.
    Hand-rolled alternative to scipy.stats.skew().
    Returns a float for a Series, or a Series (one value per column)
    for a DataFrame.
    """
    deviations = r - r.mean()
    # third central moment of the data
    third_moment = (deviations**3).mean()
    # normalise by the cube of the population standard deviation (ddof=0)
    return third_moment/r.std(ddof=0)**3


def kurtosis(r):
    """
    Compute the kurtosis of the supplied Series or DataFrame.
    Hand-rolled alternative to scipy.stats.kurtosis(); note that nothing
    is subtracted here, so this is the raw (not excess) kurtosis.
    Returns a float for a Series, or a Series (one value per column)
    for a DataFrame.
    """
    deviations = r - r.mean()
    # fourth central moment of the data
    fourth_moment = (deviations**4).mean()
    # normalise by the fourth power of the population standard deviation (ddof=0)
    return fourth_moment/r.std(ddof=0)**4

def compound(r):
    """
    Compound the set of returns in r into a single total return.
    Computed as expm1(sum(log1p(r))), i.e. prod(1 + r) - 1 in log space.
    """
    total_log_growth = np.log1p(r).sum()
    return np.expm1(total_log_growth)
                         
                         
def annualize_rets(r, periods_per_year):
    """
    Annualize a set of per-period returns.

    The total compounded growth over the r.shape[0] observations is
    rescaled to a one-year horizon of periods_per_year periods.
    periods_per_year is not inferred from the data; the caller must
    supply it.
    """
    n_periods = r.shape[0]
    total_growth = (1+r).prod()
    # fraction of the observed sample that makes up one year, as an exponent
    years_exponent = periods_per_year/n_periods
    return total_growth**years_exponent-1


def annualize_vol(r, periods_per_year):
    """
    Annualize the volatility of a set of per-period returns.

    Multiplies the standard deviation of r (pandas default ddof) by the
    square root of periods_per_year. periods_per_year is not inferred
    from the data; the caller must supply it.
    """
    scale = periods_per_year**0.5
    return r.std()*scale


def sharpe_ratio(r, riskfree_rate, periods_per_year):
    """
    Compute the annualized Sharpe ratio of a set of returns.

    riskfree_rate is an annual rate; it is converted to a per-period rate
    before being subtracted from r. The annualized excess return is then
    divided by the annualized volatility of r.
    """
    # de-compound the annual risk-free rate to one period
    per_period_rf = (1+riskfree_rate)**(1/periods_per_year)-1
    annual_excess = annualize_rets(r - per_period_rf, periods_per_year)
    return annual_excess/annualize_vol(r, periods_per_year)


import scipy.stats
def is_normal(r, level=0.01):
    """
    Apply the Jarque-Bera test to decide whether returns look normal.

    r: a Series (or other 1-D sample), or a DataFrame, in which case the
       test is applied to every column.
    level: significance level of the test, 1% by default.

    Returns True if the hypothesis of normality is not rejected
    (p-value > level), False otherwise; for a DataFrame, a Series of
    such booleans, one per column.
    """
    if isinstance(r, pd.DataFrame):
        # forward level so each column is tested at the caller's level
        # rather than silently falling back to the default
        return r.aggregate(is_normal, level=level)
    statistic, p_value = scipy.stats.jarque_bera(r)
    return p_value > level


def drawdown(return_series: pd.Series):
    """Compute drawdown statistics for a time series of asset returns.

       Returns a DataFrame with three columns:
       "Wealth"        - the wealth index, starting from 1000,
       "Previous Peak" - the running maximum of the wealth index, and
       "Drawdown"      - the fractional distance of wealth below that peak
    """
    wealth = 1000*(1+return_series).cumprod()
    running_peak = wealth.cummax()
    result = pd.DataFrame({"Wealth": wealth,
                           "Previous Peak": running_peak})
    result["Drawdown"] = (wealth - running_peak)/running_peak
    return result


def semideviation(r):
    """
    Return the semideviation (negative semideviation) of r: the
    population standard deviation (ddof=0) of the strictly negative
    values only.
    r must be a Series or a DataFrame (handled column by column),
    otherwise a TypeError is raised.
    """
    if isinstance(r, pd.DataFrame):
        return r.aggregate(semideviation)
    if not isinstance(r, pd.Series):
        raise TypeError("Expected r to be a Series or DataFrame")
    negative_returns = r[r < 0]
    return negative_returns.std(ddof=0)


def var_historic(r, level=5):
    """
    Return the historic Value at Risk at the given level (in percent).

    The result is the negated "level"-th percentile of the returns, so a
    loss is reported as a positive number: "level" percent of the returns
    fall below the cut-off and (100-level) percent lie above it.
    A DataFrame is handled column by column; any other non-Series input
    raises a TypeError.
    """
    if isinstance(r, pd.Series):
        return -np.percentile(r, level)
    if isinstance(r, pd.DataFrame):
        return r.aggregate(var_historic, level=level)
    raise TypeError("Expected r to be a Series or DataFrame")


def cvar_historic(r, level=5):
    """
    Compute the historic Conditional VaR of a Series or DataFrame.

    The CVaR is the negated mean of the returns that are at or below the
    "level"-th percentile of r, so an average tail loss is reported as a
    positive number.
    A DataFrame is handled column by column; any other non-Series input
    raises a TypeError.
    """
    if isinstance(r, pd.DataFrame):
        return r.aggregate(cvar_historic, level=level)
    if isinstance(r, pd.Series):
        # Cut-off expressed in return space. This equals -var_historic(r, level):
        # the VaR itself is a positive loss figure, so comparing returns
        # against it directly would select almost the whole sample instead
        # of the worst tail.
        cutoff = np.percentile(r, level)
        return -r[r <= cutoff].mean()
    raise TypeError("Expected r to be a Series or DataFrame")


from scipy.stats import norm
def var_gaussian(r, level=5, modified=False):
    """
    Return the parametric Gaussian VaR of a Series or DataFrame.

    level is given in percent. If "modified" is True, the Gaussian z-score
    is adjusted for the observed skewness and kurtosis using the
    Cornish-Fisher modification before the VaR is computed.
    The result is negated so that a loss is reported as a positive number.
    """
    # z-score of the requested level under a normal distribution
    z = norm.ppf(level/100)
    if modified:
        s = skewness(r)
        k = kurtosis(r)
        # Cornish-Fisher correction terms
        skew_term = (z**2 - 1)*s/6
        kurt_term = (z**3 -3*z)*(k-3)/24
        skew_sq_term = (2*z**3 - 5*z)*(s**2)/36
        z = z + skew_term + kurt_term - skew_sq_term
    mu = r.mean()
    sigma = r.std(ddof=0)
    return -(mu + z*sigma)



# 2. Explore the data

## 2.1 Explore the Drawdown

In [None]:
# Plot the "Drawdown" column computed from the 'Food' returns as a line chart
drawdown(ind['Food'])['Drawdown'].plot.line(figsize=(12,6))

## 2.2 Explore the VaR (Cornish-Fisher)

In [None]:
# Explore the Cornish-Fisher (modified) VaR.
# First pick the columns of interest.
cols_of_interest=['Food','Smoke','Coal','Beer','Fin']
var_gaussian(ind[cols_of_interest],modified=True)

In [None]:
# Same for all columns, sorted ascending;
# tail() keeps the last 5 rows, i.e. the five largest VaR values
var_gaussian(ind,modified=True).sort_values().tail()

In [None]:
# so Mines has a very high VaR: 10.39% per month

In [None]:
# Plot the sorted Cornish-Fisher VaR of every column as a horizontal bar chart
var_gaussian(ind,modified=True).sort_values().plot.barh(figsize=(6,10))

## 2.3 Some other statistics

![Sharpe ratio](https://a.c-dn.net/c/content/dam/publicsites/igcom/uk/images/ContentImage/Sharpe%20ratio.png)

In [None]:
# Annualized Sharpe ratio of every column (3% annual risk-free rate,
# 12 periods per year), sorted and shown as a horizontal bar chart
sharpe_ratio(ind,0.03,12).sort_values().plot.barh(figsize=(6,10),title='Industry Sharpe Ratio',color='green')

In [None]:
# Smoke, Food and Health have very good Sharpe ratios,
# while Coal and Games have poor Sharpe ratios

In [None]:
# Look at the Sharpe ratio over a specific period: rows from 2000 onwards
sharpe_ratio(ind['2000':],0.03,12).sort_values().plot.barh(figsize=(6,10),title='Industry Sharpe Ratio 2000-2018',color='orange')

# 3. Compute the Efficient Frontier

* expected returns
* covariance matrix
    * correlations
    * volatilities

## 3.1 Expected returns 

In [None]:
# Instead of forecasting the future, use in-sample (historical) estimates:
# what was the efficient frontier for a given past period, e.g. 1995-2000?

# annualized return of every column over 1995-2000 (12 periods per year)
er=annualize_rets(ind['1995':'2000'],12)
er.sort_values().plot.bar(figsize=(12,6))

## 3.2 Covariance matrix

In [None]:
# Covariance matrix of the returns over the same 1995-2000 window
cov=ind['1995':'2000'].cov()

In [None]:
# Dimensions of the covariance matrix (one row and one column per return column)
cov.shape

In [None]:
# Display the covariance matrix
cov