# Loading

In [96]:
import pandas as pd
import numpy as np
from datetime import datetime
from typing import Callable, Iterable, Tuple
from dataclasses import dataclass, asdict

In [47]:
table = pd.read_csv('input/Quant_Invest_Fundusze.csv', sep=';', index_col='Daty')
table.index = pd.to_datetime(table.index)

In [48]:
table.head()

Unnamed: 0_level_0,AP,ARR,ARW,G,OP,ORR,ORW
Daty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-12-31,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
2018-12-28,1000.03,993.71,997.19,999.92,999.81,999.12,1000.59
2018-12-27,991.61,989.56,987.57,999.9,999.53,998.71,999.69
2018-12-24,994.43,960.86,986.28,999.82,999.79,998.09,1000.25
2018-12-21,994.46,976.19,990.47,999.74,999.6,996.63,999.91


# Validation definitions

In [191]:
@dataclass
class Portfolio(object):
    AP: float
    ARR: float
    ARW: float
    G: float
    OP: float
    ORR: float
    ORW: float

    def as_weights(self):
        return np.array(list(asdict(self).values()))

In [313]:
def calculate_returns(portfolio: Portfolio, test_table: pd.DataFrame) -> float:
    weights = portfolio.as_weights()
    date1, date2 = test_table.index.min(), test_table.index.max()
    date1_values, date2_values = test_table.loc[date1].values.ravel(), test_table.loc[date2].values.ravel()
    return_values = (date2_values - date1_values) / date1_values
    weighted_returns = return_values * weights
    return sum(weighted_returns) / sum(weights)  # in case weights don't sum up to 1.0


def calculate_volatility(portfolio: Portfolio, test_table: pd.DataFrame) -> float:
    return None  # TODO

class PortfolioValidator(object):
    def __init__(self, table: pd.DataFrame, test_years: Iterable[int]=range(2001, 2019)):
        self.test_years = test_years
        self.scores = pd.DataFrame()
        self.table = table.sort_index().copy()
        self.table.index = pd.to_datetime(self.table.index)

    @property
    def score_summary(self):
        return self.scores[
            [col for col in self.scores.columns if not col == 'modelling_years']
        ].agg(['min', 'max', 'mean', 'std'])
    
    def inspect(self):
        return self.scores.style.bar(align='mid', color=['#d65f5f', '#5fba7d'])

    def validate(self, portfolio_builder: Callable[[pd.DataFrame], Tuple[dict, dict]]) -> pd.DataFrame:
        _scores = []
        for test_year in self.test_years:
            model_table = table[table.index.year < test_year]
            portfolio, portfolio_meta = portfolio_builder(model_table)
            test_table = table[table.index.year == test_year]
            return_rate = calculate_returns(portfolio, test_table)
            volatility = calculate_volatility(portfolio, test_table)
            result_df = pd.DataFrame({
                'modelling_years': test_year - 2000,
                'actual_returns': return_rate,
                'actual_volatility': volatility,
                **portfolio_meta,
                **asdict(portfolio)
            }, index=[test_year])
            _scores.append(result_df)
        self.scores = pd.concat(_scores)
        self.scores.index.name = 'test_year'
        return self.score_summary

### Testing

In [314]:
dummy_builder = lambda df: (Portfolio(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.4), dict())

In [315]:
pf = PortfolioValidator(table)

In [316]:
pf.validate(dummy_builder)

Unnamed: 0,actual_returns,actual_volatility,AP,ARR,ARW,G,OP,ORR,ORW
min,-0.159778,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
max,0.251632,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
mean,0.070239,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
std,0.094737,,1.4280130000000002e-17,1.4280130000000002e-17,1.4280130000000002e-17,1.4280130000000002e-17,1.4280130000000002e-17,1.4280130000000002e-17,5.71205e-17


In [317]:
pf.inspect()

Unnamed: 0_level_0,modelling_years,actual_returns,actual_volatility,AP,ARR,ARW,G,OP,ORR,ORW
test_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,1,0.0446432,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
2002,2,0.0590086,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
2003,3,0.202361,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
2004,4,0.118831,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
2005,5,0.127181,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
2006,6,0.126543,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
2007,7,0.077722,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
2008,8,-0.159778,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
2009,9,0.251632,,0.1,0.1,0.1,0.1,0.1,0.1,0.4
2010,10,0.096669,,0.1,0.1,0.1,0.1,0.1,0.1,0.4


In [318]:
Portfolio(**{
    'AP': 0.1,
    'ARR': 0.1,
    'ARW': 0.1,
    'G': 0.1,
    'OP': 0.1,
    'ORR': 0.1,
    'ORW': 0.4,
})

Portfolio(AP=0.1, ARR=0.1, ARW=0.1, G=0.1, OP=0.1, ORR=0.1, ORW=0.4)

# Validating portfolios created so far

In [319]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt.efficient_frontier import EfficientFrontier

In [344]:
pf = PortfolioValidator(table, test_years=range(2004, 2019))

In [350]:
def _ef_builder(table):
    """ Builder for all kinds of efficient frontier models. """
    table = table.copy().groupby(by=[table.index.year, table.index.month]).tail(n=1)
    mu = mean_historical_return(table)
    S = CovarianceShrinkage(table).ledoit_wolf()
    return EfficientFrontier(mu, S)

def _ef_meta_builder(ef):
    """ Common metadata for all kinds of efficient frontier models. """
    ear, av, srp = ef.portfolio_performance()
    return {
        'expected_returns': ear,
        'expected_volatility': av,
        'sharpe_ratio': srp,
    }

def ef_max_sharpe(table):
    ef = _ef_builder(table)
    weights = ef.max_sharpe()
    return Portfolio(**weights), _ef_meta_builder(ef)

def ef_min_volatility(table):
    ef = _ef_builder(table)
    weights = ef.min_volatility()
    return Portfolio(**weights), _ef_meta_builder(ef)

In [351]:
pf.validate(ef_max_sharpe)

Unnamed: 0,actual_returns,actual_volatility,expected_returns,expected_volatility,sharpe_ratio,AP,ARR,ARW,G,OP,ORR,ORW
min,0.006994,,1.119308,0.098243,11.180734,0.0,0.0,0.0,0.575407,0.120922,0.073209,0.023371
max,0.065866,,2.027712,0.130408,17.890008,0.006651,0.022239,0.035302,0.677694,0.227317,0.157965,0.073498
mean,0.032288,,1.47368,0.109931,13.225301,0.000747,0.00505,0.013557,0.622953,0.184012,0.130734,0.042946
std,0.01764,,0.272647,0.009977,2.324558,0.001966,0.00809,0.010116,0.032161,0.025617,0.024842,0.014414


In [352]:
pf.inspect()

Unnamed: 0_level_0,modelling_years,actual_returns,actual_volatility,expected_returns,expected_volatility,sharpe_ratio,AP,ARR,ARW,G,OP,ORR,ORW
test_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2006,6,0.0428101,,2.02771,0.112167,17.89,0.0,1.02046e-15,0.0164753,0.652626,0.171098,0.0863029,0.0734979
2007,7,0.0415841,,1.87188,0.108491,17.0596,0.0066505,0.0,0.019952,0.676884,0.157467,0.0732091,0.0658382
2008,8,0.0340191,,1.69848,0.100653,16.6661,0.0030668,0.0,0.0353024,0.677694,0.120922,0.121301,0.041714
2009,9,0.0443942,,1.59163,0.130408,12.0434,0.0,0.0,0.0159399,0.575407,0.227317,0.157965,0.0233711
2010,10,0.043329,,1.5418,0.125409,12.1264,1.35048e-14,2.10062e-14,0.020339,0.591648,0.202098,0.141292,0.0446234
2011,11,0.0405266,,1.48098,0.11915,12.2534,4.31291e-14,8.50627e-14,0.0217955,0.594904,0.197926,0.143418,0.0419563
2012,12,0.0658655,,1.42009,0.112651,12.4201,2.06451e-14,1.64923e-14,0.0133716,0.603668,0.187516,0.148175,0.0472696
2013,13,0.0121125,,1.42599,0.10899,12.8918,1.50923e-14,1.28482e-14,0.0111099,0.598017,0.199533,0.13914,0.0521994
2014,14,0.0420604,,1.33013,0.107822,12.1418,1.25498e-16,0.00129663,0.0137451,0.632299,0.192672,0.128365,0.031623
2015,15,0.00699355,,1.28064,0.103297,12.1955,4.63788e-16,0.0096718,0.00820411,0.605389,0.195999,0.154522,0.0262142


In [353]:
pf.validate(ef_min_volatility)

Unnamed: 0,actual_returns,actual_volatility,expected_returns,expected_volatility,sharpe_ratio,AP,ARR,ARW,G,OP,ORR,ORW
min,0.006716,,0.99658,0.090911,10.631232,0.0,0.045211,0.0,0.493502,0.041854,0.281205,0.0
max,0.053217,,1.593853,0.120525,15.790913,0.005384,0.068104,0.000627,0.610809,0.11469,0.328707,1.005929e-17
mean,0.030157,,1.242888,0.10116,12.135502,0.000885,0.052809,4.8e-05,0.560011,0.078084,0.308162,2.611102e-18
std,0.014482,,0.173492,0.009418,1.86209,0.001828,0.009252,0.000174,0.034161,0.023503,0.015566,3.299671e-18


In [354]:
pf.inspect()

Unnamed: 0_level_0,modelling_years,actual_returns,actual_volatility,expected_returns,expected_volatility,sharpe_ratio,AP,ARR,ARW,G,OP,ORR,ORW
test_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2006,6,0.0354636,,1.59385,0.0996167,15.7909,0.00192505,0.0653764,0.000626619,0.541062,0.0625937,0.328416,0.0
2007,7,0.0387583,,1.47014,0.0954451,15.1851,0.00419457,0.0681044,0.0,0.554116,0.0525047,0.32108,0.0
2008,8,0.0340698,,1.38719,0.0909113,15.0301,0.00538409,0.0668046,1.3552499999999998e-20,0.563216,0.0418545,0.322741,0.0
2009,9,0.0359245,,1.33076,0.120525,10.868,0.0,0.0631016,0.0,0.493502,0.11469,0.328707,0.0
2010,10,0.0382489,,1.29524,0.116452,10.9431,0.0,0.0515964,0.0,0.521473,0.111315,0.315616,0.0
2011,11,0.0414058,,1.25489,0.110747,11.1426,1.27324e-18,0.0483288,0.0,0.534064,0.104776,0.312832,3.22632e-18
2012,12,0.0532172,,1.22238,0.104827,11.462,9.845400000000002e-19,0.0466697,7.80496e-20,0.549163,0.0953825,0.308784,6.338820000000001e-18
2013,13,0.0220716,,1.21233,0.100419,11.8653,1.42731e-18,0.0465942,1.06998e-18,0.555403,0.0919705,0.306032,1.00593e-17
2014,14,0.043678,,1.15332,0.0993658,11.3969,0.0,0.0467316,0.0,0.575203,0.0766095,0.301456,0.0
2015,15,0.00734839,,1.13606,0.096478,11.5594,0.0,0.0454994,0.0,0.57584,0.0768461,0.301814,6.2916600000000005e-18
