In [None]:
import itertools
import random
import time

import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import scipy
import yfinance as yf
from cvxopt import matrix,solvers
from scipy.optimize import minimize
from sklearn.covariance import EmpiricalCovariance, MinCovDet
from sklearn.neighbors import KernelDensity

solvers.options['show_progress'] = False
solvers.options['refinement'] = 2
solvers.options['abstol'] = 1e-13
solvers.options['reltol'] = 1e-13
solvers.options['feastol'] = 1e-13

In [None]:
# Collect the entries of opt.data["date"] for which data.history(...) returns
# at least one "Close" value, then display them.
# NOTE(review): `opt` and `data` are not defined in the cells above; presumably an
# Optimiser instance and a yfinance Ticker created elsewhere - confirm before
# running on a fresh kernel.
dates = opt.data["date"]
indexdates = []
for i in dates:
    index = np.array(data.history(start=i,end=i)["Close"])
    if index.size > 0:
        indexdates.append(i)
indexdates

In [None]:
# Display the `data` attribute of `optbench`.
# NOTE(review): `optbench` is not defined in any visible cell - confirm where it is created.
optbench.data

In [None]:
# Scratch cell exploring NumPy behaviour on a small vector.
a = np.array([1,4,3])
b = 2
# Transposing a 1-D array is a no-op, so this is the dot product of `a` with itself (result discarded).
np.transpose(a).dot(a)
# np.append without an axis flattens both inputs; only this last expression is displayed.
np.append(a,np.eye(3))

In [None]:
# A slice end of None means "until the end": same as a[1:]. Relies on `a` from the previous cell.
a[1:None]

In [None]:
# Run the cardinality-constrained optimisation with rho = alfa = 0 and unrestricted candidate attempts.
# NOTE(review): `opt` and `M` are not defined in any visible cell above this one.
opt.cardinality_opt(M,0,0,cost_func="volatility",limit_attempts=False,attempts=5)

In [None]:
def power_set(input):
    """Return every subset of ``input`` as a list of lists.

    The ordering matches the recursive definition: for each subset of the
    tail, the subset itself comes first, followed by the same subset with the
    head element prepended.
    """
    if len(input) == 0:
        return [[]]
    subsets = [[]]
    # Walk the elements back to front so the first element varies fastest.
    for position in range(len(input) - 1, -1, -1):
        head = input[position]
        grown = []
        for tail_subset in subsets:
            grown.append(tail_subset)
            grown.append([head] + tail_subset)
        subsets = grown
    return subsets

In [None]:
# Enumerate all subsets of {0, ..., 9} and keep those with exactly 5 elements.
sets = power_set(np.arange(0,10))
exactsets = []
for i in sets:
    if len(i) == 5:
        exactsets.append(i)

In [None]:
def getPCA(matrix):
    """Eigen-decompose a Hermitian matrix, largest eigenvalue first.

    Returns a tuple ``(eVal, eVec)`` where ``eVal`` is a diagonal matrix of
    the eigenvalues in descending order and the columns of ``eVec`` are the
    matching eigenvectors.
    """
    values, vectors = np.linalg.eigh(matrix)
    descending = np.argsort(values)[::-1]
    return np.diagflat(values[descending]), vectors[:, descending]

def fitKDE(obs,bWidth=.25,kernel='gaussian',x=None):
    """Fit a kernel density estimate to ``obs`` and evaluate it on ``x``.

    Parameters
    ----------
    obs : array-like
        Observations; a 1-D input is treated as a single feature column.
    bWidth : float
        Bandwidth passed to ``KernelDensity``.
    kernel : str
        Kernel name passed to ``KernelDensity``.
    x : array-like, optional
        Points at which the fitted density is evaluated. Defaults to the
        unique values of ``obs``.

    Returns
    -------
    pandas.Series
        Density values indexed by the flattened evaluation points.
    """
    # Accept lists / Series as well as ndarrays: ``.reshape`` needs an ndarray.
    obs = np.asarray(obs)
    if obs.ndim == 1:
        obs = obs.reshape(-1, 1)
    kde = KernelDensity(kernel=kernel, bandwidth=bWidth).fit(obs)
    if x is None:
        x = np.unique(obs).reshape(-1, 1)
    x = np.asarray(x)
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    log_density = kde.score_samples(x)  # log(density)
    return pd.Series(np.exp(log_density), index=x.flatten())

def errPDFs(var,eVal,q,bWidth,pts=1000):
    """Sum of squared differences between the Marcenko-Pastur pdf and the KDE of ``eVal``.

    The theoretical pdf is built by ``mpPDF(var, q, pts)`` and the empirical
    one by ``fitKDE`` evaluated on the same grid of points.
    """
    theoretical = mpPDF(var, q, pts)
    empirical = fitKDE(eVal, bWidth, x=theoretical.index.values)
    squared_gap = (empirical - theoretical) ** 2
    return np.sum(squared_gap)

def findMaxEval(eVal,q,bWidth):
    """Find the largest eigenvalue attributable to noise by fitting the Marcenko-Pastur pdf.

    The variance parameter is fitted by minimising ``errPDFs`` over the open
    interval (0, 1), starting from 0.5.

    Returns
    -------
    (eMax, var)
        ``var`` is the fitted variance (1 if the optimiser reports failure)
        and ``eMax = var * (1 + sqrt(1/q))**2``.
    """
    # errPDFs already has the (var, eVal, q, bWidth) signature that minimize
    # calls with, so no wrapper is needed.
    out = minimize(errPDFs, .5, args=(eVal, q, bWidth),
                   bounds=((1E-5, 1-1E-5),))
    var = out["x"][0] if out['success'] else 1
    eMax = var*(1+(1./q)**.5)**2
    return eMax,var

def denoisedCorr(eVal,eVec,nFacts):
    """Remove noise from a correlation matrix by flattening its random eigenvalues.

    Parameters
    ----------
    eVal : 2-D array
        Diagonal matrix of eigenvalues (as returned by ``getPCA``).
    eVec : 2-D array
        Matrix whose columns are the matching eigenvectors.
    nFacts : int
        Number of leading eigenvalues that are kept untouched; the remaining
        ones are replaced by their average.

    Returns
    -------
    2-D array
        The reconstructed matrix rescaled to a correlation matrix by ``cov2corr``.
    """
    eVal_ = np.diag(eVal).copy()
    eVal_[nFacts:] = eVal_[nFacts:].sum()/float(eVal_.shape[0]-nFacts)
    eVal_ = np.diag(eVal_)
    corr1 = np.dot(eVec,eVal_).dot(eVec.T)
    corr1 = cov2corr(corr1)
    return corr1
    # Usage (previously left as unreachable statements after the return):
    #   corr1 = denoisedCorr(eVal0, eVec0, nFacts0)
    #   eVal1, eVec1 = getPCA(corr1)
    
def atf(x):
    """Unwrap a single-element NumPy array into a Python scalar; pass anything else through."""
    return x.item() if isinstance(x, np.ndarray) else x

def mpPDF(var,q,pts=1000):
    """Marcenko-Pastur probability density function, with q = T/N.

    Returns a pandas Series with ``pts`` density values indexed by an evenly
    spaced grid between the lower and upper edge of the distribution.
    """
    spread = (1/q)**.5
    # atf turns one-element arrays into plain scalars before they reach linspace.
    lower_edge = atf(var*(1-spread)**2)
    upper_edge = atf(var*(1+spread)**2)

    grid = np.linspace(lower_edge, upper_edge, pts)
    density = q/(2*np.pi*var*grid)*((upper_edge-grid)*(grid-lower_edge))**.5
    return pd.Series(density, index=grid)

def cov2corr(cov):
    """Convert a covariance matrix into the corresponding correlation matrix.

    Entries that fall outside [-1, 1] because of numerical error are clipped
    back to the boundary.
    """
    scale = np.sqrt(np.diag(cov))
    corr = cov / np.outer(scale, scale)
    too_low = corr < -1
    corr[too_low] = -1
    too_high = corr > 1
    corr[too_high] = 1
    return corr


In [None]:
class Optimiser:
    def __init__(self):
        """Create an empty optimiser; no attributes are set here (``load_feather`` populates them)."""
        pass
    def load_feather(self,file1,file2="",T0=0,N0=0,N=48,T=-1,normalize=False):
        """Load prices and the asset universe from feather files and derive basic statistics.

        ``file1`` is read into ``self.data`` (rows ``T0:T``) and ``file2`` into
        ``self.universe`` (rows ``N0:N``). Asset names are the columns
        ``1+N0 : N+1`` of the price table. When ``normalize`` is True each
        price column is divided by its first value.

        Sets ``prices``, ``price_data``, ``mean_returns``, ``log_returns``,
        ``naive_cov``, ``cov`` and ``diffs`` on the instance.
        """
        self.data = pd.read_feather(file1).iloc[T0:T]
        self.universe = pd.read_feather(file2).iloc[N0:N]
        self.assets = self.data.columns[1+N0:N+1]

        # T x N matrix of closing prices, filled one asset column at a time.
        self.prices = np.zeros([len(self.data),len(self.assets)])
        for column, asset in enumerate(self.assets):
            self.prices[:,column] = self.data[asset]
            if normalize == True:
                self.prices[:,column] = self.prices[:,column]/self.prices[0,column]
        self.price_data = self.prices

        # Mean returns (scaled by 1000) and averaged logarithmic returns.
        first_row = self.prices[0,:]
        last_row = self.prices[-1,:]
        self.mean_returns = (last_row - first_row)/len(self.prices[:,0])*1000
        price_ratios = self.prices[1:,:]/self.prices[:-1,:]
        self.log_returns = np.mean(np.log(price_ratios),axis=0)
        self.naive_cov = np.cov(np.transpose(self.prices))
        self.cov = self.naive_cov
        self.diffs = self.prices[1:]-self.prices[:-1]
        
    def simulate_portfolios(self, N=100, norm = 1,method="mean"):
        mean = np.zeros(len(self.assets))
        mean = opt.maximum_sharpe()[0]
        #mean = opt.minimum_variance()[0]
        lr = []
        vol = []
        r = np.random.uniform(-1,1,size=[len(self.assets),N])
        for i in range(N):
            r[:,i] = mean + r[:,i]/norm
            lr.append(Optimiser.mean_returns(self.prices,r[:,i],method))
            vol.append(Optimiser.naive_vol(self.prices,r[:,i]))
        data = {
            'returns': lr,
            'volatility': vol
        }
        return pd.DataFrame(data)
    
    def minimum_variance(self):
        var = np.cov(np.transpose(self.prices))
        a = np.ones(len(self.assets))
        x = np.linalg.solve(var,a)
        allocation = x/a.dot(x)
        return allocation, Optimiser.mean_returns(self.prices,allocation,"mean"), Optimiser.naive_vol(self.prices,allocation)
    
    def maximum_sharpe(self):
        var = np.cov(np.transpose(self.prices))
        a = self.mean_returns
        #a = self.log_returns
        x = np.linalg.solve(var,a)
        allocation = x/a.dot(x)
        #allocation = allocation/sum(allocation)
        return allocation, Optimiser.mean_returns(self.prices,allocation,"mean"), Optimiser.naive_vol(self.prices,allocation)
    
    def gradient_descent_opt(self, w0,constraints=[],cost_func = "volatility",stepmethod = ["constant",1e-6],N=100,eps = 1e-10,benchmark = 0):
        V = self.cov
        mu = self.mean_returns
        
        def cost(w):
            if cost_func == "volatility":
                return self.vol(w)
            elif cost_func == "sharpe":
                return -self.sharpe(w)
            elif cost_func == "benchmark":
                return np.linalg.norm(self.prices.dot(w)-benchmark)**2
        
        def calc_grad(w):
            if cost_func == "volatility":
                return 2*V.dot(w)
            elif cost_func == "sharpe":
                return -(mu-mu.dot(w)*V.dot(w)/self.vol(w))/np.sqrt(self.vol(w))
            elif cost_func == "benchmark":
                return 2*(self.prices.dot(w)-benchmark).dot(self.prices)
            
        def projection(x):
            if "longonly1" in constraints:
                for j in range(len(x)):
                    x[j] = abs(x[j])
            if "longonly2" in constraints:
                for j in range(len(x)):
                    x[j] = max(x[j],0)
            
            if "shortonly2" in constraints:
                for j in range(len(x)):
                    x[j] = min(x[j],0)
            
            if "sum1" in constraints:
                x = x/sum(x)
                
            if "sum-1" in constraints:
                x = -x/sum(x)
                
            return x
        
        i = 0
        w = [projection(w0)]
        grad = 10
        while i < N and np.linalg.norm(grad) > eps:
            wcurrent = w[-1]
            grad = calc_grad(wcurrent)
            p = -grad/np.linalg.norm(grad)
            
            if stepmethod[0] == "constant":
                step = np.linalg.norm(grad)*stepmethod[1]
                
            elif stepmethod[0] == "armijo":
                c = stepmethod[1]
                tau = stepmethod[2]
                m = grad.dot(p)
                step = -c*m
                if m > 0:
                    print("No descent?")
                while cost(wcurrent)-cost(wcurrent+step*p) <= -step*c*m:
                    step = tau*step
                    
            wnext = projection(wcurrent + p*step)
            w.append(wnext)
            i+=1
        return w
    
    def cardinality_opt(self,M,rho,alfa,cost_func,limit_attempts = False, attempts = 20):
        N = len(self.assets)
        T = len(self.prices)
        
        self.mean_returns = (self.prices[-1,:] - self.prices[0,:])/len(self.prices[:,0])*1000
        self.naive_cov = np.cov(np.transpose(self.prices))
        P = self.naive_cov
        q = np.matrix(-alfa*rho*self.mean_returns)
        e = np.matrix(np.ones(N))
        C = P+np.transpose(e).dot(q)+np.transpose(q).dot(e)
        return Optimiser.cardinality_optimise(C,M,limit_attempts = limit_attempts, attempts = attempts)
    
    def cardinality_benchmark(self,benchmark,M):
        N = len(self.assets)
        T = len(self.prices)
        
        P = self.prices.transpose().dot(self.prices)
        q = -np.matrix(benchmark.dot(self.prices))
        e = np.matrix(np.ones(N))
        C = P+np.transpose(e).dot(q)+np.transpose(q).dot(e)
        return Optimiser.cardinality_optimise(C,M)
        
    def l1_cardinality_benchmark(self,benchmark,cardinality,constraints):
        M = cardinality
        N = len(self.assets)
        T = len(self.prices)
        a = [1]
        done = False
        constraints.append("L1")
        ws = np.round(np.array(self.cvx_opt(constraints,"benchmark",benchmark=benchmark,parameter = a[-1])),3)
        nums = np.array([len(ws[ws>0])])
        
        i=0
        while np.all(nums > M) and i < 20:
            a.append(a[-1]*2)
            w = np.round(np.array(self.cvx_opt(constraints,"benchmark",benchmark=benchmark,parameter = a[-1])),3)
            ws = np.append(ws,w)
            nums = np.append(nums,len(w[w>0]))
            i+=1
                
        while np.all(nums < M) and i < 20:
            a.append(a[-1]/2)
            w = np.round(np.array(self.cvx_opt(constraints,"benchmark",benchmark=benchmark,parameter = a[-1])),3)
            ws = np.append(ws,w)
            nums = np.append(nums,len(w[w>0]))
            i+=1
        i = 0
        while not done:
            i+=1
            if len(a) > 1:
                if min(nums[-1],nums[-2]) <= M and M <= max(nums[-1],nums[-2]):
                    a.append((a[-1]+a[-2])/2)
                elif min(nums[-1],nums[-3]) <= M and M <= max(nums[-1],nums[-3]):
                    a[-2] = a[-3]
                    nums[-2] = nums[-3]
                    a.append((a[-1]+a[-3])/2)
                else:
                    a.append(a[-1])
            else:
                a.append(a[-1])
            w = np.round(np.array(self.cvx_opt(constraints,"benchmark",benchmark=benchmark,parameter = a[-1])),3)
            ws = np.append(ws,w)
            nums = np.append(nums,len(w[w>0]))
            
            if nums[-1] == M or i == 10:
                
                # retrieve last time n <= M to continue
                
                lastlower_index = np.where(nums<=M)[0][-1]
                a.append(a[lastlower_index])
                w = np.round(np.array(self.cvx_opt(constraints,"benchmark",benchmark=benchmark,parameter = a[-1])),3)
                ws = np.append(ws,w)
                nums = np.append(nums,len(w[w>0]))
                
                j = 0
                while nums[-1] <= M and j <= 20:
                    f = 0.99
                    w = np.round(np.array(self.cvx_opt(constraints,"benchmark",benchmark=benchmark,parameter = a[-1]*f)),3)
                    if len(w[w>0]) <= M:
                        ws = np.append(ws,w)
                        nums = np.append(nums,len(w[w>0]))
                        a.append(a[-1]*f)
                    j+=1
                done = True
        ws = ws.reshape(len(ws)//len(self.assets),len(self.assets))
        self.benchmark = benchmark
        #return ws, nums, a    
        return self.cvx_opt(cost_func="benchmark",constraints=constraints,benchmark=benchmark,sparsity=ws[-1])
        
    def monte_carlo_cardinality(self,constraints,cost_func,selection = [], benchmark = [], adjust = (0,0),debug=False, parameter = 0,minreturn = 0,maxsum=1,sparsity = [], cardinality = 0, time = [0,None], minallocation = 0,simulations=100):
        self.benchmark = benchmark
        N = len(self.assets)
        bestw = 0
        lowestcost = 1e10
        n = np.array(range(N))
        
        for j in range(simulations):
            sample = np.random.choice(n, M)
            sparsity = np.array([int(i in sample) for i in range(N)])
            w = self.cvx_opt(constraints,cost_func,selection, benchmark, adjust,debug,parameter,minreturn,maxsum,sparsity,cardinality,time,minallocation)
            c = self.cost(w,cost_func)
            lowestcost = min(lowestcost,c)
            if c == lowestcost:
                bestw = w
        return bestw
    
    def brute_cardinality(self,constraints,cost_func,selection = [], benchmark = [], adjust = (0,0),debug=False, parameter = 0,minreturn = 0,maxsum=1,sparsity = [], cardinality = 0, time = [0,None], minallocation = 0):
        N = len(self.assets)
        def power_set(input):
        # returns a list of all subsets of the list a
            if (len(input) == 0):
                return [[]]
            main_subset = [ ]
            for small_subset in power_set(input[1:]):
                main_subset += [small_subset]
                main_subset += [[input[0]] + small_subset]
            return main_subset
        sets = power_set(np.arange(0,N))
        exactsets = []
        for i in sets:
            if len(i) == cardinality:
                exactsets.append(i)
        
        curmin = 1e10
        curbestw = np.zeros(N)
        for I in exactsets:
            sparsity = np.array([int(i in I) for i in range(N)])
            w = self.cvx_opt(constraints,cost_func,selection, benchmark, adjust,debug,parameter,minreturn,maxsum,sparsity,cardinality,time,minallocation)
            cost = self.cost(w,cost_func)
            if cost <= curmin:
                curmin = cost
                curbestw = w
        return curbestw
    
    def greedy_cardinality(self,constraints,cost_func,selection = [], benchmark = [], adjust = (0,0),debug=False, parameter = 0,minreturn = 0,maxsum=1,sparsity = [], cardinality = 0, quick = True, time = [0,None],min_change = 0.0001,strict=False, minallocation = 0):
        """Greedy forward selection of at most ``cardinality`` assets, followed by swap refinement.

        Growth phase: repeatedly add the single asset whose inclusion gives the
        lowest ``self.cost`` of the ``cvx_opt`` solution, as long as the cost
        drops to at most ``curmin * (1 - min_change)`` and the portfolio return
        is at least ``minreturn``. With ``strict`` the reference cost is reset
        to 1e10 before each growth step. With ``quick == False`` a swap
        refinement is also run after every successful addition.

        Swap phase (always run at the end): exchange one selected asset for one
        unselected asset while that lowers the cost by the same relative margin.

        The incoming ``sparsity`` argument is ignored; masks are built from the
        selected index set. Returns the ``cvx_opt`` allocation restricted to
        the final index set.
        """
        curmin = 1e10
        curminindex = 0
        N = len(self.assets)
        # n: indices still available for selection; I: indices selected so far.
        n = set(np.arange(N))
        I = np.array([])
        update = True
        while I.size < cardinality and update == True:
            update = False
            if strict:
                curmin = 1e10
            # Try adding each remaining asset and remember the best candidate.
            for i in n:
                I_ = np.append(I,i)
                sparsity = np.array([int(i in I_) for i in range(N)])
                wi = self.cvx_opt(constraints,cost_func,selection, benchmark, adjust,debug,parameter,minreturn,maxsum,sparsity,cardinality,time,minallocation)
                costi = self.cost(wi, cost_func)
                if costi <= curmin*(1-min_change) and self.returns(wi) >= minreturn:
                    curmin = costi
                    curminindices = I_
                    addedindex = i
                    update = True
            if update:
                I = curminindices
                n.remove(addedindex)
                if debug:
                    print("Current best indices: " + str(np.sort(I)))
                    print("Current lowest cost: " + str(curmin))
            # Optional swap refinement after each successful addition.
            if quick == False and update:
                while update == True:
                    update = False
                    for k in I:
                        for i in n:
                            # Candidate set: drop k, add i.
                            I_ = I[I!=k]
                            I_ = np.append(I_,i)
                            sparsity = np.array([int(i in I_) for i in range(N)])
                            wi = self.cvx_opt(constraints,cost_func,selection, benchmark, adjust,debug,parameter,minreturn,maxsum,sparsity,cardinality,time,minallocation)
                            costi = self.cost(wi, cost_func)
                            if costi < curmin*(1-min_change) and self.returns(wi) >= minreturn:
                                curmin = costi
                                addedindex = i
                                removedindex = k
                                curminindices = I_
                                update = True
                    if update:
                        n.remove(addedindex)
                        n.add(removedindex)
                        I = curminindices
                        if debug:
                            print("Current best indices: " + str(np.sort(I)))
                            print("Current lowest cost: " + str(curmin))
                # Re-arm the flag so the outer growth loop keeps going.
                update = True
        update = True
        
        # Final swap refinement, repeated until no swap improves the cost.
        while update == True:
            update = False
            for k in I:
                for i in n:
                    I_ = I[I!=k]
                    I_ = np.append(I_,i)
                    sparsity = np.array([int(i in I_) for i in range(N)])
                    wi = self.cvx_opt(constraints,cost_func,selection, benchmark, adjust,debug,parameter,minreturn,maxsum,sparsity,cardinality,time,minallocation)
                    costi = self.cost(wi, cost_func)
                    if costi < curmin*(1-min_change) and self.returns(wi) >= minreturn:
                        curmin = costi
                        addedindex = i
                        removedindex = k
                        curminindices = I_
                        update = True
            if update:
                n.remove(addedindex)
                n.add(removedindex)
                I = curminindices
                if debug:
                    print("Current best indices: " + str(np.sort(I)))
                    print("Current lowest cost: " + str(curmin))
        # Solve once more on the final support and return that allocation.
        sparsity = np.array([int(i in I) for i in range(N)])
        return self.cvx_opt(constraints,cost_func,selection, benchmark, adjust,debug,parameter,minreturn,maxsum,sparsity,cardinality,time,minallocation)
            
        
    def cvx_opt(self,constraints,cost_func,selection = [], benchmark = [], adjust = (0,0),debug=False, parameter = 0,minreturn = 0,maxsum=1,sparsity = [], cardinality = 0, time = [0,None], minallocation = 0):
        #expected return, sharpe, VaR, MAD
        #cost function
        if len(benchmark) > 0:
            self.benchmark = benchmark[time[0]:time[1]]
        self.prices = self.price_data[time[0]:time[1],:]
        self.fullassets = self.assets
        self.fullprices = self.prices
        if len(sparsity) > 0:
            indices = []
            for i in range(len(self.assets)):
                if np.round(sparsity[i],5) != 0:
                    indices.append(i)
            if len(indices) == 1:
                return sparsity/max(sparsity)
            self.assets = self.fullassets[indices]
            self.prices = self.fullprices[:,indices]
            if adjust[1] != 0:
                adjust = (adjust[0][indices],adjust[1])
        
        
        N = len(self.assets)
        T = len(self.prices)
        
        
        self.mean_returns = (self.prices[-1,:] - self.prices[0,:])/len(self.prices[:,0])*1000
        self.naive_cov = np.cov(np.transpose(self.prices))
        self.cov = self.naive_cov
        self.diffs = self.prices[1:]-self.prices[:-1]
        
        if cost_func == "volatility":
            Q = np.cov(np.transpose(self.prices))
            q = np.zeros(N)
        elif cost_func == "emp_volatility":
            Q = EmpiricalCovariance().fit(self.prices).covariance_
            q = np.zeros(N)
        elif cost_func == "robust_volatility":
            Q = MinCovDet().fit(self.prices).covariance_
            q = np.zeros(N)
        elif cost_func == "denoised_volatility":
            if N >= 3:
                Q = Optimiser.denoised_cov(self.prices)
            else:
                Q = np.cov(np.transpose(self.prices))
            q = np.zeros(N)
        elif cost_func == "benchmark":
            Q = self.prices.transpose().dot(self.prices)
            q = -self.benchmark.dot(self.prices)
        elif cost_func == "expected_return":
            Q = np.zeros([N,N])
            q = -self.mean_returns
        elif cost_func == "sharpe":
            Q = np.zeros([N+1,N+1])
            Q[:N,:N] = self.cov
            q = np.zeros(N+1)
        elif cost_func == "MAD":
            T = T-1
            Q = np.zeros([N+T,N+T])
            q = np.zeros(N+T)
            q[N:] = np.ones(T)
        
        #constraints
        G = np.array([])
        h = np.array([])
        A = np.array([])
        b = np.array([])
        if "longonly" in constraints:
            G = np.append(G,-np.eye(len(self.assets)))
            h = np.append(h,np.zeros(len(self.assets)))
            
        if "sum1" in constraints:
            A = np.append(A,np.ones(len(self.assets)))
            b = np.append(b,1)
        
        if "minreturn" in constraints:
            G = np.append(G,-self.mean_returns)
            h = np.append(h,-minreturn)
        
        if "return" in constraints:
            A = np.append(A,self.mean_returns)
            b = np.append(b,minreturn)
        
        if "maxsum" in constraints:
            G = np.append(G,np.ones(len(self.assets)))
            h = np.append(h,maxsum)
        
        if "select" in constraints:
            C = np.zeros([len(selection),len(self.assets)])
            for j in range(len(selection)):
                neg = 0
                if selection[j][1][0] == "!":
                    selection[j][1] = selection[j][1][1:]
                    neg = 1
                for i in range(len(self.assets)):
                    if self.universe.iloc[i][selection[j][0]]==selection[j][1]:
                        C[j,i] = 1
                if neg == 1:
                    C[j,:] = np.abs(C[j,:]-1)
            for j in range(len(selection)):
                if selection[j][2] == "=":
                    A = np.append(A,C[j,:])
                    b = np.append(b,selection[j][3])
                if selection[j][2] == "<=":
                    G = np.append(G,C[j,:])
                    h = np.append(h,selection[j][3])
                if selection[j][2] == ">=":
                    G = np.append(G,-C[j,:])
                    h = np.append(h,-selection[j][3])
        
        if minallocation > 0:
            G = np.append(G,-np.eye(len(self.assets)))
            h = np.append(h,-np.ones(len(self.assets))*minallocation)
        
        # not compatible with sharpe
        if "adjust" in constraints:
            Q = Q + np.eye(len(self.assets))*adjust[1]
            q = q + -2*adjust[1]*np.array(adjust[0])
        
        G = G.reshape(-1,len(self.assets))
        A = A.reshape(-1,len(self.assets))
        
        if cost_func == "sharpe":
            A_ = np.zeros([len(A)+1,len(A[0])+1])
            b_ = np.zeros(len(A)+1)
            A_[:len(A),:len(A[0])] = A
            A_[:len(A),len(A[0])] = -b
            A_[len(A),:len(A[0])] = self.mean_returns
            b_[len(A)] = 1
            
            G_ = np.zeros([len(G)+1,len(G[0])+1])
            h_ = np.zeros(len(G)+1)
            G_[:len(G),:len(G[0])] = G
            G_[:len(G),len(G[0])] = -h
            G_[len(G),len(G[0])] = -1
            
            A = A_
            b = b_
            G = G_
            h = h_
        
        if cost_func == "MAD":
            A_ = np.zeros([len(A),N+T])
            A_[:,:N] = A
            G_ = np.zeros([len(G)+2*T,N+T])
            h_ = np.zeros(len(G)+2*T)
            G_[:len(G),:N] = G
            h_[:len(G)] = h
            for i in range(T):
                G_[len(G)+2*i,:N] = self.diffs[i]-self.mean_returns
                G_[len(G)+2*i,N+i] = -1
                G_[len(G)+2*i+1,:N] = -self.diffs[i]+self.mean_returns
                G_[len(G)+2*i+1,N+i] = -1
            A = A_
            G = G_
            h = h_
            
        if "L1" in constraints:
            Q_ = np.zeros([len(Q)+N,len(Q)+N])
            Q_[:len(Q),:len(Q)] = Q
            q_ = np.zeros(len(q)+N)
            q_[:len(q)] = q
            q_[len(q):] = parameter*np.ones(N)
            
            A_ = np.zeros([len(A),2*N])
            A_[:,:N] = A
            G_ = np.zeros([len(G)+2*N,2*N])
            h_ = np.zeros(len(G)+2*N)
            G_[:len(G),:N] = G
            h_[:len(G)] = h
            for i in range(N):
                G_[len(G)+2*i,N+i] = -1  
                G_[len(G)+2*i,i] = 1
                G_[len(G)+2*i+1,N+i] = -1  
                G_[len(G)+2*i+1,i] = -1
            A = A_
            G = G_
            h = h_
            Q = Q_
            q = q_
            
        if "L2" in constraints:
            Q = Q + np.eye(len(Q))*parameter
        """Q = np.matrix(Q)
        q = np.matrix(q)
        G = np.matrix(G)
        h = np.matrix(h)
        A = np.matrix(A)
        b = np.matrix(b)"""
        
        
        Q = matrix(Q)
        q = matrix(q)
        G = matrix(G)
        h = matrix(h)
        A = matrix(A)
        b = matrix(b)
        
        
        if cost_func in ["MAD","expected_return"]:
            sol = solvers.lp(q,G,h,A,b)
        else:
            #for i in (Q, q, G, h, A, b):
            #    print(i)
            sol = solvers.qp(Q, q, G, h, A, b)
        
        
        
        
        #if debug == True:
        #    solution = sol
        if cost_func == "sharpe":
            solution = np.array(sol['x'])[:-1,0]/np.array(sol['x'])[-1,0]
        elif cost_func == "MAD" or "L1" in constraints:
            solution = np.array(sol['x'])[:N,0]
        else:
            solution = np.array(sol['x'])[:,0]
        
        if len(sparsity) > 0:
            sol = np.zeros(len(self.fullassets))
            for i in range(len(indices)):
                sol[indices[i]] = solution[i]
            solution = sol
        self.assets = self.fullassets
        self.prices = self.fullprices
        
        if "cardinality" in constraints:
            reduced_w = np.zeros(len(solution))
            for i in range(cardinality):
                reduced_w[np.argmax(solution)] = np.max(solution)
                solution[np.argmax(solution)] = 0
            if "sum1" in constraints:
                return reduced_w/sum(reduced_w)
            return reduced_w
        return solution
        
    def cost(self,x,cost_func,time=[0,None]):
        if cost_func == "volatility":
            Q = np.cov(np.transpose(self.prices))
            return x.dot(Q).dot(np.transpose(x))
        elif cost_func == "emp_volatility":
            Q = EmpiricalCovariance().fit(self.prices).covariance_
            return x.dot(Q).dot(np.transpose(x))
        elif cost_func == "robust_volatility":
            Q = MinCovDet().fit(self.prices).covariance_
            return x.dot(Q).dot(np.transpose(x))
        elif cost_func == "denoised_volatility":
            if len(x) >= 3:
                Q = Optimiser.denoised_cov(self.prices)
            else:
                Q = np.cov(np.transpose(self.prices))
            return x.dot(Q).dot(np.transpose(x))
        elif cost_func == "benchmark":
            return np.linalg.norm(self.prices.dot(x)-self.benchmark)
        elif cost_func == "expected_return":
            Q = np.zeros([N,N])
            q = -self.mean_returns
            x.dot(Q).dot(np.transpose(x))+2*x.dot(q)
        elif cost_func == "MAD":
            self.diffs = self.prices[1:]-self.prices[:-1]
            #display([abs(self.diffs[i]-self.mean_returns.dot(x)) for i in range(len(self.assets)-1)])
            return sum([abs(self.diffs[i].dot(x)-self.mean_returns.dot(x)) for i in range(len(self.assets)-1)])
        
    def lowerbound(C,K):
            alfa = np.min(C)
            d = sorted(np.diag(C))
            if alfa in d:
                return alfa
            K = min(K,len(d))
            return alfa + 1/np.sum(1/(d[:K]-np.ones(K)*alfa))
        
    def cardinality_optimise(C,M,limit_attempts = True, attempts = 20):
        M = M+1
        def submatrix(M,indices):
            return M[np.ix_(sorted(indices),sorted(indices))]

        def C_I(C,I):
            res = np.zeros([len(C),len(C)])
            for i in I:
                for j in I:
                    res[i,j] = C[i,j]
            return res

        def is_pd(K):
            try:
                np.linalg.cholesky(K)
                return 1 
            except np.linalg.linalg.LinAlgError as err:
                if 'Matrix is not positive definite' in err.args[0]:
                    return 0
                else:
                    raise

        # optimise x'Cx under the constraint supp(x) <= M
        n = len(C)
        N = np.arange(n)
        # index sets
        S_next = [[i] for i in N]
        mins = [np.min(np.diag(C))]
        minargs = [tuple([np.argmin(np.diag(C))])]
        minvecs = [[1]]
        
        # iterate
        for i in range(2,M):
            update = False
            S = S_next.copy()
            S_next = []
            curmin = mins[-1]
            curminargs = minargs[-1]
            curminvec = minvecs[-1]
            done = set()
            for I in S:
                J = np.setdiff1d(N,I)
                # find pos def candidates
                cand = J
                for j in J:
                    k = 0
                    while k < len(I):
                        if is_pd(submatrix(C,[j,I[k]])):
                            k+=1
                        else:
                            k = len(I)
                            cand = cand[cand!=j]
                lower = Optimiser.lowerbound(submatrix(C,np.union1d(cand,I)),M)
                if lower < curmin:
                    if limit_attempts:
                        cand = np.random.choice(cand,min(attempts,len(cand)),replace=False)
                    for j in cand:
                        # find next point
                        I_next = tuple(np.sort(np.append(I,j)))
                        if I_next not in done:
                            done.add(I_next)
                            C_I = submatrix(C,I_next)
                            e = np.ones(len(I_next))
                            xmin = (e.dot(np.linalg.inv(C_I)).dot(np.transpose(e)))**(-1)*(np.linalg.inv(C_I)).dot(np.transpose(e))
                            if np.all((xmin>0)):
                                xcost = xmin.dot(C_I).dot(np.transpose(xmin))
                                #display([I,xcost,curmin])
                                curmin = min(curmin,xcost)
                                if curmin == xcost:
                                    curminargs = I_next
                                    curminvec = xmin
                                    update = True
                                S_next.append(I_next)
                else:
                    #display("lower bound used")
                    pass

            if update:
                mins.append(np.array(curmin))
                minargs.append(curminargs)
                minvecs.append(np.array(curminvec))
            #display([curmin,curminargs,curminvec])
        
        w = np.zeros(n)
        #display("minargs: " + str(minargs) + ". minvecs:"+ str(minvecs))
        for i in range(len(minargs[-1])):
            w[minargs[-1][i]] = minvecs[-1][0][i]
        return w
   
    
    def time_subset_portfolio(self,t0,dt,constraints,cost_func,selection = [], benchmark = 0):
        self.prices_temp = self.prices
        self.prices = self.prices_temp[t0:t0+dt]
        optw = opt.cvx_opt(constraints,cost_func,selection)
        self.prices = self.prices_temp
        return optw
    
    def portfolio_sum(self,allocation,selection):
        """Total allocation held in assets matching a universe attribute.

        ``selection[0]`` names a column of ``self.universe`` and
        ``selection[1]`` is the value that column must equal. Row ``i`` of the
        universe is matched with ``allocation[i]``. Returns 0 when nothing
        matches.
        """
        matching_weights = (
            allocation[row]
            for row in range(len(allocation))
            if self.universe.iloc[row][selection[0]] == selection[1]
        )
        return sum(matching_weights, 0)
    # volatility of allocation
    def vol(self,allocation):
        """Return the quadratic form ``allocation . self.cov . allocation``.

        Note that no square root is taken here; ``sharpe`` applies ``np.sqrt``
        to this value itself.
        """
        cov_weighted = allocation.dot(self.cov)
        return cov_weighted.dot(allocation)
    
    # sharpe ratio of allocation
    def sharpe(self,allocation):
        """Ratio of ``allocation . self.mean_returns`` to ``sqrt(self.vol(allocation))``.

        NOTE(review): this reads the instance attribute ``self.mean_returns``
        that ``returns`` assigns; the class also defines a static method with
        the same name, so ``returns`` presumably has to run first - confirm.
        """
        expected = allocation.dot(self.mean_returns)
        variance = self.vol(allocation)
        return expected/np.sqrt(variance)
    
    def returns(self, allocation):
        """Scaled average price change of the portfolio over ``self.prices``.

        Side effect: stores the per-asset vector
        ``(last row - first row) / number of rows * 1000`` in
        ``self.mean_returns`` and returns its dot product with ``allocation``.
        """
        first_row = self.prices[0,:]
        last_row = self.prices[-1,:]
        n_rows = len(self.prices[:,0])
        self.mean_returns = (last_row - first_row)/n_rows*1000
        return allocation.dot(self.mean_returns)
    
    def show_returns(self, allocation):
        """Placeholder: not implemented yet, does nothing and returns None."""
        return None
    
    def view_portfolio(self,allocation,rounding = 5, zeros = False):
        """Tabulate an allocation next to the asset metadata.

        Parameters
        ----------
        allocation : array-like
            One weight per row of ``self.universe``.
        rounding : int
            Decimal places the weights are rounded to in the ALLOCATION column.
        zeros : bool
            When falsy, rows whose rounded allocation equals 0 are dropped.

        Returns
        -------
        DataFrame with the TICKER, NAME, SECTOR and LOCATION columns of
        ``self.universe`` (those that exist) plus ALLOCATION.

        Also prints ``self.returns(allocation)`` and
        ``self.cost(allocation, "volatility")``.
        """
        df = self.universe.filter(['TICKER','NAME','SECTOR','LOCATION'], axis=1)
        df["ALLOCATION"] = np.round(allocation,rounding)
        if not zeros:
            df = df[df.ALLOCATION != 0]
        # Report on this instance rather than the notebook-level `opt`, which
        # may be a different Optimiser (or not defined at all).
        print("returns:" + str(self.returns(allocation)))
        print("variance:" + str(self.cost(allocation,"volatility")))

        return df
    
    def mad(self,data):
        """Mean absolute deviation of ``data`` around its mean."""
        centre = np.mean(data)
        deviations = abs(centre-data)
        return np.mean(deviations)
    
    def running_portfolio_returns(self,allocations):
        """Cumulative profit of a time-varying allocation over ``self.prices``.

        ``allocations[i]`` is applied to the price change from row ``i-1`` to
        row ``i``; the first entry of the result is 0.
        """
        n_steps = len(self.prices)
        cumulative = np.zeros(n_steps)
        for step in range(1,n_steps):
            price_change = self.prices[step]-self.prices[step-1]
            cumulative[step] = cumulative[step-1] + price_change.dot(allocations[step])
        return cumulative
    # Average return from historical data given allocation of assets
    @staticmethod
    def mean_returns(prices,allocation,method="mean"):
        """Average per-row return of a fixed allocation over a price history.

        Parameters
        ----------
        prices : 2-D array, one row per time step and one column per asset.
        allocation : 1-D array with one weight per asset.
        method : {"mean", "dailylog", "fulllog"}
            "mean"     - (final value - initial value) / number of rows.
            "dailylog" - mean of the log ratios of consecutive portfolio values.
            "fulllog"  - log(final value / initial value) / number of rows.

        Raises
        ------
        ValueError
            If ``method`` is not one of the names above.

        The initial value is taken from the first row (index 0), the same
        convention the instance method ``returns`` uses.

        NOTE(review): ``returns`` assigns an instance attribute that is also
        called ``mean_returns`` and therefore hides this static method on that
        instance; call it through the class to be safe.
        """
        if method == "mean":
            final_value = allocation.dot(prices[-1,:])
            initial_value = allocation.dot(prices[0,:])
            return (final_value-initial_value)/len(prices[:,0])
        elif method == "dailylog":
            values = prices.dot(allocation)
            return np.mean(np.log(values[1:]/values[:-1]))
        elif method == "fulllog":
            final_value = allocation.dot(prices[-1,:])
            initial_value = allocation.dot(prices[0,:])
            return np.log(final_value/initial_value)/len(prices[:,0])
        raise ValueError("unknown method: " + repr(method))
    
    # Naive volatility
    @staticmethod
    def naive_vol(prices,allocation):
        """Quadratic form of ``allocation`` with the sample covariance of ``prices``.

        ``prices`` holds one column per asset; it is transposed before being
        handed to ``np.cov``.
        """
        sample_cov = np.cov(np.transpose(prices))
        cov_weighted = allocation.dot(sample_cov)
        return cov_weighted.dot(allocation)
    
    @staticmethod
    def denoised_cov(prices):
        """Covariance estimate rebuilt from a denoised correlation matrix.

        Steps, as written below:
        1. correlation matrix of the columns of ``prices`` and its
           eigen-decomposition via ``getPCA`` (eigenvalues on the diagonal of
           a matrix, sorted in descending order);
        2. ``findMaxEval`` is given the eigenvalues and the ratio
           rows / columns and returns a threshold eigenvalue
           (presumably the upper edge of the noise spectrum - confirm);
        3. the number of factors kept is the count of eigenvalues above that
           threshold, but at least 1;
        4. ``denoisedCorr`` builds the cleaned correlation, which is rescaled
           by the outer product of the sample standard deviations.
        """
        n_obs = len(prices)
        n_assets = len(prices[0,:])
        by_asset = np.transpose(prices)
        raw_corr = np.corrcoef(by_asset)
        eig_vals,eig_vecs = getPCA(raw_corr)
        std = np.sqrt(np.diag(np.cov(by_asset)))
        spectrum = np.diag(eig_vals)
        max_eig,_ = findMaxEval(spectrum,n_obs/n_assets,bWidth=.01)
        # The spectrum is descending, so reverse it for searchsorted.
        n_below = spectrum[::-1].searchsorted(max_eig)
        n_factors = max(eig_vals.shape[0]-n_below,1)
        clean_corr = denoisedCorr(eig_vals,eig_vecs,n_factors)
        return clean_corr*(np.outer(std,std))

    
    

In [None]:
opt = Optimiser()
optbench = Optimiser()
T = 2000

opt.load_feather("eurostoxx_prices","eurostoxx_universe",T=T,N=15,normalize=True)
optbench.load_feather("SXXP_Benchmark_Prices","SXXP_Benchmark_Prices",T=T,normalize=True)

In [None]:
len(opt.assets)

In [None]:
fig = px.line(opt.prices)

In [None]:
fig.write_image("prices.png")

## COMPARISON

In [None]:
opt = Optimiser()
optbench = Optimiser()
T = 1000

opt.load_feather("eurostoxx_prices","eurostoxx_universe",T=T,N=48,normalize=True)
optbench.load_feather("SXXP_Benchmark_Prices","SXXP_Benchmark_Prices",T=T,normalize=True)

In [None]:
opt.benchmark

In [None]:
len(opt.benchmark)

In [None]:
bench = optbench.prices[:,0]
w = opt.l1_cardinality_benchmark(constraints=["longonly"],cardinality=7,benchmark=bench)
opt.cost(w,"benchmark")

In [None]:
opt.view_portfolio(opt.cvx_opt(cost_func="benchmark",constraints=["longonly"],benchmark=bench))

In [None]:
### MINVAR
# Compare heuristics for the cardinality-constrained "volatility" problem.
# For every cardinality M one row of wall-clock times and one row of costs is
# appended to `runtime` / `cost`. Column order within a row:
#   0 brute force (disabled), 1 Monte Carlo, 2 cardinality_opt without attempt
#   limit, 3 cardinality_opt limited to 5 attempts, 4 greedy.
Ms = [3,4,5,6,7,8]
a = len(Ms)  # not used in this cell
cost = []
runtime = []
for M in Ms:
    display(M)
    times = np.zeros(5)
    costs = np.zeros(5)
    
    # Brute force is commented out: times[0] only measures timer overhead and
    # costs[0] is a placeholder, not a real cost.
    tic = time.perf_counter()
    #wbrute = opt.brute_cardinality(cost_func="volatility",constraints=["longonly","sum1"],cardinality=M)
    toc = time.perf_counter()
    times[0] = toc-tic
    #costs[0] = opt.cost(wbrute,"volatility")
    costs[0] = 1
    
    # Monte Carlo search with 1000 simulations.
    tic = time.perf_counter()
    wrandom = opt.monte_carlo_cardinality(cost_func="volatility",constraints=["longonly","sum1"],cardinality=M,simulations=1000)
    toc = time.perf_counter()
    times[1] = toc-tic
    costs[1] = opt.cost(wrandom,"volatility")

    # Increasing-set algorithm, every candidate explored.
    tic = time.perf_counter()
    wincset = opt.cardinality_opt(M,0,0,cost_func="volatility",limit_attempts=False)
    toc = time.perf_counter()
    times[2] = toc-tic
    costs[2] = opt.cost(wincset,"volatility")

    # Increasing-set algorithm, at most 5 randomly chosen candidates per step.
    tic = time.perf_counter()
    wincsetfast = opt.cardinality_opt(M,0,0,cost_func="volatility",limit_attempts=True,attempts=5)
    toc = time.perf_counter()
    times[3] = toc-tic
    costs[3] = opt.cost(wincsetfast,"volatility")

    # Greedy selection.
    tic = time.perf_counter()
    wgreedy = opt.greedy_cardinality(cost_func="volatility",constraints=["longonly","sum1"],cardinality=M)
    toc = time.perf_counter()
    times[4] = toc-tic
    costs[4] = opt.cost(wgreedy,"volatility")
    cost.append(costs)
    runtime.append(times)

In [None]:
### BENCH
# Same comparison as the MINVAR cell, for the "benchmark" cost against the
# first column of optbench.prices. Overwrites `cost` and `runtime`.
# Column order within a row: 0 brute force (disabled), 1 Monte Carlo,
# 2 L1 heuristic, 3 greedy.
Ms = [5,10,15,20,25,30]
a = len(Ms)  # not used in this cell
cost = []
runtime = []
bench = np.array(optbench.prices[:,0])
for M in Ms:
    display(M)
    times = np.zeros(4)
    costs = np.zeros(4)
    
    # Brute force is commented out: times[0] only measures timer overhead and
    # costs[0] is a placeholder, not a real cost.
    tic = time.perf_counter()
    #wbrute = opt.brute_cardinality(cost_func="benchmark",constraints=[""],cardinality=M,benchmark=bench)
    toc = time.perf_counter()
    times[0] = toc-tic
    #costs[0] = opt.cost(wbrute,"benchmark")
    costs[0] = 10
    
    # NOTE(review): constraints=[""] presumably means "no constraints" because
    # the empty name matches no known constraint - confirm against cvx_opt.
    tic = time.perf_counter()
    wrandom = opt.monte_carlo_cardinality(cost_func="benchmark",constraints=[""],cardinality=M,simulations=1000,benchmark=bench)
    toc = time.perf_counter()
    times[1] = toc-tic
    costs[1] = opt.cost(wrandom,"benchmark")

    tic = time.perf_counter()
    wl1 = opt.l1_cardinality_benchmark(constraints=[""],cardinality=M,benchmark=bench)
    toc = time.perf_counter()
    times[2] = toc-tic
    costs[2] = opt.cost(wl1,"benchmark")

    tic = time.perf_counter()
    wgreedy = opt.greedy_cardinality(cost_func="benchmark",constraints=[""],cardinality=M,benchmark=bench)
    toc = time.perf_counter()
    times[3] = toc-tic
    costs[3] = opt.cost(wgreedy,"benchmark")
    cost.append(costs)
    runtime.append(times)

In [None]:
cost

In [None]:
wgreedy = opt.greedy_cardinality(cost_func="volatility",constraints=["sum1"],cardinality=4,debug=True)

In [None]:
runtime

In [None]:
# Relative excess of every method's cost over the best cost in the same row
# (one row of `cost` per cardinality).
relcost = [(row-min(row))/min(row) for row in cost]
relcost

In [None]:
display(runtime,cost)

In [None]:
# Print one LaTeX table row per cardinality: "&rel. cost (runtime s)&...".
for i in range(len(runtime)):
    entries = [
        str(np.round(relcost[i][j],3)) + " (" + str(np.round(runtime[i][j],1)) + "s)" + "&"
        for j in range(len(relcost[i]))
    ]
    text = "&" + "".join(entries)
    # Drop the trailing column separator.
    print(text[:-1])


In [None]:
2317/60

## Efficient frontier

In [None]:
opt = Optimiser()
optbench = Optimiser()
T = 1000
opt.load_feather("eurostoxx_prices","eurostoxx_universe",T=T,N0=0,N=4,normalize=True)
optbench.load_feather("SXXP_Benchmark_Prices","SXXP_Benchmark_Prices",T=T,normalize=True)

In [None]:
N = 100
weights = np.zeros([N,len(opt.assets)])

returns = np.linspace(0,2,N)

costs = np.zeros(N)

constraints = ["sum1","minreturn"]
for i in range(len(returns)):
    weights[i,:] = opt.cvx_opt(cost_func="volatility",constraints=constraints,minreturn = returns[i])

    costs[i] = opt.cost(weights[i,:],"volatility")
    returns[i] = opt.returns(weights[i,:])
 
    

In [None]:
sparsities = [[1,1,0,0],[1,0,1,0],[1,0,0,1],[0,1,1,0],[0,1,0,1],[0,0,1,1]]

N = 1000
fullcosts = np.zeros([len(sparsities),N])
fullreturns = np.zeros([len(sparsities),N])

for j in range(len(sparsities)):
    weights = np.zeros([N,len(opt.assets)])

    returns = np.linspace(0,2,N)

    costs = np.zeros(N)

    constraints = ["sum1","minreturn"]
    for i in range(len(returns)):
        weights[i,:] = opt.cvx_opt(cost_func="volatility",constraints=constraints,minreturn = returns[i],sparsity=sparsities[j])
        costs[i] = opt.cost(weights[i,:],"volatility")
        returns[i] = opt.returns(weights[i,:])
    fullcosts[j,:] = costs
    fullreturns[j,:] = returns


In [None]:
# Lower envelope of the frontiers: for every target-return index, the smallest
# cost over all sparsity patterns. Taking the minimum along axis 0 sizes the
# result from `fullcosts` itself instead of relying on the global `N` still
# holding the value used when `fullcosts` was built.
lowestcost = np.min(fullcosts,axis=0)

In [None]:
# One frontier per sparsity pattern: the first pattern seeds the figure, the
# remaining five are added as blue lines.
fig = px.line(x=fullcosts[0,:],y=fullreturns[0,:],log_x=False)
for pattern in range(1,6):
    fig.add_scatter(x=fullcosts[pattern,:],y=fullreturns[pattern,:],mode="lines",marker = {'color' : 'blue'})
# Lower envelope in red.
# NOTE(review): it is drawn against the returns of pattern 3 only - confirm
# that this is the intended y-axis for the envelope.
fig.add_scatter(x=lowestcost,y=fullreturns[3,:],mode="lines",marker = {'color' : 'red'})

fig.update_layout(
    title="Cardinality Constrained Efficient Frontier",
    font_family="Times New Roman",
    font_color="black",
    font_size=25,
    xaxis_title="Risk",
    yaxis_title="Return",
    autosize=False,
    width=1000,
    height=500,
    showlegend=False,
)
fig.show()

In [None]:
fig = px.scatter(x=costs,y=returns,log_x=False)
fig.update_layout(title="Unconstrained Efficient Frontier",
        font_family="Times New Roman",
        font_color="black",
        font_size=25,
        xaxis_title="Risk",
        yaxis_title="Return"
        )
fig.show()

## Allocation evolution

In [None]:
# Fresh optimisers for the allocation-evolution experiments: `opt` holds the
# asset universe, `optbench` the benchmark series.
opt = Optimiser()
optbench = Optimiser()
T = 1000
opt.load_feather("eurostoxx_prices","eurostoxx_universe",T=T,N=48,normalize=True)
# NOTE(review): other setup cells in this notebook pass "SXXP_Benchmark_Prices"
# as the second argument here, this one passes "SXXP_Benchmarks" - confirm
# which name is correct.
optbench.load_feather("SXXP_Benchmark_Prices","SXXP_Benchmarks",T=T,normalize=True)

In [None]:
N = 50
sample = 350
weights = np.zeros([len(opt.assets),N])
cardweights = np.zeros([len(opt.assets),N])
robustweights = np.zeros([len(opt.assets),N])

M = 8
constraints = ["longonly"]
bench = optbench.prices[:,0]

last = len(opt.prices)-sample
dt = last//N
t = np.arange(N)*dt+sample
display(t)
sparsity=np.array([0,0,1,0,0,1,0,0,0,0])
l = 0.0000

for i in range(0,len(t)):
    if i % 10 == 0:
        print(str(i))
    cardweights[:,i] = opt.greedy_cardinality(benchmark=bench,debug=False,cardinality=M,time=[t[i]-sample,t[i]],cost_func="benchmark",constraints=constraints)
    weights[:,i] = opt.cvx_opt(benchmark=bench,time=[t[i]-sample,t[i]],cost_func="benchmark",constraints=constraints)
    #robustweights[:,i] = opt.cvx_opt(adjust = (weights[:,i-1],l),time=[t[i]-sample,t[i]],cost_func="robust_volatility",constraints=constraints,minreturn=0.01)
    

In [None]:
opt.benchmark = bench
opt.prices = opt.price_data
display(opt.cost(cardweights[:,3],"benchmark"))

In [None]:
opt = Optimiser()
optbench = Optimiser()
T = 1000
opt.load_feather("eurostoxx_prices","eurostoxx_universe",T=T,N=10,normalize=True)
optbench.load_feather("SXXP_Benchmark_Prices","SXXP_Benchmarks",T=T,normalize=True)

In [None]:
px.line(card)

In [None]:
fig = px.line(costs)
fig.add_scatter(y=cardcosts)

In [None]:
unconstrained

In [None]:
# Rolling re-optimisation: at each of N rebalancing times the unconstrained
# and the greedy cardinality-constrained portfolios are solved on the
# preceding `sample` rows, and their cost / number of held assets recorded.
N = 100
sample = 1000
weights = np.zeros([len(opt.assets),N])
noadjustweights = np.zeros([len(opt.assets),N])
cardweights = np.zeros([len(opt.assets),N])
#robustweights = np.zeros([len(opt.assets),N])

bench = optbench.prices[:,0]
M = 2
constraints = ["sum1","longonly"]
cost="volatility"

last = len(opt.prices)-sample
dt = last//N
t = np.arange(N)*dt+sample
display(t)
sparsity=np.array([0,0,1,0,0,1,0,0,0,0])
l = 0.0000

costs = np.zeros(N)
cardcosts = np.zeros(N)
card = np.zeros(N)

cardweights[:,0] = opt.greedy_cardinality(quick=True,strict=False,minallocation=0.00,debug=False,cardinality=M,time=[t[0]-sample,t[0]],cost_func=cost,constraints=constraints,minreturn=0.01)
weights[:,0] = opt.cvx_opt(time=[t[0]-sample,t[0]],cost_func=cost,constraints=constraints)
# Record the metrics of the first window as well; previously index 0 of
# costs / cardcosts / card kept its initial 0 and distorted the plots.
costs[0] = opt.cost(weights[:,0],cost)
cardcosts[0] = opt.cost(cardweights[:,0],cost)
unconstrained = weights[:,0]
card[0] = len(unconstrained[unconstrained>1e-6])
#noadjustweights[:,0] = opt.cvx_opt(time=[t[0]-sample,t[0]],cost_func="volatility",constraints=constraints[:2])
for i in range(1,len(t)):
    if i % 5 == 0:
        print(str(i))
    # Each solve is passed the previous weights and the parameter l via `adjust`.
    cardweights[:,i] = opt.greedy_cardinality(quick=True,strict=False,minallocation=0.00,adjust = (cardweights[:,i-1],l),debug=False,cardinality=M,time=[t[i]-sample,t[i]],cost_func=cost,constraints=constraints,minreturn=0.01)
    weights[:,i] = opt.cvx_opt(adjust = (weights[:,i-1],l),time=[t[i]-sample,t[i]],cost_func=cost,constraints=constraints)
    costs[i] = opt.cost(weights[:,i],cost)
    cardcosts[i] = opt.cost(cardweights[:,i],cost)
    unconstrained = weights[:,i]
    # Number of assets the unconstrained solution holds with weight above 1e-6.
    card[i] = len(unconstrained[unconstrained>1e-6])
    
    #robustweights[:,i] = opt.cvx_opt(adjust = (weights[:,i-1],l),time=[t[i]-sample,t[i]],cost_func="robust_volatility",constraints=constraints,minreturn=0.01)
    #noadjustweights[:,i] = opt.cvx_opt(adjust = (weights[:,i-1],l),time=[t[i]-sample,t[i]],cost_func="volatility",constraints=constraints[:2],minreturn=0.01)

In [None]:
robustweights

In [None]:
N = 100
sample = 350
weights = np.zeros([len(opt.assets),N])
noadjustweights = np.zeros([len(opt.assets),N])
cardweights = np.zeros([len(opt.assets),N])
robustweights = np.zeros([len(opt.assets),N])
denoisedweights = np.zeros([len(opt.assets),N])

bench = optbench.prices[:,0]
M = 2
constraints = ["sum1","longonly"]

last = len(opt.prices)-sample
dt = last//N
t = np.arange(N)*dt+sample
display(t)
sparsity=np.array([0,0,1,0,0,1,0,0,0,0])
l = 0.0000

cost = "volatility"

#cardweights[:,0] = opt.greedy_cardinality(debug=False,cardinality=M,time=[t[0]-sample,t[0]],cost_func=cost,constraints=constraints)
weights[:,0] = opt.cvx_opt(time=[t[0]-sample,t[0]],cost_func=cost,constraints=constraints)
robustweights[:,0] = opt.cvx_opt(time=[t[0]-sample,t[0]],cost_func="robust_volatility",constraints=constraints,minreturn=0.01)
denoisedweights[:,0] = opt.cvx_opt(time=[t[0]-sample,t[0]],cost_func="denoised_volatility",constraints=constraints,minreturn=0.01)

noadjustweights[:,0] = opt.cvx_opt(time=[t[0]-sample,t[0]],cost_func="volatility",constraints=constraints[:2])
for i in range(1,len(t)):
    if i % 5 == 0:
        print(str(i))
    #cardweights[:,i] = opt.greedy_cardinality(adjust = (cardweights[:,i-1],l),debug=False,cardinality=M,time=[t[i]-sample,t[i]],cost_func=cost,constraints=constraints)
    weights[:,i] = opt.cvx_opt(adjust = (weights[:,i-1],l),time=[t[i]-sample,t[i]],cost_func=cost,constraints=constraints)
    robustweights[:,i] = opt.cvx_opt(adjust = (weights[:,i-1],l),time=[t[i]-sample,t[i]],cost_func="robust_volatility",constraints=constraints)
    denoisedweights[:,i] = opt.cvx_opt(adjust = (weights[:,i-1],l),time=[t[i]-sample,t[i]],cost_func="denoised_volatility",constraints=constraints)
    
    #noadjustweights[:,i] = opt.cvx_opt(adjust = (weights[:,i-1],l),time=[t[i]-sample,t[i]],cost_func="volatility",constraints=constraints[:2],minreturn=0.01)

In [None]:
len(opt.assets)

In [None]:
for i in range(len(opt.assets)):
    fig = px.line(labels={"x":"start time (days)","y":"portfolio weight"})
    
    #fig.add_scatter(x=t,y=np.round(cardweights[i,:],5),name="cardinality")
    fig.add_scatter(x=t,y=np.round(weights[i,:],5),name="sample",marker = {'color' : 'blue'})
    fig.add_scatter(x=t,y=np.round(robustweights[i,:],5),name="robust",marker = {'color' : 'red'})
    fig.add_scatter(x=t,y=np.round(denoisedweights[i,:],5),name="denoised",marker = {'color' : 'green'})

    fig.update_layout(title="Asset " + str(i+1) + ": " + str(opt.universe.iloc[i]["NAME"]),
        font_family="Times New Roman",
        font_color="black",
        font_size=25,
        showlegend=True
        ,yaxis_range=[0,1]
        )
    fig.show()
    #opt.view_portfolio(np.round(weights[:,-1],5))
    fig.write_image("3asset" + str(i) +".png")

## Out-of-sample "prediction"

In [None]:
opt_train = Optimiser()
opt_test = Optimiser()
T_train = 1500
opt_train.load_feather("prices","universe",T0=0,T=T_train,normalize=True)
opt_test.load_feather("prices","universe",T0=T_train,T=T_train+500,normalize=True)

unconstrainedw = opt_train.cvx_opt(cost_func="volatility",constraints=["sum1","minreturn"],minreturn=0)
cconstrainedw = opt_train.greedy_cardinality(cost_func="volatility",constraints=["sum1","minreturn"],minreturn=0,cardinality=10)


In [None]:
fig = px.line(opt_test.prices.dot(unconstrainedw))
fig.add_scatter(y=opt_test.prices.dot(cconstrainedw))

# Optimiser

Different cost functions (full sample)

In [None]:
len(opt.prices)

In [None]:
# return
minvarw =  opt.time_subset_portfolio(2000,500,cost_func="volatility",constraints=["longonly","sum1"])
maxreturnw = opt.cvx_opt(cost_func="expected_return",constraints=["longonly","sum1"])
maxsharpew = opt.cvx_opt(cost_func="sharpe",constraints=["longonly","sum1"])
madw = opt.time_subset_portfolio(2000,500,cost_func="MAD",constraints=["sum1","longonly"])

fig = px.line(opt.prices.dot(minvarw))
fig.add_scatter(y=opt.prices.dot(maxreturnw))
fig.add_scatter(y=opt.prices.dot(maxsharpew))
fig.add_scatter(y=opt.prices.dot(madw))

fig.show()
display(opt.view_portfolio(np.round(minvarw,5)))
display(opt.view_portfolio(np.round(maxreturnw,5)))
display(opt.view_portfolio(np.round(maxsharpew,5)))
display(opt.view_portfolio(np.round(madw,5)))
display(opt.mad(opt.diffs.dot(minvarw)),opt.mad(opt.diffs.dot(madw)))
display(np.var(opt.prices[2000:2500].dot(minvarw)),np.var(opt.prices[2000:2500].dot(madw)))

## greedy

In [None]:
bench = optbench.prices[:,0]

In [None]:
i = 12
wgreed = opt.greedy_cardinality(cost_func="benchmark",constraints=["longonly"],cardinality=i,benchmark=bench)
w = opt.cvx_opt(cost_func="benchmark",constraints=["longonly"],benchmark=bench)
ws, nums, a = opt.l1_cardinality_benchmark(bench, M = i, constraints=["longonly"])
condw = np.round(opt.cvx_opt(cost_func="benchmark",constraints=["sparsity_profile","longonly"],sparsity=ws[-1][:],benchmark = bench),5)


In [None]:
display(opt.cost(wgreed,cost_func="benchmark"))
display(opt.cost(condw,cost_func="benchmark"))
display(opt.cost(w,cost_func="benchmark"))

In [None]:
wgreed = opt.greedy_cardinality(cost_func="volatility",constraints=["longonly","sum1"],cardinality=5,quick=False)
#wgreedq = opt.greedy_cardinality(cost_func="volatility",constraints=["longonly","sum1"],cardinality=5,quick=True)

In [None]:
# Only the quick=False result is computed in the cell above (the quick=True
# run that defined `wgreedq` is commented out), so only that cost is shown.
display(opt.cost(wgreed,"volatility"))

In [None]:
display(opt.view_portfolio(wgreed))

In [None]:
display(opt.view_portfolio(w))

In [None]:
str([1,2])

In [None]:
w = opt.cvx_opt(cost_func="volatility",constraints=["sum1","minreturn","longonly"],minreturn=1e-8)

In [None]:
def write(filename,number):
    """Append ``str(number)`` as a new line to ``<filename>.txt``."""
    target = filename + '.txt'
    with open(target, 'a') as handle:
        handle.writelines([str(number), "\n"])

In [None]:
times

In [None]:
opt = Optimiser()
optbench = Optimiser()
T = 1000
opt.load_feather("eurostoxx_prices","eurostoxx_universe",T=T,N=48,normalize=True)
optbench.load_feather("SXXP_Benchmark_Prices","SXXP_Benchmark_Prices",T=T,normalize=True)

In [None]:
# Approximate efficient frontiers: for every target return and every
# cardinality in M, run the greedy heuristic and log its cost, runtime and
# realised return to text files through write(). The last row of
# costs / actual_returns holds the unconstrained reference solution.
returns = np.linspace(0,0.7,250)
M = [3,4,5,7,10,15]
costs = np.zeros([len(M)+1,len(returns)])
actual_returns = np.zeros([len(M)+1,len(returns)])
times = np.zeros([len(M)+1,len(returns)])
for i in range(len(returns)):
    display(i)
    # The unconstrained solve does not depend on the cardinality, so it is
    # done once per target return and kept out of the greedy timings.
    w = opt.cvx_opt(cost_func="volatility",constraints=["sum1","return"],minreturn=returns[i])
    for j in range(len(M)):
        tic = time.perf_counter()
        wgreed = opt.greedy_cardinality(cost_func="volatility",constraints=["sum1","return"],cardinality=M[j],minreturn=returns[i],quick=False)
        toc = time.perf_counter()
        times[j,i] = toc-tic
        costs[j,i] = opt.cost(wgreed,"volatility")
        actual_returns[j,i] = opt.returns(wgreed)
        
        write("M"+str(M[j])+"cost",costs[j,i])
        write("M"+str(M[j])+"time",times[j,i])
        write("M"+str(M[j])+"return",actual_returns[j,i])
        
    costs[-1,i] = opt.cost(w,"volatility")
    actual_returns[-1,i] = opt.returns(w)

In [None]:
# Reload the first N logged results per cardinality from the text files that
# write() appends to (one number per line).
# NOTE(review): the sweep cell in this notebook logs M in [3,4,5,7,10,15];
# the other cardinalities requested here need files from an earlier run.
returns = np.linspace(0,1,100)
N = 6
returns = returns[:N]
M = np.arange(3,12)
costs = np.zeros([len(M)+1,len(returns)])
actual_returns = np.zeros([len(M)+1,len(returns)])
times = np.zeros([len(M)+1,len(returns)])
for i in range(len(M)):
    prefix = "M"+str(M[i])
    # Newline is already the row separator, and current NumPy rejects it as a
    # `delimiter`; ndmin=1 keeps a one-line file sliceable.
    costs[i,:N] = np.loadtxt(prefix+"cost.txt", skiprows=0, dtype=float, ndmin=1)[:N]
    actual_returns[i,:N] = np.loadtxt(prefix+"return.txt", skiprows=0, dtype=float, ndmin=1)[:N]
    times[i,:N] = np.loadtxt(prefix+"time.txt", skiprows=0, dtype=float, ndmin=1)[:N]
    
    

In [None]:
returns[1]

In [None]:
px.line(x=M,y=[np.mean(times[i,:]) for i in range(len(times)-1)])

In [None]:
wgreed = opt.greedy_cardinality(strict=True,minallocation=.001,cost_func="volatility",constraints=["sum1","minreturn","longonly"],cardinality=10,minreturn=0,debug=True)

In [None]:
# Greedy 10-asset minimum-variance portfolio with a minimum return of 0.5.
wgreed = opt.greedy_cardinality(
    cost_func="volatility",
    constraints=["sum1","minreturn","longonly"],
    cardinality=10,
    minreturn=0.5,
    debug=True,
)

In [None]:
wgreed = opt.greedy_cardinality(cost_func="volatility",constraints=["return","sum1"],cardinality=4,minreturn=0.35,debug=True,quick=True)
display(opt.view_portfolio(wgreed))

In [None]:
wgreed = opt.greedy_cardinality(cost_func="volatility",constraints=["return","sum1"],cardinality=5,minreturn=0.35,debug=True,quick=False)
display(opt.view_portfolio(wgreed))

In [None]:
(0.004160279510962817-0.004160279528396642)/0.004160279528396642

In [None]:
fig = px.line(x=costs[-1,:],y=actual_returns[-1,:])
for i in range(len(M)):
    fig.add_scatter(x=costs[i,:],y=actual_returns[i,:],mode="lines",name='M = '+str(M[i]),marker=dict(size=5,symbol="cross"))
fig.update_layout(title="Approximate Efficient Frontiers",
        font_family="Times New Roman",
        font_color="black",
        font_size=25,
        xaxis_title="Risk",
        yaxis_title="Return",
        autosize=False,
        width=1000,
        height=500,
        showlegend=True
        )
fig.show()

In [None]:
wgreed = opt.greedy_cardinality(cost_func="volatility",constraints=["longonly","sum1","minreturn"],cardinality=5,minreturn=20.05e-6)

In [None]:
wgreed = opt.greedy_cardinality(strict=True,cost_func="volatility",constraints=["sum1","minreturn","longonly"],cardinality=10,minreturn=1e-6,debug=True,minallocation=1e-3)

In [None]:
wgreed = opt.greedy_cardinality(cost_func="volatility",constraints=["sum1","minreturn"],cardinality=3,minreturn=0,debug=True)

In [None]:
costs[0,3]

In [None]:
display(opt.view_portfolio(wgreed))

In [None]:
display(opt.view_portfolio(w))
display(opt.view_portfolio(wgreed))


## monte carlo

In [None]:
times = []
for i in range(500):
    tic = time.perf_counter()
    w = opt.cvx_opt(cost_func="volatility",constraints=["longonly","sum1"])
    toc = time.perf_counter()
    times.append(toc-tic)
np.mean(times)

In [None]:
scipy.special.comb(40,10,exact=True)

In [None]:
bench = optbench.prices[:,0]

In [None]:
sims = 10000
variances = np.array([])
for i in range(10,11):
    tic = time.perf_counter()
    mcw = np.round(opt.monte_carlo_cardinality(cost_func="volatility",constraints=["longonly","sum1"],benchmark = bench, simulations=sims,M=i),3)
    toc = time.perf_counter()
    with open('results.txt', 'a') as f:
        f.write("Completed " + str(sims) + " simulations for M = " + str(i) + " yielding a variance of " + str(opt.cost(mcw,"volatility")) + ". Time: " + str(toc-tic) + " seconds. \n weights: " + str(mcw) + "\n")
    variances = np.append(variances,opt.cost(mcw,"volatility"))
    #display(mcw)
with open('results.txt', 'a') as f:
    f.write("variances: " + str(variances))
#display(opt.cost(mcw,"volatility"))

In [None]:
np.linalg.norm(opt.prices.dot(mcw)-bench)

## L1

In [None]:
opt = Optimiser()
optbench = Optimiser()
T = 1000
opt.load_feather("eurostoxx_prices","eurostoxx_universe",T=T,N=48,normalize=True)
optbench.load_feather("SXXP_Benchmark_Prices","SXXP_Benchmark_Prices",T=T,normalize=True)

In [None]:
params = np.logspace(-2,2,20)
nums = []
for i in params:
    minw = np.round(opt.cvx_opt(cost_func="volatility",constraints=["minreturn","L1","longonly","sum1"],minreturn = 1,parameter=i,maxsum=1),3)
    nums.append(len(minw[minw>0]))
nums
display(opt.view_portfolio(minw))
px.line(x=params,y=nums,log_x=True)

In [None]:
bench = optbench.prices[:,0]

ws, nums, a = opt.l1_cardinality_benchmark(bench, M = 11, constraints=["longonly"])
#display(a)
condw = np.round(opt.cvx_opt(cost_func="benchmark",constraints=["sparsity_profile","longonly"],sparsity=ws[-1][:],benchmark = bench),5)
px.line(nums)
opt.cost(condw,"benchmark")

In [None]:
np.linalg.norm(opt.prices.dot(ws[-1])-bench)

In [None]:
# naive
np.round(wgreed,5)
ws[4]

In [None]:
bench = optbench.prices[:,0]
uncondw = np.round(opt.cvx_opt(cost_func="benchmark",constraints=["longonly"],benchmark = bench),5)

M = np.arange(5,20)

errs = []
errsnaive = []
errsgreedy = []
anums = []
for i in M:
    ws, nums, a = opt.l1_cardinality_benchmark(bench, M = i, constraints=["longonly"])
    condw = np.round(opt.cvx_opt(cost_func="benchmark",constraints=["sparsity_profile","longonly"],sparsity=ws[-1][:],benchmark = bench),5)
    naivew = np.round(opt.cvx_opt(cost_func="benchmark",constraints=["cardinality","longonly"],benchmark = bench,cardinality=i),5)
    wgreed = opt.greedy_cardinality(cost_func="benchmark",constraints=["longonly"],cardinality=i,benchmark=bench)

    errs.append(np.linalg.norm(opt.prices.dot(condw)-bench))
    errsnaive.append(np.linalg.norm(opt.prices.dot(naivew)-bench))
    errsgreedy.append(np.linalg.norm(opt.prices.dot(wgreed)-bench))

    anums.append(len(condw[condw>0]))


In [None]:
fig = px.line(x=M,y=[errs,errsnaive,errsgreedy])
fig.add_scatter(x=M,y=anums)
fig.add_scatter(x=M,y=M)
fig.add_scatter(x=M,y=np.ones(len(M))*np.linalg.norm(opt.prices.dot(uncondw)-bench))

In [None]:
naive_allocation = np.ones(len(opt.assets))/len(opt.assets)
random_allocation = np.random.uniform(0,1/len(opt.assets),len(opt.assets))
bench_naive = opt.prices.dot(naive_allocation)
bench_random = opt.prices.dot(random_allocation)
bench_flat = np.ones(len(opt.prices))
bench_line = np.arange(len(opt.prices))
bench_wave = np.sin(np.arange(len(opt.prices))/300)*100

bench = optbench.prices[:,0]

params = np.logspace(-2,3,100)
nums = []
errs1 = []
errs2 = []

for i in params:
    
    minw1 = np.round(opt.cvx_opt(cost_func="benchmark",constraints=["L1","longonly"],parameter=i,benchmark = bench),3)
    w = opt.cvx_opt(cost_func="benchmark",constraints=["longonly"],benchmark=bench,sparsity=minw1)
    nums.append(len(minw1[minw1>0]))
    errs1.append(np.linalg.norm(opt.prices.dot(w)-bench))


#fig.add_scatter(x=params,y=errs2)

In [None]:
wmin = opt.cvx_opt(cost_func="benchmark",constraints=["longonly"],benchmark=bench)
cmin = np.linalg.norm(opt.prices.dot(wmin)-bench)

In [None]:
# Number of selected assets along the L1 regularisation path, together with
# the tracking error relative to the unconstrained optimum `cmin`.
fig = px.line(x=params,y=nums,log_x=True)
# errs1 is a Python list; convert explicitly before doing arithmetic on it.
fig.add_scatter(x=params,y=(np.asarray(errs1)-cmin)/cmin)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
fig.update_layout(title="Number of assets in optimal portfolio",
        font_family="Times New Roman",
        font_color="black",
        font_size=25,
        xaxis_title=r"$\lambda$",
        yaxis_title="n"
        )

In [None]:
i=1
minw1 = np.round(opt.cvx_opt(cost_func="benchmark",constraints=["L1","longonly"],parameter=i,benchmark = bench),5)
minw2 = np.round(opt.cvx_opt(cost_func="benchmark",constraints=["sparsity_profile","longonly"],sparsity=minw1,benchmark = bench),5)
len(minw1)
fig = px.line(bench)
fig.add_scatter(y=opt.prices.dot(minw2))
fig.show()
display(opt.view_portfolio(minw2))


## cardinality

In [None]:
bench.dot(opt.prices)

In [None]:
naive_allocation = np.ones(len(opt.assets))/len(opt.assets)
random_allocation = np.random.uniform(0,1/len(opt.assets),len(opt.assets))
bench_naive = opt.prices.dot(naive_allocation)
bench_random = opt.prices.dot(random_allocation)
bench_flat = np.ones(len(opt.prices))
bench_line = np.arange(len(opt.prices))
bench_wave = np.sin(np.arange(len(opt.prices))/300)*100

bench = bench_random

out = opt.cardinality_benchmark(bench,5)
allocation = np.zeros(len(opt.assets))
indices = out[1]
allocations = out[2]
j = 0
for i in range(len(opt.assets)):
    if i in indices[-1]:
        allocation[i] = allocations[-1][j]
        j+=1
        
fig = px.line(bench)
fig.add_scatter(y=opt.prices.dot(allocation))

# increasing set algorithm

In [None]:
w = opt.cardinality_opt(5,0,0,cost_func="volatility",limit_attempts=False,attempts=5)

In [None]:
# cardinality_opt returns the weight vector itself (it is used that way in
# the comparison cells), so there is no (indices, allocations) pair to unpack.
allocation = opt.cardinality_opt(6,0,0,cost_func="volatility",limit_attempts=False,attempts=5)
display(opt.view_portfolio(np.round(allocation,5)))

In [None]:
# Time the increasing-set algorithm limited to 10 attempts per step.
# cardinality_opt returns the weight vector itself, so it is used directly.
tic = time.perf_counter()

allocation = opt.cardinality_opt(6,0,0,cost_func="volatility",limit_attempts=True,attempts=10)
toc = time.perf_counter()
display(toc-tic)

display(opt.view_portfolio(np.round(allocation,5)))

In [None]:
# Stored weight vector (48 entries), pasted from a printed array and written
# here as a valid array literal.
M10 = np.array([
    -0.0, 0.113, -0.0, -0.0, 0.0, 0.196, 0.0, 0.0, 0.0, 0.219,
    -0.0, 0.0, -0.0, 0.0, 0.0, -0.0, -0.0, 0.076, 0.158, -0.0,
    0.084, 0.063, 0.0, 0.0, 0.018, 0.0, -0.0, -0.0, 0.0, 0.0,
    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, 0.117, 0.0,
    0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0,
])

In [None]:
opt.cost(allocation,"volatility")

In [None]:
naive_reduced = naive_reduce(minvarw,4)
display(opt.vol(allocation),opt.vol(minvarw),opt.vol(naive_reduced))

In [None]:
def naive_reduce(allocation,n):
    """Keep the ``n`` largest weights of ``allocation`` and rescale them to sum to 1.

    Parameters
    ----------
    allocation : array-like of numbers; it is not modified.
    n : int
        Number of weights to keep. Values larger than ``len(allocation)``
        keep every weight.

    Returns
    -------
    Float array of the same length with all other entries set to 0. If the
    kept weights sum to 0 the division yields NaN/inf, as NumPy does.
    """
    remaining = np.array(allocation, dtype=float)
    reduced_w = np.zeros(len(remaining))
    for _ in range(min(n, len(remaining))):
        top = np.argmax(remaining)
        reduced_w[top] = remaining[top]
        # Mask with -inf rather than 0, so an entry that was already picked
        # can never be selected again and overwritten (this happened when n
        # exceeded the number of positive weights, or with negative weights).
        remaining[top] = -np.inf
    return reduced_w/sum(reduced_w)


## adjust

In [None]:
naive_allocation = np.ones(len(opt.assets))/len(opt.assets)
iberdrola = np.zeros(len(opt.assets))
iberdrola[7] = 1
iberdrola[12] = 0
minvarw = opt.cvx_opt(cost_func="volatility",constraints=["longonly","sum1"])
#adjustw = opt.cvx_opt(cost_func="volatility",constraints=["longonly","sum1","adjust"],adjust=[iberdrola,.021])
display(opt.view_portfolio(np.round(minvarw,5)))
#display(opt.view_portfolio(np.round(adjustw,5)))


In [None]:
# Rolling minimum-variance weights: N rebalancing times, each optimised on the
# `sample` rows that precede it, with the empirical and the robust covariance.
N = 50
sample = 50
weights = np.zeros([len(opt.assets),N])
robustweights = np.zeros([len(opt.assets),N])

italy = ["LOCATION","Italy","=",1]
last = len(opt.prices)-sample
dt = last//N
t = np.arange(N)*dt+sample
for i in range(len(t)):
    # time_subset_portfolio(t0, dt, ...) slices prices[t0:t0+dt], so its second
    # argument is the window length, not the end index.
    weights[:,i] = opt.time_subset_portfolio(t[i]-sample,sample,cost_func="emp_volatility",constraints=["sum1","longonly"])
    robustweights[:,i] = opt.time_subset_portfolio(t[i]-sample,sample,cost_func="robust_volatility",constraints=["sum1","longonly"])

In [None]:
weights = np.genfromtxt("weights.txt")
robustweights = np.genfromtxt("robustweights.txt")

In [None]:
np.savetxt("weights.txt",weights)
np.savetxt("robustweights.txt",robustweights)

In [None]:
for i in range(len(opt.assets)):
    fig = px.line(x=t,y=np.round(weights[i,:],5),labels={"x":"start time (days)","y":"portfolio weight"})
    fig.add_scatter(x=t,y=np.round(robustweights[i,:],5),name="robust")
    #fig.add_scatter(x=t,y=weights[12,:]+weights[28,:])

    fig.update_layout(title="Asset " + str(i) + ": " + str(opt.universe.iloc[i]["NAME"]),
        font_family="Times New Roman",
        font_color="black",
        font_size=25,
        showlegend=True)
    fig.show()
    #opt.view_portfolio(np.round(weights[:,-1],5))
    #fig.write_image("asset" + str(i) +".png")

### backtest performance

In [None]:
# time_i,weight_i: times (t[0] = 0) such that weight is w[i] between t[i] and t[i+1]
M = len(t)
time_i = np.array([0])
time_i = np.append(time_i,t)
weight_i = np.zeros([M+1,len(opt.assets)])
robustweight_i = np.zeros([M+1,len(opt.assets)])
# Before the first rebalancing time both strategies hold the equal-weight portfolio.
weight_i[0,:] = np.ones(len(opt.assets))/len(opt.assets)
robustweight_i[0,:] = np.ones(len(opt.assets))/len(opt.assets)
for i in range(M):
    weight_i[i+1] = weights[:,i]
    robustweight_i[i+1] = robustweights[:,i]
# allocations = TxN allocation matrix
allocations = np.zeros([len(opt.prices),len(opt.assets)])
robust_allocations = np.zeros([len(opt.prices),len(opt.assets)])
j = 0
for i in range(len(allocations)):
    allocations[i,:] = weight_i[j,:]
    robust_allocations[i,:] = robustweight_i[j,:]
    # Switch to the next weight vector once a rebalancing time is reached.
    if i in t:
        j+=1

naive_allocations = np.ones([len(opt.prices),len(opt.assets)])/len(opt.assets)
# The rest of this notebook uses the result of cvx_opt directly as a weight
# vector; a raw solver dict with an 'x' entry is still accepted here.
minvar_solution = opt.cvx_opt(cost_func="volatility",constraints=["longonly","sum1"])
if isinstance(minvar_solution,dict):
    minvar = np.array(minvar_solution['x'])[:,0]
else:
    minvar = np.asarray(minvar_solution)
minvar_allocations = np.array([minvar for i in range(len(opt.prices))])

fig = px.line(opt.running_portfolio_returns(naive_allocations))
fig.add_scatter(y=opt.running_portfolio_returns(allocations),name="running minvar")
fig.add_scatter(y=opt.running_portfolio_returns(robust_allocations),name="running robust minvar")
fig.add_scatter(y=opt.running_portfolio_returns(minvar_allocations),name="minvar")

fig.show()

In [None]:
display(np.var(opt.running_portfolio_returns(naive_allocations)[500:]))
display(np.var(opt.running_portfolio_returns(allocations)[500:]))
display(np.var(opt.running_portfolio_returns(minvar_allocations)[500:]))
display(np.var(opt.running_portfolio_returns(robust_allocations)[500:]))

In [None]:
allocations[t[1]]

In [None]:
fig = px.line(x=t,y=weights[7,:])
fig.add_scatter(x=t,y=robustweights[7,:])

In [None]:
# Selection constraints are 4-element lists: [universe column, value, relation, bound].
# NOTE(review): `france` and `italy` are defined but not passed to `selection` below.
germany = ["LOCATION","Germany","=",.3]
france = ["LOCATION","France","=",.3]
italy = ["LOCATION","Italy","=",.3]
bmw = ["TICKER","BMW",">=",0.1]
it = ["SECTOR","Information Technology",">=",0.2]

# Sharpe optimisation with the "select", "sum1" and "longonly" constraints.
optw = opt.cvx_opt(cost_func="sharpe",constraints=["select","sum1","longonly"],selection=[germany,it,bmw])
#display(np.round(optw,8))
#display(opt.universe["LOCATION"])
#px.line(opt.prices.dot(optw))
# Show the portfolio sum for the Germany location filter, then the non-zero rows
# of the portfolio view built from the weights rounded to 5 decimals.
display(opt.portfolio_sum(optw,["LOCATION","Germany"]))
df = opt.view_portfolio(np.round(optw,5))
df[df.ALLOCATION !=0]

### minvar, longonly sum to one

In [None]:
# Equal-weight reference vector and the cvx_opt volatility solution (long-only, sum-to-one).
naive_allocation = np.ones(len(opt.assets))/len(opt.assets)
w = opt.cvx_opt(cost_func="volatility",constraints=["longonly","sum1"])
# The solver result is indexed with 'x'; take its first column as a 1-D weight vector.
optw = np.array(w['x'])[:,0]
display(optw)
# Plot the price matrix weighted by the solution.
px.line(opt.prices.dot(optw))

In [None]:
# Gradient descent on volatility from equal weights; `w` is iterated over below,
# one entry per stored iterate.
naive_allocation = np.ones(len(opt.assets))/len(opt.assets)
w = opt.gradient_descent_opt(naive_allocation,cost_func="volatility",constraints=["longonly2","sum1"],N=200,stepmethod=["armijo",1/2,1/2])
# Track each iterate's naive volatility and its distance to `optw`
# (assigned in the preceding cvx cell).
# NOTE(review): `wvar` is collected but never used in this cell.
wvar = []
wdiff = []
for i in w:
    wvar.append(Optimiser.naive_vol(opt.prices,i))
    wdiff.append(np.linalg.norm(i-optw))
fig = px.line(wdiff,log_y=True)
fig.show()
px.line(opt.prices.dot(w[-1]))

### maxsharpe

In [None]:
# Gradient descent on the Sharpe cost, started from 1.1x the equal-weight vector.
naive_allocation = np.ones(len(opt.assets))/len(opt.assets)
w = opt.gradient_descent_opt(naive_allocation*1.1,cost_func="sharpe",constraints=["longonly2"],N=100,stepmethod=["armijo",1/2,1/2])
# Record the Sharpe value of each iterate and its distance to max_sharpe[0].
# NOTE(review): `diff` is collected but not plotted or displayed in this cell.
wsharpe = []
diff = []
max_sharpe = opt.maximum_sharpe()
for i in w:
    wsharpe.append(opt.sharpe(i))
    diff.append(np.linalg.norm(max_sharpe[0]-i))
fig = px.line(wsharpe,log_y=False)
fig.show()
# Final iterate together with its returns and volatility as reported by `opt`.
display(w[-1],opt.returns(w[-1]),opt.vol(w[-1]))

## benchmark

In [None]:
# Candidate benchmark series, each of length len(opt.prices).
naive_allocation = np.ones(len(opt.assets))/len(opt.assets)
random_allocation = np.random.uniform(0,1/len(opt.assets),len(opt.assets))
bench_naive = opt.prices.dot(naive_allocation)  # was misspelled `bennch_naive`
bench_random = opt.prices.dot(random_allocation)
bench_flat = np.ones(len(opt.prices))*40
bench_line = np.arange(len(opt.prices))
bench_wave = np.sin(np.arange(len(opt.prices))/300)*100

# Benchmark actually tracked in this run.
bench = bench_wave

# NOTE(review): here the cvx_opt result is used directly as a weight vector, whereas the
# "volatility" cells index the result with ['x'] — confirm the return type for "benchmark".
w = opt.cvx_opt(cost_func="benchmark",constraints=["longonly"],benchmark=bench)
wopt = w
display(wopt)

# Overlay the benchmark and the fitted portfolio.
fig = px.line(bench)
fig.add_scatter(y=opt.prices.dot(wopt))

fig.show()

In [None]:
# Candidate benchmark series, each of length len(opt.prices).
naive_allocation = np.ones(len(opt.assets))/len(opt.assets)
random_allocation = np.random.uniform(0,1/len(opt.assets),len(opt.assets))
bench_random = opt.prices.dot(random_allocation)
bench_flat = np.ones(len(opt.prices))*40
bench_line = np.arange(len(opt.prices))/10
bench_wave = np.sin(np.arange(len(opt.prices))/300)*100

# Benchmark actually tracked in this run.
bench = bench_wave

# Unconstrained gradient descent on the benchmark cost, starting from equal weights.
w = opt.gradient_descent_opt(naive_allocation,cost_func="benchmark",constraints=[],N=200,stepmethod=["armijo",1/2,1/2],benchmark = bench)

# One column per iterate.
# NOTE(review): `df` is not used further in this cell; the next cell recomputes it.
df = pd.DataFrame(opt.prices.dot(np.transpose(w)))

# Benchmark versus the last and the first iterate.
fig = px.line(bench)
fig.add_scatter(y=opt.prices.dot(w[-1]))
fig.add_scatter(y=opt.prices.dot(w[0]))

fig.show()

In [None]:
# Price matrix applied to every iterate in `w`: one column per iterate.
df = pd.DataFrame(opt.prices.dot(np.transpose(w)))
df

In [None]:
# Scratch: one uniform(0, 1) draw per asset (unseeded, so not reproducible).
np.random.uniform(0,1,len(opt.assets))

In [None]:
# Scratch: number of rows in the price matrix.
len(opt.prices)

In [None]:
# Scratch: price matrix weighted by the equal-weight vector.
opt.prices.dot(naive_allocation)

# Simulations

In [None]:
# Reference portfolios for the simulation plot. `naive` is a tuple of
# (weights, mean returns, naive volatility); `minvar` and `maxsharpe` are indexed
# the same way ([0], [1], [2]) in later cells.
minvar = opt.minimum_variance()
maxsharpe = opt.maximum_sharpe()
naive_allocation = np.ones(len(opt.assets))/len(opt.assets)
naive = (naive_allocation,Optimiser.mean_returns(opt.prices,naive_allocation),Optimiser.naive_vol(opt.prices,naive_allocation))

In [None]:
# Returns and volatility of the max-Sharpe weights rescaled so the largest weight equals 1.
display(opt.returns(maxsharpe[0]/max(maxsharpe[0])),opt.vol(maxsharpe[0]/max(maxsharpe[0])))

In [None]:
# Scatter 2000 simulated portfolios in (volatility, returns) space and overlay the
# min-variance, max-Sharpe and naive portfolios (element [2] on x, element [1] on y).
fig = px.scatter(opt.simulate_portfolios(2000,norm=1),x='volatility',y='returns')
fig.add_scatter(x=[minvar[2]],y=[minvar[1]])
fig.add_scatter(x=[maxsharpe[2]],y=[maxsharpe[1]])
fig.add_scatter(x=[naive[2]],y=[naive[1]])
#fig.add_shape(type='line',x0=0,y0=0,x1=maxsharpe[2],y1=maxsharpe[1])

#fig.update_layout(xaxis_range=[-100,1200])

fig.show()

In [None]:
# Load the raw price table from the feather file named "prices" (path relative to the kernel's cwd).
data = pd.read_feather("prices")

In [None]:
# Every column after the first is treated as an asset price series. The matrix shape is
# derived from `data` instead of the previously hard-coded 3445 x 50, so other datasets
# load without editing this cell.
assets = data.columns[1:]
prices = data[assets].to_numpy(dtype=float)

In [None]:
# Per-asset log return from first to last row, divided by the number of rows and scaled by 365.
means = np.log(prices[-1,:]/prices[0,:])/len(prices[:,0])*365

In [None]:
# All-ones allocation vector (not normalised to sum to one).
allocation = np.ones(len(opt.assets))
#allocation[0] = 0

In [None]:
# Portfolio value series under the max-Sharpe weights and its step-to-step log returns.
values = opt.prices.dot(maxsharpe[0])
display(values)
np.log(values[1:]/values[:-1])

In [None]:
# Scratch: total of the min-variance weights.
sum(minvar[0])

In [None]:
# Heatmap of the correlation matrix between the columns of opt.prices.
px.imshow(np.corrcoef(np.transpose(opt.prices)))

In [None]:
# Scratch: quadratic form allocation' c allocation.
# NOTE(review): `c` is not defined in the visible cells — presumably a covariance matrix; confirm.
np.transpose(allocation).dot(c).dot(allocation)

In [None]:
# Scratch: the same quadratic form with the products grouped the other way round.
allocation.dot(c.dot(np.transpose(allocation)))

In [None]:
# Scratch: N rows of uniform(0, 1) draws, one column per asset (unseeded).
N = 4
r = np.random.uniform(size=[N,len(assets)])

In [None]:
# Scratch: value of `allocation` at the last row of `prices`.
allocation.dot(prices[-1,:])

In [None]:
class Optimiser:
    """Helpers for minimising the quadratic form x'Cx over small supports.

    All methods are stateless and declared static, so they can be called on the
    class (``Optimiser.cost(x, C)``) or on an instance (``opt.cost(x, C)``).

    NOTE(review): other cells in this notebook call ``Optimiser.naive_vol``,
    ``opt.load_feather`` and similar, which this class does not define.
    Running this cell replaces any earlier ``Optimiser`` binding — confirm
    that is intended.
    """

    def __init__(self):
        pass

    @staticmethod
    def cost(x,C):
        """Return the quadratic form x C x^T."""
        return x.dot(C).dot(np.transpose(x))

    @staticmethod
    def lowerbound(C,K):
        """Return the bound used to prune the search in ``cardinal_optimise``.

        If the smallest entry of C lies on the diagonal, that entry is returned.
        Otherwise the result is ``alfa + 1 / sum_k 1/(d_k - alfa)`` where ``alfa``
        is the smallest entry of C and ``d_k`` are the K smallest diagonal entries
        (K is capped at the matrix size).
        """
        alfa = np.min(C)
        d = sorted(np.diag(C))
        if alfa in d:
            return alfa
        K = min(K,len(d))
        # alfa is below every diagonal entry here, so each denominator is positive.
        return alfa + 1/np.sum(1/(d[:K]-np.ones(K)*alfa))

    @staticmethod
    def cardinal_optimise(C,M):
        """Search index sets of growing size for the minimiser of x'Cx with sum(x) = 1.

        Starts from single indices and, for sizes 2 .. M-1, extends each kept
        index set by one candidate index. For each new set the closed-form
        minimiser ``C_I^{-1} e / (e' C_I^{-1} e)`` is computed; sets whose
        minimiser is strictly positive are kept for the next round.

        Relies on the notebook-level helpers ``is_pd`` and ``submatrix`` and on
        IPython's ``display``.

        Returns:
            (mins, minargs, minvecs): the best cost found so far, its index
            tuple and its weight vector, appended once for every size at which
            a set attained the running minimum.
        """
        n = len(C)
        N = np.arange(n)
        # index sets
        S_next = [[i] for i in N]
        mins = [np.min(np.diag(C))]
        minargs = [tuple([np.argmin(np.diag(C))])]
        minvecs = [[1]]

        # iterate
        for i in range(2,M):
            update = False
            S = S_next.copy()
            S_next = []
            curmin = mins[-1]
            curminargs = minargs[-1]
            curminvec = minvecs[-1]
            done = set()
            for I in S:
                J = np.setdiff1d(N,I)
                # find pos def candidates: j survives only if every 2x2
                # submatrix on (j, k), k in I, passes is_pd
                cand = J
                for j in J:
                    if not all(is_pd(submatrix(C,[j,k])) for k in I):
                        cand = cand[cand!=j]
                lower = Optimiser.lowerbound(submatrix(C,np.union1d(cand,I)),M)
                if lower < curmin:
                    for j in cand:
                        # find next point
                        I_next = tuple(np.sort(np.append(I,j)))
                        if I_next not in done:
                            done.add(I_next)
                            C_I = submatrix(C,I_next)
                            e = np.ones(len(I_next))
                            # invert once and reuse for numerator and normaliser
                            Cinv_e = np.linalg.inv(C_I).dot(e)
                            xmin = Cinv_e/e.dot(Cinv_e)
                            if np.all((xmin>0)):
                                xcost = Optimiser.cost(xmin,C_I)
                                curmin = min(curmin,xcost)
                                if curmin == xcost:
                                    curminargs = I_next
                                    curminvec = xmin
                                    update = True
                                S_next.append(I_next)
                # else: the lower bound rules out every extension of I

            if update:
                mins.append(curmin)
                minargs.append(curminargs)
                minvecs.append(curminvec)
            display([curmin,curminargs,curminvec])

        return (mins, minargs, minvecs)

    # Alias for the name used by the demo cell that calls Optimiser.cardinality_optimise.
    cardinality_optimise = cardinal_optimise

In [None]:
# Bind `opt` to a fresh Optimiser instance (replaces any earlier `opt`).
opt = Optimiser()

In [None]:
# Demo: random symmetric 25x25 matrix with 5 added on the diagonal, searched with M = 10.
# The method defined on Optimiser is `cardinal_optimise`; the previous call used the
# non-existent name `cardinality_optimise` and raised AttributeError.
n=25
C = np.random.normal(0,1,n**2).reshape(n,n)
C = C + np.transpose(C) + 5*np.eye(n)
Optimiser.cardinal_optimise(C,10)

In [None]:
# Script version of the index-set search: problem size n, index range N, size limit M.
n = 25
N = np.arange(n)
M = 10

#random covariance matrix
# Symmetric by construction; positive definiteness is not guaranteed, hence the check below.
C = np.random.normal(0,1,n**2).reshape(n,n)
C = C + np.transpose(C) + np.eye(n)*3
#C = np.transpose(C).dot(C)-np.eye(n)*15
display(is_pd(C))
# index sets
# Start from singletons; the initial best is the smallest diagonal entry with weight [1].
S_next = [[i] for i in N]
mins = [np.min(np.diag(C))]
minargs = [tuple([np.argmin(np.diag(C))])]
minvecs = [[1]]

def cost(x,C):
    """Return the quadratic form x C x^T."""
    left_product = x.dot(C)
    return left_product.dot(np.transpose(x))

def lowerbound(C,K):
    """Return the pruning bound for the index-set search.

    If the smallest entry of C sits on the diagonal it is returned as is.
    Otherwise the result is smallest + 1/sum(1/(d_k - smallest)) over the K
    smallest diagonal entries d_k, with K capped at the matrix size.
    """
    smallest = np.min(C)
    diagonal_sorted = sorted(np.diag(C))
    if smallest in diagonal_sorted:
        return smallest
    count = min(K,len(diagonal_sorted))
    gaps = diagonal_sorted[:count] - np.full(count, smallest, dtype=float)
    return smallest + 1/np.sum(1/gaps)
    
# iterate
# Each pass extends every kept index set by one index, so pass i handles sets of size i.
for i in range(2,M):
    update = False
    S = S_next.copy()
    S_next = []
    curmin = mins[-1]
    curminargs = minargs[-1]
    curminvec = minvecs[-1]
    # index tuples already evaluated in this pass (the same set is reachable from several parents)
    done = set()
    for I in S:
        J = np.setdiff1d(N,I)
        # find pos def candidates
        # j is dropped as soon as one 2x2 submatrix on (j, I[k]) fails is_pd
        cand = J
        for j in J:
            k = 0
            while k < len(I):
                if is_pd(submatrix(C,[j,I[k]])):
                    k+=1
                else:
                    k = len(I)
                    cand = cand[cand!=j]
        # only extend I when the bound on the candidate submatrix can beat the current best
        lower = lowerbound(submatrix(C,np.union1d(cand,I)),M)
        if lower < curmin:
            for j in cand:
                # find next point
                I_next = tuple(np.sort(np.append(I,j)))
                if I_next not in done:
                    done.add(I_next)
                    C_I = submatrix(C,I_next)
                    e = np.ones(len(I_next))
                    # closed form C_I^{-1} e / (e' C_I^{-1} e); its entries sum to one
                    xmin = (e.dot(np.linalg.inv(C_I)).dot(np.transpose(e)))**(-1)*(np.linalg.inv(C_I)).dot(np.transpose(e))
                    # keep the set only when all weights are strictly positive
                    if np.all((xmin>0)):
                        xcost = cost(xmin,C_I)
                        #display([I,xcost,curmin])
                        curmin = min(curmin,xcost)
                        if curmin == xcost:
                            curminargs = I_next
                            curminvec = xmin
                            update = True
                        S_next.append(I_next)
        else:
            #display("lower bound used")
            pass

    # record a new entry only when some set of this size attained the running minimum
    if update:
        mins.append(curmin)
        minargs.append(curminargs)
        minvecs.append(curminvec)
    display([curmin,curminargs,curminvec])

display("Summary:", mins,minargs,minvecs)

In [None]:
# Scratch: np.union1d with a tuple and a bare scalar — `(5)` is the int 5, not a 1-tuple.
np.union1d((10,5),(5))

## Marcenko-Pastur

In [None]:
!pip install sklearn

In [None]:
from scipy.optimize import minimize
from sklearn.neighbors import KernelDensity

In [None]:
def getPCA(matrix):
    """Eigen-decompose a Hermitian matrix, largest eigenvalue first.

    Returns:
        (eVal, eVec): eVal is a square matrix with the eigenvalues on its
        diagonal in descending order; the columns of eVec are the eigenvectors
        in the same order.
    """
    values, vectors = np.linalg.eigh(matrix)
    descending = np.argsort(values)[::-1]
    return np.diagflat(values[descending]), vectors[:, descending]

def fitKDE(obs,bWidth=.25,kernel='gaussian',x=None):
    """Fit a kernel density estimate to `obs` and evaluate it on `x`.

    Args:
        obs: observations; a 1-D array is reshaped to a single column.
        bWidth: bandwidth passed to KernelDensity.
        kernel: kernel name passed to KernelDensity.
        x: evaluation points; defaults to the unique values of `obs`.

    Returns:
        pd.Series of density values indexed by the flattened evaluation points.
    """
    samples = obs.reshape(-1,1) if len(obs.shape)==1 else obs
    estimator = KernelDensity(kernel=kernel,bandwidth=bWidth).fit(samples)
    grid = np.unique(samples).reshape(-1,1) if x is None else x
    if len(grid.shape)==1:
        grid = grid.reshape(-1,1)
    log_density = estimator.score_samples(grid) # log(density)
    return pd.Series(np.exp(log_density),index=grid.flatten())

def errPDFs(var,eVal,q,bWidth,pts=1000):
    """Sum of squared differences between the Marcenko-Pastur pdf and the KDE of `eVal`.

    The theoretical pdf comes from mpPDF(var, q, pts); the empirical one is
    fitKDE evaluated on the same grid.
    """
    theoretical = mpPDF(var,q,pts)
    empirical = fitKDE(eVal,bWidth,x=theoretical.index.values)
    residual = empirical-theoretical
    return np.sum(residual**2)

def findMaxEval(eVal,q,bWidth):
    """Fit the Marcenko-Pastur variance and return the implied largest random eigenvalue.

    Minimises errPDFs over `var`, starting at 0.5 and bounded to
    (1e-5, 1 - 1e-5). If the optimiser does not report success, var falls
    back to 1.

    Returns:
        (eMax, var) with eMax = var * (1 + sqrt(1/q))**2.
    """
    fit = minimize(errPDFs,.5,args=(eVal,q,bWidth),
                   bounds=((1E-5,1-1E-5),))
    var = fit["x"][0] if fit['success'] else 1
    eMax = var*(1+(1./q)**.5)**2
    return eMax,var

def denoisedCorr(eVal,eVec,nFacts):
    """Rebuild a correlation matrix after flattening the noise eigenvalues.

    Args:
        eVal: square matrix with the eigenvalues on its diagonal (as returned by getPCA).
        eVec: matrix whose columns are the matching eigenvectors.
        nFacts: number of leading eigenvalues to keep unchanged.

    Returns:
        The matrix eVec @ diag(adjusted eigenvalues) @ eVec.T rescaled by
        cov2corr. Eigenvalues from position nFacts onward are replaced by
        their average, which keeps the trace unchanged.
    """
    eVal_=np.diag(eVal).copy()
    nNoise=eVal_.shape[0]-nFacts
    # With no noise eigenvalues there is nothing to average; skipping avoids a 0/0 warning.
    if nNoise>0:
        eVal_[nFacts:]=eVal_[nFacts:].sum()/float(nNoise)
    eVal_=np.diag(eVal_)
    corr1=np.dot(eVec,eVal_).dot(eVec.T)
    corr1=cov2corr(corr1)
    return corr1
    
def atf(x):
    """Unwrap a single-element numpy array to a Python scalar; return anything else unchanged."""
    return x.item() if isinstance(x,np.ndarray) else x

def mpPDF(var,q,pts=1000):
    """Marcenko-Pastur density on `pts` equally spaced points of its support.

    Args:
        var: variance parameter (a scalar or a one-element array).
        q: ratio T/N.
        pts: number of grid points.

    Returns:
        pd.Series of density values indexed by the eigenvalue grid, which runs
        from var*(1-sqrt(1/q))**2 to var*(1+sqrt(1/q))**2.
    """
    root = (1/q)**.5
    eMin = atf(var*(1-root)**2)
    eMax = atf(var*(1+root)**2)
    grid = np.linspace(eMin,eMax,pts)
    density = q/(2*np.pi*var*grid)*((eMax-grid)*(grid-eMin))**.5
    return pd.Series(density,index=grid)

def cov2corr(cov):
    """Convert a covariance matrix to a correlation matrix.

    Each entry is divided by the product of the two standard deviations; the
    result is clipped to [-1, 1] to absorb numerical error. The input is not
    modified.
    """
    std = np.sqrt(np.diag(cov))
    scaled = cov/np.outer(std,std)
    return np.clip(scaled,-1,1)

In [None]:
# Pure-noise experiment: T observations of N independent standard normal variables (unseeded).
# NOTE(review): `eigenvalues` is not defined in the visible cells — confirm where it comes from.
T = 10000
N = 1000
x = np.random.normal(size=(T,N))
evals = eigenvalues(np.corrcoef(x,rowvar=0))

In [None]:
def getRndCov(nCols,nFacts):
    """Random nCols x nCols covariance: a rank-nFacts part plus a random diagonal.

    The product w w' of a normal (nCols, nFacts) matrix is not full rank;
    adding uniform(0, 1) draws on the diagonal makes the result full rank.
    """
    loadings = np.random.normal(size=(nCols,nFacts))
    low_rank = np.dot(loadings,loadings.T)
    idiosyncratic = np.diag(np.random.uniform(size=nCols))
    return low_rank+idiosyncratic
#- - - - - - - -- - - - - - - - - - - - -- - - - - - - - - - - - - -- - - - - - - - - - - - -- - -
def cov2corr(cov):
    """Convert a covariance matrix to a correlation matrix.

    Each entry is divided by the product of the two standard deviations; the
    result is clipped to [-1, 1] to absorb numerical error. The input is not
    modified.
    """
    std = np.sqrt(np.diag(cov))
    scaled = cov/np.outer(std,std)
    return np.clip(scaled,-1,1)
#- - - - - - - -- - - - - - - - - - - - -- - - - - - - - - - - - - -- - - - - - - - - - - - -- - -
# Noise + signal experiment: blend the sample covariance of T x N standard normal
# draws (weight alpha) with a random covariance built from nFact factors (weight 1 - alpha),
# then take the eigen-decomposition of the resulting correlation matrix.
alpha,N,nFact,T=.995,1000,100,20000
cov=np.cov(np.random.normal(size=(T,N)),rowvar=0)
cov=alpha*cov+(1-alpha)*getRndCov(N,nFact) # noise+signal
corr0=cov2corr(cov)
eVal0,eVec0=getPCA(corr0)

In [None]:
# NOTE(review): `nFacts0` is first assigned in the cell BELOW this one, so this cell
# fails on a fresh top-to-bottom run.
nFacts0

In [None]:
# Fit the Marcenko-Pastur variance to the eigenvalues and count those at or above
# the fitted upper edge eMax0. The reversed diagonal is ascending, so searchsorted
# gives the number of eigenvalues below eMax0.
q = T/N
eMax0,var0=findMaxEval(np.diag(eVal0),q,bWidth=.01)
nFacts0=eVal0.shape[0]-np.diag(eVal0)[::-1].searchsorted(eMax0)

In [None]:
# Histogram of the eigenvalues with the fitted Marcenko-Pastur density overlaid.
density = mpPDF(var0,q)
fig = px.histogram(np.diag(eVal0), histnorm='probability density',nbins=1000)
fig.add_scatter(x=density.index,y=density)
# Raw string: "\l" is not a valid escape sequence and triggers a warning in a plain literal.
fig.update_layout(title="Eigenvalue distribution (noise+signal)",
        font_family="Times New Roman",
        font_color="black",
        font_size=25,
        showlegend=False
        ,xaxis_range=[0,8]
        ,xaxis_title=r"$\lambda$"
        ,autosize=False
        ,width=700
        ,height=500
        )
fig.show()
#fig.write_image("mpnoise.png")

In [None]:
# Pure-noise experiment (repeat of an earlier cell): T observations of N independent
# standard normal variables (unseeded).
# NOTE(review): `eigenvalues` is not defined in the visible cells — confirm where it comes from.
T = 10000
N = 1000
x = np.random.normal(size=(T,N))
evals = eigenvalues(np.corrcoef(x,rowvar=0))

In [None]:
# Marcenko-Pastur density for unit variance overlaid on an eigenvalue histogram.
# NOTE(review): `l` is not used below, and the histogram plots `eVal0` rather than the
# `evals` computed in the preceding pure-noise cell — confirm which spectrum is intended.
l = np.linspace(0,8,1000)
density = mpPDF(1,T/N)
fig = px.histogram(np.diag(eVal0), histnorm='probability density',nbins=10000)
fig.add_scatter(x=density.index,y=density)
# Raw string: "\l" is not a valid escape sequence and triggers a warning in a plain literal.
fig.update_layout(title="Eigenvalue distribution (noise)",
        font_family="Times New Roman",
        font_color="black",
        font_size=25,
        showlegend=False
        ,xaxis_range=[0,2]
        ,xaxis_title=r"$\lambda$"
        ,autosize=False
        ,width=700
        ,height=500
        )
fig.show()
#fig.write_image("mppure.png")

In [None]:
opt = Optimiser()
optbench = Optimiser()
T = 200
N = 48
opt.load_feather("eurostoxx_prices","eurostoxx_universe",T=T,N=10,normalize=True)
optbench.load_feather("SXXP_Benchmark_Prices","SXXP_Benchmarks",T=T,normalize=True)

In [None]:
# Correlation matrix between the columns of opt.prices.
corr0 = np.corrcoef(np.transpose(opt.prices))

In [None]:
# Eigenvalues (as a diagonal matrix, descending) and eigenvectors of corr0.
eVal0,eVec0=getPCA(corr0)

In [None]:
# Fit the Marcenko-Pastur variance to the eigenvalues and count those at or above
# the fitted upper edge eMax0. The reversed diagonal is ascending, so searchsorted
# gives the number of eigenvalues below eMax0.
q = T/N
eMax0,var0=findMaxEval(np.diag(eVal0),q,bWidth=.01)
nFacts0=eVal0.shape[0]-np.diag(eVal0)[::-1].searchsorted(eMax0)

In [None]:
# Heatmap of the raw correlation matrix.
px.imshow(corr0)

In [None]:
# Denoised correlation: eigenvalues beyond the first nFacts0 are replaced by their average.
corr1 = denoisedCorr(eVal0,eVec0,nFacts0)

In [None]:
# Heatmap of the denoised correlation matrix.
px.imshow(corr1)

In [None]:
# Norm of the difference between raw and denoised correlation (np.linalg.norm default for 2-D: Frobenius).
np.linalg.norm(corr0-corr1)

In [None]:
def getPCA(matrix):
    """Eigen-decompose a Hermitian matrix, largest eigenvalue first.

    Returns:
        (eVal, eVec): eVal is a square matrix with the eigenvalues on its
        diagonal in descending order; the columns of eVec are the eigenvectors
        in the same order.
    """
    values, vectors = np.linalg.eigh(matrix)
    descending = np.argsort(values)[::-1]
    return np.diagflat(values[descending]), vectors[:, descending]

def fitKDE(obs,bWidth=.25,kernel='gaussian',x=None):
    """Fit a kernel density estimate to `obs` and evaluate it on `x`.

    Args:
        obs: observations; a 1-D array is reshaped to a single column.
        bWidth: bandwidth passed to KernelDensity.
        kernel: kernel name passed to KernelDensity.
        x: evaluation points; defaults to the unique values of `obs`.

    Returns:
        pd.Series of density values indexed by the flattened evaluation points.
    """
    samples = obs.reshape(-1,1) if len(obs.shape)==1 else obs
    estimator = KernelDensity(kernel=kernel,bandwidth=bWidth).fit(samples)
    grid = np.unique(samples).reshape(-1,1) if x is None else x
    if len(grid.shape)==1:
        grid = grid.reshape(-1,1)
    log_density = estimator.score_samples(grid) # log(density)
    return pd.Series(np.exp(log_density),index=grid.flatten())

def errPDFs(var,eVal,q,bWidth,pts=1000):
    """Sum of squared differences between the Marcenko-Pastur pdf and the KDE of `eVal`.

    The theoretical pdf comes from mpPDF(var, q, pts); the empirical one is
    fitKDE evaluated on the same grid.
    """
    theoretical = mpPDF(var,q,pts)
    empirical = fitKDE(eVal,bWidth,x=theoretical.index.values)
    residual = empirical-theoretical
    return np.sum(residual**2)

def findMaxEval(eVal,q,bWidth):
    """Fit the Marcenko-Pastur variance and return the implied largest random eigenvalue.

    Minimises errPDFs over `var`, starting at 0.5 and bounded to
    (1e-5, 1 - 1e-5). If the optimiser does not report success, var falls
    back to 1.

    Returns:
        (eMax, var) with eMax = var * (1 + sqrt(1/q))**2.
    """
    fit = minimize(errPDFs,.5,args=(eVal,q,bWidth),
                   bounds=((1E-5,1-1E-5),))
    var = fit["x"][0] if fit['success'] else 1
    eMax = var*(1+(1./q)**.5)**2
    return eMax,var

def denoisedCorr(eVal,eVec,nFacts):
    """Rebuild a correlation matrix after flattening the noise eigenvalues.

    Args:
        eVal: square matrix with the eigenvalues on its diagonal (as returned by getPCA).
        eVec: matrix whose columns are the matching eigenvectors.
        nFacts: number of leading eigenvalues to keep unchanged.

    Returns:
        The matrix eVec @ diag(adjusted eigenvalues) @ eVec.T rescaled by
        cov2corr. Eigenvalues from position nFacts onward are replaced by
        their average, which keeps the trace unchanged.
    """
    eVal_=np.diag(eVal).copy()
    nNoise=eVal_.shape[0]-nFacts
    # With no noise eigenvalues there is nothing to average; skipping avoids a 0/0 warning.
    if nNoise>0:
        eVal_[nFacts:]=eVal_[nFacts:].sum()/float(nNoise)
    eVal_=np.diag(eVal_)
    corr1=np.dot(eVec,eVal_).dot(eVec.T)
    corr1=cov2corr(corr1)
    return corr1
    
def atf(x):
    """Unwrap a single-element numpy array to a Python scalar; return anything else unchanged."""
    return x.item() if isinstance(x,np.ndarray) else x

def mpPDF(var,q,pts=1000):
    """Marcenko-Pastur density on `pts` equally spaced points of its support.

    Args:
        var: variance parameter (a scalar or a one-element array).
        q: ratio T/N.
        pts: number of grid points.

    Returns:
        pd.Series of density values indexed by the eigenvalue grid, which runs
        from var*(1-sqrt(1/q))**2 to var*(1+sqrt(1/q))**2.
    """
    root = (1/q)**.5
    eMin = atf(var*(1-root)**2)
    eMax = atf(var*(1+root)**2)
    grid = np.linspace(eMin,eMax,pts)
    density = q/(2*np.pi*var*grid)*((eMax-grid)*(grid-eMin))**.5
    return pd.Series(density,index=grid)

def cov2corr(cov):
    """Convert a covariance matrix to a correlation matrix.

    Each entry is divided by the product of the two standard deviations; the
    result is clipped to [-1, 1] to absorb numerical error. The input is not
    modified.
    """
    std = np.sqrt(np.diag(cov))
    scaled = cov/np.outer(std,std)
    return np.clip(scaled,-1,1)

In [None]:
def denoised_cov(prices):
    """Covariance matrix rebuilt from a Marcenko-Pastur-denoised correlation matrix.

    Args:
        prices: 2-D array indexed as prices[row, column]; rows are treated as
            observations and columns as variables.

    Returns:
        The denoised correlation matrix scaled elementwise by the outer product
        of the per-column sample standard deviations.
    """
    n_obs, n_vars = len(prices), len(prices[0,:])
    by_variable = np.transpose(prices)
    eVal0,eVec0 = getPCA(np.corrcoef(by_variable))
    sigma = np.sqrt(np.diag(np.cov(by_variable)))
    spectrum = np.diag(eVal0)
    eMax0,var0 = findMaxEval(spectrum,n_obs/n_vars,bWidth=.01)
    # spectrum is descending; reversed it is ascending, so searchsorted counts values below eMax0
    n_below = spectrum[::-1].searchsorted(eMax0)
    nFacts0 = eVal0.shape[0]-n_below
    corr1 = denoisedCorr(eVal0,eVec0,nFacts0)
    return corr1*(np.outer(sigma,sigma))

In [None]:
# Denoised covariance of the loaded price matrix.
denoised_cov(opt.prices)

In [None]:
# Per-column sample standard deviation of opt.prices.
sigma = np.sqrt(np.diag(np.cov(np.transpose(opt.prices))))

In [None]:
# Sanity check: outer(sigma, sigma) * corr minus the sample covariance should be ~0 everywhere.
px.imshow(np.outer(sigma,sigma)*np.corrcoef(np.transpose(opt.prices))-np.cov(np.transpose(opt.prices)))