In [27]:
import pandas as pd
import cplex
from cplex.exceptions import CplexError
from collections import Iterable
import sys

In [2]:
# Load the two price workbooks, then discard the 'Date' column so that the
# frames keep pandas' default integer row index.
# Alternative kept for reference: pass index_col = 'Date' to read_excel to use
# the dates as the row index instead of dropping them.
CAC40 = pd.read_excel('./data_if/CAC40.xlsx')
CAC40 = CAC40.drop(columns = ['Date'])
dowjones = pd.read_excel('./data_if/dowjones.xlsx')
dowjones = dowjones.drop(columns = ['Date'])

In [3]:
# Compute returns
def compute_returns(df):
    """Compute simple period-over-period returns for every column of `df`.

    Row ``t`` of the result holds ``(p[t + 1] - p[t]) / p[t]`` for each
    column, where ``p`` is that column of `df` taken in row order.

    The computation is vectorized and positional, so it does not depend on
    the row labels of `df` (a date index works as well as the default
    integer index).

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table with one column per series and one row per period.
        Rows are assumed to be in chronological order, since consecutive
        rows are differenced.

    Returns
    -------
    pandas.DataFrame
        Same columns as `df`, ``len(df) - 1`` rows, indexed 0..len(df) - 2.
        For fewer than two input rows the result has no rows.
    """
    previous = df.shift(1)
    returns = (df - previous) / previous
    # The first row has no predecessor (all NaN after the shift): drop it and
    # renumber so that row t describes the move from period t to t + 1.
    return returns.iloc[1:].reset_index(drop=True)

In [6]:
def flatten(iterable):
    """Lazily yield the leaves of an arbitrarily nested iterable, depth-first.

    Any element that is itself iterable is descended into recursively;
    strings are treated as leaves and yielded whole.

    Parameters
    ----------
    iterable : iterable
        Possibly nested collection (lists of lists, tuples, ...).

    Yields
    ------
    object
        Each non-iterable element (or string) in left-to-right order.
    """
    # Imported locally from collections.abc: the `collections.Iterable` alias
    # was removed in Python 3.10, so this keeps the helper working regardless
    # of how the name is imported at file level.
    from collections.abc import Iterable

    for el in iterable:
        if isinstance(el, Iterable) and not isinstance(el, str):
            yield from flatten(el)
        else:
            yield el

In [7]:
def setProblemData(p, index_cor, q):
    """Populate the CPLEX problem `p` with the asset-selection model.

    With ``n = index_cor.shape[0]`` the model is::

        maximize   sum_{i,j} cor[i][j] * x_i_j
        subject to sum_j y_j    = q        (one row)
                   sum_j x_i_j  = 1        for every i
                   x_i_j - y_j <= 0        for every i, j
                   0 <= x_i_j <= 1,  0 <= y_j <= 1

    The x_i_j columns are added first (row-major, matching the flattened
    correlation matrix) and the y_j columns last. No ``types`` argument is
    passed to ``variables.add``, so the variables keep the solver's default
    type rather than being declared binary.

    Parameters
    ----------
    p : object exposing ``objective``, ``variables`` and ``linear_constraints``
        in the style of ``cplex.Cplex``; it is modified in place.
    index_cor : pandas.DataFrame
        Square matrix whose entries become the objective coefficients.
    q : number
        Right-hand side of the ``sum_j y_j = q`` constraint.
    """
    n = index_cor.shape[0]
    labels = range(1, n + 1)

    p.objective.set_sense(p.objective.sense.maximize)

    # Column names: x_names[i][j] is "x_{i+1}_{j+1}", y_names[j] is "y_{j+1}".
    x_names = [["x_{}_{}".format(i, j) for j in labels] for i in labels]
    y_names = ["y_{}".format(j) for j in labels]

    # x_i_j columns, weighted by the correlation matrix read row by row.
    p.variables.add(
        obj=index_cor.values.flatten().tolist(),
        names=[name for x_row in x_names for name in x_row],
        ub=[1] * (n * n),
        lb=[0] * (n * n),
    )
    # y_j columns carry no objective weight.
    p.variables.add(obj=[0] * n, names=y_names, ub=[1] * n, lb=[0] * n)

    # Constraint rows, in order: the q row, one row per i, one row per (i, j).
    cardinality = [[y_names, [1] * n]]
    assignment = [[x_row, [1] * n] for x_row in x_names]
    linking = [
        [[x_names[i][j], y_names[j]], [1, -1]]
        for i in range(n)
        for j in range(n)
    ]

    p.linear_constraints.add(
        lin_expr=cardinality + assignment + linking,
        senses="E" * (1 + n) + "L" * (n * n),
        rhs=[q] + [1] * n + [0] * (n * n),
    )

In [25]:
def select_assets(index, q, tol = 1e-6):
    """Select assets from a price table by solving the model built by setProblemData.

    The prices are converted to returns with ``compute_returns``, their
    correlation matrix is handed to ``setProblemData``, the problem is solved
    with CPLEX, and the assets whose y variable equals 1 (within `tol`) are
    returned. The solver status and objective value are printed.

    Parameters
    ----------
    index : pandas.DataFrame
        Price table, one column per asset (passed to ``compute_returns``).
    q : number
        Right-hand side of the selection constraint (passed to
        ``setProblemData``).
    tol : float, optional
        Absolute tolerance used when testing a y value against 1. Solver
        output is floating point, so an exact ``== 1.0`` test can miss values
        such as 0.9999999999; pass ``tol = 0`` for an exact comparison.

    Returns
    -------
    list or None
        Column labels of the selected assets, in column order. ``None`` if
        CPLEX raised a ``CplexError`` (the error is printed, not re-raised).
    """
    returns = compute_returns(index)
    index_cor = returns.corr()
    dim = index_cor.shape[0]

    try:
        p = cplex.Cplex()
        setProblemData(p, index_cor, q)
        p.solve()

        status = p.solution.get_status()
        print("solution status : {} : {}".format(status, p.solution.status[status]))
        print("solution value : {}".format(p.solution.get_objective_value()))
        values = p.solution.get_values()
    except CplexError as exc:
        print (exc)
        return None

    # setProblemData adds the y columns last, so they are the trailing `dim`
    # entries of the solution vector (sliced from the front so dim == 0
    # yields an empty selection instead of the whole vector).
    y_values = values[len(values) - dim:]

    # Labels are taken from the correlation matrix so that they line up
    # one-to-one with the y variables built from that same matrix.
    return [
        asset
        for asset, value in zip(index_cor.columns, y_values)
        if abs(value - 1.0) <= tol
    ]

In [29]:
# Select q = 5 assets from the CAC40 price table; the cell output below shows
# the solver log followed by the returned list of chosen column labels.
select_assets(CAC40, 5)

  import sys
  import sys


CPXPARAM_Read_DataCheck                          1
Tried aggregator 1 time.
No LP presolve or aggregator reductions.
Presolve time = 0.01 sec. (0.43 ticks)
Initializing dual steep norms . . .

Iteration log . . .
Iteration:     1   Dual objective     =            30.000000
Iteration:    82   Dual objective     =             6.287661
Iteration:   149   Dual objective     =             6.230934
solution status : 1 : optimal
solution value : 6.229597061716615


['CA', 'CS', 'EN', 'MC', 'SU']