In [436]:
import pandas as pd


# Load the two multi-CCA CSV files from the tests/data directory as DataFrames.
# The paths are relative, so they resolve against the current working directory.
mcca2 = pd.read_csv("../tests/data/multicca2.csv", sep=",")
mcca1 = pd.read_csv("../tests/data/multicca1.csv", sep=",")


In [437]:
import numpy as np
import pyomo.environ as pyo
from scipy.linalg import svd
from collections import defaultdict

In [438]:
# Keep only the underlying values of each dataset as NumPy arrays.
# Column position 0 is dropped in both frames: mcca1 keeps positions 1 up to
# (not including) 7, mcca2 keeps positions 1 up to (not including) 6.
datasets = [mcca1.iloc[:,1:7].values, mcca2.iloc[:,1:6].values]

In [439]:
# params
# standardize: when True, every dataset is passed through scale() in the
# preprocessing cell
standardize = True
# mimic_R: when True (and standardize is set), columns are also divided by
# their standard deviation; otherwise they are only centered
mimic_R = True
# penalties usually same length as datasets
penalties = 1
# number of columns of the first dataset (length of its first row)
features = len(datasets[0][0])

In [440]:
def ObjRule(model):
    """Objective function (4.3 in Witten 2009).

    Sums ``w_i^T X_i^T X_j w_j`` over every pair of datasets with ``i < j``.

    Parameters
    ----------
    model
        Object providing
        ``X``     - iterable of datasets, each a sequence of rows,
        ``PC``    - set whose ``data()`` lists the feature indices,
        ``w_i_k`` - weights indexed by ``(dataset, feature index)``; each
        entry exposes its current number through ``.value``.

    Returns
    -------
    The summed pairwise value (``0`` when fewer than two datasets are present).

    Notes
    -----
    The weights are read through ``.value``, so the result is a plain number
    evaluated at the current weight values.
    NOTE(review): confirm that a numeric value, rather than a symbolic
    expression in the weights, is what the objective is meant to receive.
    """
    feature_ids = list(model.PC.data())
    features = len(feature_ids)

    def as_matrix(x):
        # -1 lets NumPy infer the number of rows from the data, so datasets
        # with any number of samples are accepted (previously fixed at 25).
        return np.asarray(x).reshape(-1, features)

    def weight_vector(x):
        return np.asarray([model.w_i_k[x, f].value for f in feature_ids])

    return sum(
        weight_vector(xi) @ as_matrix(xi).T @ as_matrix(xj) @ weight_vector(xj)
        for idx, xi in enumerate(model.X)
        for jdx, xj in enumerate(model.X)
        if idx < jdx
    )

In [382]:
# Disabled draft of a lasso constraint rule. The triple-quoted text below is a
# bare string expression, so the function inside it is never defined.
'''def lasso(model, xi):
    # sum over all entries of vector wi (i in [1:K])
    return sum(abs(model.w[xi][f]) for f in range(len(model.w[xi])) ) <= model.c[xi]'''

'def lasso(model, xi):\n    # sum over all entries of vector wi (i in [1:K])\n    return sum(abs(model.w[xi][f]) for f in range(len(model.w[xi])) ) <= model.c[xi]'

In [383]:
# Disabled draft of a squared 2-norm constraint rule. The triple-quoted text
# below is a bare string expression, so the function inside it is never defined.
# NOTE: in this draft the `<= 1` sits inside the sum(...) call.
'''def norm2(model, xi):
    # sum over all entries of vector wi (i in [1:K])
    return sum(model.w[xi][f] * model.w[xi][f] for f in range(len(model.w[xi])) <= 1)'''

'def norm2(model, xi):\n    # sum over all entries of vector wi (i in [1:K])\n    return sum(model.w[xi][f] * model.w[xi][f] for f in range(len(model.w[xi])) <= 1)'

In [441]:
# initialise weights to zero
def init_w(xi):
    """Return an all-zero weight vector of length ``features``.

    ``xi`` is accepted but not used; the length comes from the module-level
    ``features`` value.
    """
    # TODO use svd vh from scipy
    zero_weights = np.zeros(features)
    return zero_weights

In [442]:
def svd_x():
    """Collect the leading right-singular vector(s) of every dataset.

    For each entry of the module-level ``datasets`` the SVD is computed, the
    first ``K`` rows of ``Vh`` are kept, transposed and converted to nested
    lists (one inner list of ``K`` numbers per feature).
    """
    K = 1
    return [svd(data)[2][:K].T.tolist() for data in datasets]

# evaluated once here so that later cells can index into the result
SVD = svd_x()

In [443]:
def init_weigth(model, pc):
    """Initial value for index ``pc``, taken from the first dataset's SVD entry.

    ``model`` is not used, and only ``SVD[0]`` is consulted.
    """
    first_dataset_vector = SVD[0]
    component = first_dataset_vector[pc]
    return component[0]

In [444]:
def scale(mtx, center=True, scale=True):
    """
    Column-wise centering and scaling, mirroring R's ``scale``.

    mtx    : matrix whose columns are processed
    center : must be truthy; column means are subtracted
    scale  : when truthy, centered columns are also divided by their
             sample standard deviation

    Raises NotImplementedError when ``center`` is falsy.
    """
    if center:
        shifted = mtx - np.mean(mtx, axis=0)
        if scale:
            # ddof=1 applies Bessel's correction, which is what R's scale
            # uses for the standard deviation
            return shifted / shifted.std(axis=0, ddof=1)
        return shifted

    raise NotImplementedError('Scaling without centering not implemented')



In [445]:
# preprocess data
datasets = datasets.copy()  # shallow copy of the outer list

# every dataset must provide at least 2 features (checked on its first row)
if any(len(data[0]) < 2 for data in datasets):
    raise Exception('Need at least 2 features in each dataset')

# standardize if set TRUE: always center, divide by the standard deviation
# only when mimic_R is set; the result is stored as nested lists
if standardize:
    divide_by_std = True if mimic_R else False
    for idx, data in enumerate(datasets):
        datasets[idx] = scale(data, center=True, scale=divide_by_std).tolist()



In [446]:
# nested tuples instead of lists/arrays so that each dataset is hashable
datasets_as_tuples = [tuple(tuple(row) for row in data) for data in datasets]


In [458]:
# empty concrete Pyomo model; sets, parameters, variables, the objective and
# the constraints are attached to it in the following cells
model = pyo.ConcreteModel()


In [459]:
# set: Xi i in [1:K]
# Xi = set(x for x in datasets if x)
# X here is only for one dataset

# X: initialised from datasets_as_tuples (each dataset is a tuple of row tuples)
model.X = pyo.Set(initialize=datasets_as_tuples) 
# PC: integer indices 0..n-1, where n is the length of the first row of the
# first dataset
model.PC = pyo.Set(initialize=range(len(datasets_as_tuples[0][0])))


In [460]:
# TODO: for all Xi
# one variable per PC index; initial values come from init_weigth, which reads
# SVD[0] only
model.SVD = pyo.Var(model.PC, initialize=init_weigth)

In [461]:
# print the model.SVD variables with their initial values
model.SVD.pprint()

SVD : Size=5, Index=PC
    Key : Lower : Value                 : Upper : Fixed : Stale : Domain
      0 :  None :    0.9499637718656263 :  None : False : False :  Reals
      1 :  None :    0.3001339763989326 :  None : False : False :  Reals
      2 :  None :   0.06026018504528991 :  None : False : False :  Reals
      3 :  None : -0.003339869607191106 :  None : False : False :  Reals
      4 :  None :  -0.06201599570361449 :  None : False : False :  Reals


In [462]:
# params: ci i in [1:K]
# indexed by X; the scalar `penalties` is used as the initial value
model.c = pyo.Param(model.X, initialize=penalties)

In [463]:
# variables: wi i in [1:K]
# each wi needs to be a vector 1*len(features) -> features is the number of columns in Xi; use a list
# one variable per (dataset, PC index) pair, bounded to [0, 1] and starting at 0
model.w_i_k = pyo.Var(model.X, model.PC, bounds=(0, 1), initialize=0)
#model.w_i_k = pyo.Var(model.X, model.PC, bounds=(0, 1), initialize=model.SVD)


In [464]:
# Objective: maximise the value produced by ObjRule
model.Obj = pyo.Objective(rule=ObjRule, sense=pyo.maximize)

In [465]:
# constraints: lasso
# for every dataset, the sum of its weights must not exceed its penalty c
model.constraint_lasso = pyo.ConstraintList()
for xi in model.X:
    weight_sum = sum(model.w_i_k[xi, f] for f in model.PC.data())
    model.constraint_lasso.add(weight_sum <= model.c[xi])

In [455]:
 # constraints: (2-norm)^2 ||wi||_2^2 <= 1
# Disabled: the triple-quoted text below is a bare string expression, so no
# constraint_norm2 list is created and nothing is added to the model.
'''model.constraint_norm2 = pyo.ConstraintList()
for xi in model.X:
    model.constraint_norm2.add(sum(model.w_i_k[xi, f] * model.w_i_k[xi,f] for f in model.PC.data()) <= 1)
'''

In [466]:
#nonLinearOpt =pyo.SolverFactory('ipopt')
# NOTE(review): despite the variable name, the active solver is 'glpk'; the
# 'ipopt' line above is commented out -- confirm which solver is intended.
nonLinearOpt =pyo.SolverFactory('glpk')

# the solver is run on the object returned by model.create_instance()
instance_non_linear = model.create_instance()
res = nonLinearOpt.solve(instance_non_linear)
# NOTE(review): the solve ran on instance_non_linear, but the results are
# loaded into `model` here -- confirm this is the intended target.
model.solutions.load_from(res)

    solver failure.


In [467]:
# print the current state of the instance that was passed to the solver
instance_non_linear.display()

Model unknown

  Variables:
    SVD : Size=5, Index=PC
        Key : Lower : Value                 : Upper : Fixed : Stale : Domain
          0 :  None :    0.9499637718656263 :  None : False :  True :  Reals
          1 :  None :    0.3001339763989326 :  None : False :  True :  Reals
          2 :  None :   0.06026018504528991 :  None : False :  True :  Reals
          3 :  None : -0.003339869607191106 :  None : False :  True :  Reals
          4 :  None :  -0.06201599570361449 :  None : False :  True :  Reals
    w_i_k : Size=10, Index=w_i_k_index
        Key                                                                                                                                                                                                                                                                                                                                                                                                                                                 

In [468]:
# Collect the weights per dataset: w[xi] is the list of w_i_k values of
# dataset xi, in the order given by PC.
# Everything is read from instance_non_linear, the object handed to the solver.
w = defaultdict(list)
for xi in instance_non_linear.X:
    for f in instance_non_linear.PC.data():
        w[xi].append(instance_non_linear.w_i_k[xi, f].value)  # maybe just i as index?

print(w)

defaultdict(<class 'list'>, {(0.09662867604382566, 0.10810156002316562, 0.01997954379219518, -0.08404992095009216, -0.6307997030245771, 1.0586666280387018, -0.9433703436667538, -2.210285356747912, 1.6227618682468274, -0.5283821235821443, -0.07942745579557513, 0.8876331176434012, 0.9988571973465816, 0.13921077478493527, 1.5933653872380338, -0.8638548544043742, 0.14965216180482996, 1.4843457381364915, -1.1408104780319113, 0.7162495366181354, 0.9902211121694227, 0.8852737040163977, -0.018441902456054364, -0.09717111862852387, 0.2464230839831094, -1.259828847625922, -0.2015612429995194, -0.20250990436668248, 1.8125273114233647, -3.2854660647221268, -1.4296184853635543, -2.0183455155645236, -2.2455916698691225, 1.5900979133776942, -0.1974637098864798, 0.4645309360907232, 0.872442268544012, 0.9078560091613558, -0.8482397417916058, 0.38889857552904195, -0.2999981224083293, 0.11282290112933971, 0.6624531437819309, -0.7731021001487046, 0.7233974111341313, 1.1705500309790067, 0.9560320886931588,