# Nonlinear programming to solve for the best weights

In [None]:
import pandas as pd


# Load the two comma-separated example tables that serve as the input
# datasets for the cells below.
mcca1, mcca2 = (
    pd.read_csv(csv_path, sep=",")
    for csv_path in ("../tests/data/multicca1.csv", "../tests/data/multicca2.csv")
)


In [437]:
import numpy as np
import pyomo.environ as pyo
from scipy.linalg import svd
from collections import defaultdict

## Prepare data

In [438]:
# Keep only the raw values of the selected columns of each table:
# column positions 1-6 of mcca1 and 1-5 of mcca2 (position 0 is skipped).
datasets = [
    mcca1.iloc[:, 1:7].values,
    mcca2.iloc[:, 1:6].values,
]

## Parameters

In [439]:
# Settings read by the preprocessing and model cells.

# Number of components; the model's K index set is built from range(K).
K = 1
# One lasso penalty per dataset (same length as datasets); used as the
# right-hand side of the lasso constraints.
penalties = [1, 1]
# Whether the preprocessing cell rescales the datasets at all.
standardize = True
# When standardizing: True centers and scales to unit variance (R-style
# scale()), False only centers the columns.
mimic_R = True

In [444]:
def scale(mtx, center=True, scale=True):
    """
    Column-wise centering and (optionally) scaling, modelled on R's scale().

    Parameters
    ----------
    mtx : 2-D array-like
        Data with one column per feature.
    center : bool
        Must be truthy; subtracts each column's mean.
    scale : bool
        When truthy, additionally divides each centered column by its
        sample standard deviation (ddof=1).

    Returns
    -------
    The centered data, divided by the column standard deviations if
    ``scale`` is truthy.

    Raises
    ------
    NotImplementedError
        If ``center`` is falsy.
    """
    if not center:
        raise NotImplementedError('Scaling without centering not implemented')

    column_means = np.mean(mtx, axis=0)
    deviations = mtx - column_means

    if scale:
        # R's scale() uses the sample standard deviation, hence Bessel's
        # correction (ddof=1) instead of numpy's default ddof=0.
        column_sd = deviations.std(axis=0, ddof=1)
        return deviations / column_sd

    return deviations


In [445]:
# preprocess data
# Every dataset must provide at least 2 feature columns.
for data in datasets:
    if len(data[0]) < 2:
        raise ValueError('Need at least 2 features in each dataset')

# Standardize if requested. A new list is built, so the list object that
# `datasets` referred to before this cell is left untouched.
if standardize:
    # mimic_R=True centers and scales to unit variance (like R's scale());
    # otherwise the columns are only centered.
    datasets = [scale(data, center=True, scale=mimic_R) for data in datasets]

# Convert to plain nested lists whether or not standardization ran, so the
# model cells always receive the same container type (previously the
# conversion was skipped when standardize was False).
datasets = [np.asarray(data).tolist() for data in datasets]


In [446]:
#datasets_as_tuples = [tuple(map(tuple,data)) for data in datasets] #(hashable)


## Sets

In [None]:
# Concrete Pyomo model; the cells below attach its parameters, variables,
# objective and constraints.
model = pyo.ConcreteModel()

# One index per dataset.
model.Idx = pyo.Set(initialize=range(len(datasets)))
# Row (sample) indices, sized from the first dataset only.
model.samples = pyo.Set(initialize=range(len(datasets[0])))
# Column (feature) indices, sized from the first row of the first dataset only.
# NOTE(review): the data-preparation cell slices a different number of columns
# per dataset (1:7 vs 1:6) while this set is shared by all datasets -- confirm.
model.PC = pyo.Set(initialize=range(len(datasets[0][0])))
# Component indices 0..K-1.
model.K = pyo.Set(initialize=range(K))
# The datasets themselves as set members.
# NOTE(review): presumably Pyomo flattens each nested list into one flat tuple
# (ObjRule reshapes every member back to samples x features) -- confirm.
model.X = pyo.Set(initialize=datasets) 

## Model parameters

In [None]:
# params: c_i, the lasso penalty of dataset i, for every i in model.Idx
# (one entry of `penalties` per dataset, in order).
# An explicit index -> value mapping is passed instead of the bare list so
# that each index is unambiguously paired with its own penalty.
model.c = pyo.Param(model.Idx, initialize=dict(enumerate(penalties)))

## Variables

In [463]:
# Decision variables w[i, k, f]: the weight of feature f in component k of
# dataset i. Each weight is bounded to [0, 1] and starts at 0.5.
model.w_i_k_f = pyo.Var(
    model.Idx,
    model.K,
    model.PC,
    bounds=(0, 1),
    initialize=0.5,
)

## Objective

In [3]:
def ObjRule(model):
    """Objective Function (4.3 in witten 2009)

    For every pair of datasets (i, j) with i < j, forms the K x K matrix
    ``W_i @ X_i.T @ X_j @ W_j.T`` and adds up all of its entries; the
    objective is the total over all pairs.

    ``W_i`` is the K x features array of the ``model.w_i_k_f`` entries of
    dataset i, and ``X_i`` is the i-th member of ``model.X`` reshaped to
    (samples, features), where both sizes are taken from ``model.samples``
    and ``model.PC``.

    NOTE(review): for K > 1 the sum also includes the off-diagonal
    (cross-component) entries of the K x K matrix -- confirm this is the
    intended objective.

    Parameters
    ----------
    model
        Object providing ``PC``, ``samples``, ``K`` (each with ``.data()``),
        an iterable ``X`` and an indexable ``w_i_k_f``.

    Returns
    -------
    The summed objective expression.
    """
    n_features = len(model.PC.data())
    n_samples = len(model.samples.data())
    # TODO: array from w_i_k (for all pcs)

    def weight_matrix(dataset_idx):
        # K x features array holding the weights of one dataset.
        return np.asarray(
            [
                [model.w_i_k_f[dataset_idx, k, f] for f in model.PC.data()]
                for k in model.K.data()
            ]
        )

    def data_matrix(member):
        # Members of model.X are reshaped back to samples x features.
        return np.asarray(member).reshape(n_samples, n_features)

    def pair_term(i, x_i, j, x_j):
        # Build the K x K product once per dataset pair (it used to be
        # rebuilt for every (r, c) entry), then add up its entries.
        cross = (
            weight_matrix(i)
            @ data_matrix(x_i).T
            @ data_matrix(x_j)
            @ weight_matrix(j).T
        )
        return sum(cross[r, c] for r in model.K.data() for c in model.K.data())

    return sum(
        pair_term(i, x_i, j, x_j)
        for i, x_i in enumerate(model.X)
        for j, x_j in enumerate(model.X)
        if i < j
    )
    

In [464]:
# Register ObjRule as the model's objective; the solver maximizes it.
model.Obj = pyo.Objective(
    sense=pyo.maximize,
    rule=ObjRule,
)

## Constraints

In [465]:
# Lasso constraints: for each dataset i, the sum of all its weights (over
# every component k and feature f) must not exceed the penalty c[i].
model.constraint_lasso = pyo.ConstraintList()
for i in model.Idx:
    model.constraint_lasso.add(
        sum(
            model.w_i_k_f[i, k, f]
            for k in model.K.data()
            for f in model.PC.data()
        )
        <= model.c[i]
    )
    
    

In [None]:
# Norm constraints: for each dataset i, the sum of squared weights (over
# every component k and feature f) must not exceed 1.
model.constraint_norm2 = pyo.ConstraintList()
for i in model.Idx:
    model.constraint_norm2.add(
        sum(
            model.w_i_k_f[i, k, f] * model.w_i_k_f[i, k, f]
            for k in model.K.data()
            for f in model.PC.data()
        )
        <= 1
    )

## Solve with ipopt

In [None]:
# Solve the model with the ipopt solver obtained from Pyomo's SolverFactory.
nonLinearOpt =pyo.SolverFactory('ipopt')
# NOTE(review): `model` is already a ConcreteModel; confirm that
# create_instance() is needed here and what it returns for a concrete model.
instance_non_linear = model.create_instance()
res = nonLinearOpt.solve(instance_non_linear)
# NOTE(review): the solve ran on instance_non_linear and the later cells read
# values from instance_non_linear, yet the results are loaded into `model`
# here -- confirm this call is intended.
model.solutions.load_from(res)

In [None]:
# Print the instance that was passed to the solver.
instance_non_linear.display()

In [None]:
from collections import defaultdict

In [None]:
# Collect the solved weights: w[i, k] is the list of weight values of
# dataset i, component k, ordered by feature index.
w = defaultdict(list)
for i, k, f in (
    (i, k, f)
    for i in model.Idx
    for k in model.K.data()
    for f in model.PC.data()
):
    w[i, k].append(instance_non_linear.w_i_k_f[i, k, f].value)
