In [1]:
# Execute header.py inside this notebook's namespace.
# NOTE(review): names used further down without a visible import (np,
# random_state, IMCProblem, imc_descent, step_qaadmm, make_imc_data, sparsify,
# the QAObjective* losses) presumably come from header.py — confirm.
%run header.py
# Render matplotlib figures inline in the notebook.
%matplotlib inline

# Project helpers used below to split the interaction data and to build a
# matrix from it (the result is later converted with .toarray() / .tocsr()).
from utils import sample_from_interactions
from utils import from_interactions_to_coo

In [2]:
import os

# Directory that holds the MovieLens arrays (I.npy, X.npy, Y.npy) loaded below.
# The location is machine-specific, so it can be overridden with the
# SGIMC_EXP_PATH environment variable; when that variable is unset or empty the
# original hard-coded location is used.
# os.path.join(..., '') guarantees a trailing separator, which the plain string
# concatenation `exp_path + 'I.npy'` in the loading cell depends on.
exp_path = os.path.join(
    os.environ.get('SGIMC_EXP_PATH') or '/nmnt/x04-hdd/boris_temp/SGIMC_IMC/movielens/',
    '',
)

In [3]:
# Task selector. "regression" is the active setting; change the literal to
# "classification" to switch the objective chosen below.
PROBLEM = "regression"

# Inner optimizer handed to imc_descent for every step.
step_fn = step_qaadmm

# Objective matching the task: the log-loss for classification, the L2 loss
# otherwise (QAObjectiveHuberLoss was noted as an alternative to the latter).
QAObjectiveLoss = (
    QAObjectiveLogLoss if PROBLEM == "classification" else QAObjectiveL2Loss
)

In [4]:
# Regularization strengths; the lasso weight is tied to the group weight.
C_ridge, C_group = 1e-4, 2e-4
C_lasso = 10 * C_group
C = C_lasso, C_group, C_ridge

# ADMM eta: larger is faster but more unstable.
eta = 1e1

# Settings forwarded to the inner optimizer on each step.
step_kwargs = dict(
    C=C,              # regularizer constants (C_lasso, C_group, C_ridge)
    eta=eta,          # the eta of the ADMM
    rtol=1e-5,        # relative tolerance for stopping the ADMM
    atol=1e-8,        # absolute tolerance
    method="cg",      # the method to use in Sub_0
    n_iterations=2,   # number of iterations of the inner ADMM
)

# Number of outer iterations, and the number of columns of the W / H factors.
n_iter, K = 100, 5

## Data loading

In [5]:
# Read the three arrays stored under exp_path, in the order I, X, Y.
# NOTE(review): X and Y are presumably the row / column side-feature matrices
# (they are passed to IMCProblem next to the interaction matrix) — confirm.
interactions, X, Y = (
    np.load(exp_path + filename) for filename in ('I.npy', 'X.npy', 'Y.npy')
)

In [6]:
# Build the matrix of all interactions, then flag its strictly positive
# entries in a dense boolean mask.
R_full = from_interactions_to_coo(interactions)
full_mask = np.greater(R_full.toarray(), 0)

In [7]:
# Split the interactions into a training part and a test part.
# NOTE(review): 0.01 is presumably the sampling fraction — confirm against
# utils.sample_from_interactions. No seed is passed here, so the split may not
# be reproducible between runs.
interaction_train, interaction_test = sample_from_interactions(
    interactions,
    0.01,
)



In [8]:
# Training interactions as a CSR matrix; test interactions as a dense boolean
# mask of the strictly positive entries.
R_train = from_interactions_to_coo(interaction_train).tocsr()
test_mask = np.greater(from_interactions_to_coo(interaction_test).toarray(), 0)

In [9]:
# Sanity check: show the shapes of X, Y and the full interaction matrix,
# one per line.
print(X.shape, Y.shape, R_full.shape, sep="\n")

(943, 23)
(1682, 20)
(943, 1682)


# MovieLens fit — the kernel died while running the cell below :((

In [None]:
# Set up the IMC problem on the MovieLens training split (4 threads).
problem = IMCProblem(QAObjectiveLoss, X, Y, R_train, n_threads=4)

# Gaussian starting factors: one row per column of X (resp. Y), K columns each.
W_0 = random_state.normal(size=(X.shape[1], K))
H_0 = random_state.normal(size=(Y.shape[1], K))

# Hand copies to the solver so that W_0 / H_0 stay available afterwards.
W = W_0.copy()
H = H_0.copy()

W, H = imc_descent(
    problem, W, H,
    step_fn,                   # the inner optimization
    step_kwargs=step_kwargs,   # arguments for the inner optimizer
    n_iterations=n_iter,       # number of outer (Gauss-Seidel) iterations
    n_init_iterations=0,
    rtol=1e-5,                 # relative stopping tolerance for the outer iterations
    atol=1e-7,                 # absolute tolerance
    check_product=True,        # use the product W H' for stopping
    return_history=True,       # record the evolution of the matrices (W, H)
    verbose=True,              # show the progress bar
)

0it [00:00, ?it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

## Synthetic data

In [10]:
# Regularizer constants (C_lasso, C_group, C_ridge) and ADMM eta per task.
# An earlier regression setting, C = 2e-5, 2e-3, 0, is kept here for reference.
C, eta = (
    ((1e0, 1e-1, 1e-3), 1e0)
    if PROBLEM == "classification"
    else ((2e-3, 2e-4, 1e-4), 1e1)
)

# Settings forwarded to the inner optimizer on each step.
step_kwargs = dict(
    C=C,              # regularizer constants (C_lasso, C_group, C_ridge)
    eta=eta,          # the eta of the ADMM (larger - faster but more unstable)
    rtol=1e-5,        # relative tolerance for stopping the ADMM
    atol=1e-8,        # absolute tolerance
    method="cg",      # the method to use in Sub_0
    n_iterations=2,   # number of iterations of the inner ADMM
)

# Size and rank passed to make_imc_data.
n_samples, n_objects = 800, 1600
n_rank = 25

# Scale and relative noise level passed to make_imc_data.
scale, noise = 0.05, 0.10

# Number of outer iterations for imc_descent.
n_iter = 100

In [11]:
# Generate the synthetic problem. These assignments replace the X and Y that
# were loaded from the MovieLens files earlier in the notebook.
# NOTE(review): the two literal 50s are presumably the feature counts of X and
# Y — confirm against make_imc_data.
X, W_ideal, Y, H_ideal, R_noisy_full, R_clean_full = make_imc_data(
    n_samples, 50, n_objects, 50, n_rank,
    scale=(scale, scale),
    noise=scale * noise,
    binarize=PROBLEM == "classification",
    random_state=random_state,
    return_noisy_only=False,
)

# Sparsify the noisy matrix to obtain the training data (this also replaces
# the MovieLens R_train); the accompanying mask is kept as `mask`.
# NOTE(review): 0.1 is presumably the fraction of entries kept — confirm
# against sparsify.
R_train, mask = sparsify(R_noisy_full, 0.1, random_state=random_state)

In [12]:
# Set up the IMC problem on the synthetic training matrix (8 threads).
problem = IMCProblem(QAObjectiveLoss, X, Y, R_train, n_threads=8)

# Gaussian starting factors: one row per column of X (resp. Y), K columns each.
# K is not redefined for the synthetic run, so it is still the K = 5 from the
# first configuration cell, while the data was generated with n_rank = 25.
W_0 = random_state.normal(size=(X.shape[1], K))
H_0 = random_state.normal(size=(Y.shape[1], K))

# Hand copies to the solver so that W_0 / H_0 stay available afterwards.
W = W_0.copy()
H = H_0.copy()

W, H = imc_descent(
    problem, W, H,
    step_fn,                   # the inner optimization
    step_kwargs=step_kwargs,   # arguments for the inner optimizer
    n_iterations=n_iter,       # number of outer (Gauss-Seidel) iterations
    n_init_iterations=0,
    rtol=1e-5,                 # relative stopping tolerance for the outer iterations
    atol=1e-7,                 # absolute tolerance
    check_product=True,        # use the product W H' for stopping
    return_history=True,       # record the evolution of the matrices (W, H)
    verbose=True,              # show the progress bar
)

0it [00:00, ?it/s]
100%|██████████| 100/100 [00:02<00:00, 34.82it/s]
