# Kernelized Synaptic Weight Matrices

In [None]:
import numpy as np
import pandas as pd
from time import time
import tensorflow as tf
import sys
import os

# Seed NumPy's global RNG from the wall clock. The value is kept in `seed`
# so it can be passed on and written out together with the results.
seed = int(time())
np.random.seed(seed)

## `load_data`

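Loads MovieLens ratings from `ratings.csv` (located under `path`) and splits them into training and validation rating matrices.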

- :param path: path to the ratings file
- :param val_frac: fraction of data to use for validation
- :param delimiter: delimiter used in data file (currently ignored: `ratings.csv` is read as a comma-separated file with a header row)
- :param seed: random seed for validation splitting
- :param transpose: flag to transpose output matrices (swapping users with movies)
- :return: train ratings (n_u, n_m), valid ratings (n_u, n_m)

In [None]:
def load_data(
    path='./',
    val_frac=0.1,
    delimiter='::',
    seed=1234,
    transpose=False
):
    """Load ratings from ``ratings.csv`` and split them into train/validation.

    The file ``<path>/ratings.csv`` must have a header row with the columns
    ``userId``, ``movieId`` and ``rating``. Rows are shuffled with a private
    random generator seeded by ``seed``; the first ``val_frac`` share of the
    shuffled rows becomes the validation set, the rest the training set.

    :param path: directory containing ``ratings.csv``
    :param val_frac: fraction of ratings to use for validation
    :param delimiter: accepted for backward compatibility but ignored; the
        file is parsed as comma-separated CSV
    :param seed: random seed for validation splitting
    :param transpose: flag to transpose output matrices (swapping users
        with movies)
    :return: train ratings (n_u, n_m), valid ratings (n_u, n_m) as float32
        arrays; entries without a rating are 0
    """
    tic = time()
    print('reading data...')

    data = pd.read_csv(os.path.join(path, 'ratings.csv'), header=0)
    print('data read in', time() - tic, 'seconds')

    # Map raw user/movie ids to contiguous indices (from zero, in order of
    # first appearance in the file).
    u_idx, unique_user_ids = pd.factorize(data['userId'])
    m_idx, unique_movie_ids = pd.factorize(data['movieId'])

    # Keep ratings as floats: truncating to int would turn a 0.5 rating into
    # 0, which is indistinguishable from "no rating".
    ratings = np.asarray(data['rating'], dtype='float32')

    n_u = len(unique_user_ids)
    n_m = len(unique_movie_ids)
    n_r = len(data)

    # Shuffle row order with a local generator so the split is reproducible
    # for a given seed and does not depend on the ordering of the file.
    order = np.random.RandomState(seed).permutation(n_r)

    # Same boundary rule as before: shuffled positions 0..val_frac*n_r
    # (inclusive) go to validation.
    in_valid = np.arange(n_r) <= val_frac * n_r
    valid_rows = order[in_valid]
    train_rows = order[~in_valid]

    train_ratings = np.zeros((n_u, n_m), dtype='float32')
    valid_ratings = np.zeros((n_u, n_m), dtype='float32')

    # NOTE(review): assumes each (user, movie) pair occurs at most once in
    # the file; with duplicates, which rating wins is unspecified.
    valid_ratings[u_idx[valid_rows], m_idx[valid_rows]] = ratings[valid_rows]
    train_ratings[u_idx[train_rows], m_idx[train_rows]] = ratings[train_rows]

    if transpose:
        train_ratings = train_ratings.T
        valid_ratings = valid_ratings.T

    return train_ratings, valid_ratings

In [None]:
# Load ./dataset/small/ratings.csv and split it into train (tr) and
# validation (vr) rating matrices. With transpose=True the matrices are
# returned as (movies, users) instead of (users, movies).
tr, vr = load_data(
    path='./dataset/small',
    delimiter='::',
    seed=seed,
    transpose=True,
    val_frac=0.1
)

In [None]:
# Binary float masks: 1.0 where a rating is present (non-zero), else 0.0.
tm = np.greater(tr, 1e-12).astype('float32')  # training mask
vm = np.greater(vr, 1e-12).astype('float32')  # validation mask

n_m = tr.shape[0]  # number of movies
n_u = tr.shape[1]  # number of users (the two roles swap depending on 'transpose' in load_data)

# Set hyper-parameters
n_hid = 500  # hidden units per kernel layer
lambda_2 = 60. # L2 penalty scale on the weight matrices (previously read from sys.argv[1])
lambda_s = 0.013 # L2 penalty scale on the sparsifying kernel (previously read from sys.argv[2])
n_layers = 2  # number of hidden kernel layers
output_every = 50  # L-BFGS iterations per optimizer call; errors are evaluated after each call
n_epoch = n_layers * 10 * output_every  # total L-BFGS iteration budget
verbose_bfgs = True  # passed to the optimizer as its 'disp' option
use_gpu = False

if not use_gpu:
    # Hide all CUDA devices from this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

# Input placeholders
# R holds rows of the rating matrix: any number of rows, n_u columns.
R = tf.placeholder("float", [None, n_u])

## `kernel`

Sparsifying kernel function

- :param u: input vectors `[n_in, 1, n_dim]`
- :param v: output vectors `[1, n_hid, n_dim]`
- :return: input to output connection matrix

In [None]:
def kernel(u, v):
    """Sparsifying kernel between input and output embedding vectors.

    :param u: input vectors `[n_in, 1, n_dim]`
    :param v: output vectors `[1, n_hid, n_dim]`
    :return: input to output connection matrix with entries
        ``max(0, 1 - ||u - v||^2)``
    """
    # Euclidean distance along the embedding axis (axis 2).
    separation = tf.norm(u - v, ord=2, axis=2)
    # Clamp at zero so pairs at distance >= 1 get no connection.
    return tf.maximum(0., 1. - separation**2)

## `kernel_layer`

a kernel sparsified layer

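- :param x: input `[batch, channels]`
- :param n_hid: number of hidden units
- :param n_dim: number of dimensions to embed for kernelization
- :param activation: output activation
- :param lambda_s: scale of the L2 penalty on the sparsifying kernel matrix
- :param lambda_2: scale of the L2 penalty on the weight matrix `W`
- :param name: layer name for scoping
- :return: layer output, regularization term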


In [None]:
def kernel_layer(
    x,
    n_hid=500,
    n_dim=5,
    activation=tf.nn.sigmoid,
    lambda_s=lambda_s,
    lambda_2=lambda_2,
    name=''
):
    """Build one kernel-sparsified dense layer.

    :param x: input `[batch, channels]`
    :param n_hid: number of hidden units
    :param n_dim: number of dimensions to embed for kernelization
    :param activation: output activation
    :param lambda_s: scale of the L2 penalty on the kernel matrix
    :param lambda_2: scale of the L2 penalty on the weight matrix
    :param name: layer name for scoping
    :return: layer output, regularization term
    """
    n_in = x.get_shape().as_list()[1]

    # Trainable variables live under the layer's own variable scope.
    with tf.variable_scope(name):
        weights = tf.get_variable('W', [n_in, n_hid])
        in_embed = tf.get_variable(
            'u',
            initializer=tf.random.truncated_normal([n_in, 1, n_dim], 0., 1e-3)
        )
        out_embed = tf.get_variable(
            'v',
            initializer=tf.random.truncated_normal([1, n_hid, n_dim], 0., 1e-3)
        )
        bias = tf.get_variable('b', [n_hid])

    # The kernel value for an input/output pair shrinks as their embeddings
    # move apart and reaches zero once they are far enough from each other,
    # which removes that connection.
    connectivity = kernel(in_embed, out_embed)

    # Regularization: one L2 penalty on the kernel matrix, one on the weights.
    kernel_penalty = tf.contrib.layers.apply_regularization(
        tf.contrib.layers.l2_regularizer(lambda_s), [connectivity]
    )
    weight_penalty = tf.contrib.layers.apply_regularization(
        tf.contrib.layers.l2_regularizer(lambda_2), [weights]
    )

    # Effective weights are the dense weights masked element-wise by the kernel.
    masked_weights = weights * connectivity
    output = activation(tf.matmul(x, masked_weights) + bias)

    return output, kernel_penalty + weight_penalty

## Main code starts here

In [None]:
# Instantiate network
# Stack n_layers hidden kernel layers on top of the input placeholder and
# accumulate each layer's regularization term.
y = R
reg_losses = None
for i in range(n_layers):
    y, reg_loss = kernel_layer(y, n_hid, name=str(i))
    reg_losses = reg_loss if reg_losses is None else reg_losses + reg_loss

# Output layer: maps back to n_u columns with no non-linearity.
prediction, reg_loss = kernel_layer(y, n_u, activation=tf.identity, name='out')
reg_losses = reg_losses + reg_loss

# Compute loss (symbolic)
# Only entries selected by the training mask tm contribute to the error.
diff = tm*(R - prediction)
sqE = tf.nn.l2_loss(diff)
loss = sqE + reg_losses

# Instantiate L-BFGS Optimizer
# Each minimize() call runs at most output_every iterations ('maxiter').
optimizer = tf.contrib.opt.ScipyOptimizerInterface(
    loss,
    options={
        'maxiter': output_every,
        'disp': verbose_bfgs,
        'maxcor': 10
    },
    method='L-BFGS-B'
)

init = tf.global_variables_initializer()

In [None]:
# Train with L-BFGS in rounds of `output_every` iterations, report the
# validation/train RMSE after each round, then append the final figures to
# summary_ml1m.txt.
with tf.Session() as sess:
    sess.run(init)

    for i in range(int(n_epoch / output_every)):
        optimizer.minimize(sess, feed_dict={R: tr})  # do maxiter optimization steps
        pre = sess.run(prediction, feed_dict={R: tr})  # predict ratings

        # Clip predictions to [1, 5] once and reuse for both error measures.
        clipped = np.clip(pre, 1., 5.)

        # compute validation error (mean squared error over masked entries)
        error = (vm * (clipped - vr) ** 2).sum() / vm.sum()

        # compute train error
        error_train = (tm * (clipped - tr) ** 2).sum() / tm.sum()

        print('.-^-._' * 12)
        print('epoch:', i, 'validation rmse:', np.sqrt(error), 'train rmse:', np.sqrt(error_train))
        print('.-^-._' * 12)

    # Append one line: command-line arguments, final validation RMSE, final
    # train RMSE and the seed. The `with` block closes the file on exit, so
    # no explicit close() is needed.
    with open('summary_ml1m.txt', 'a') as summary:
        for a in sys.argv[1:]:
            summary.write(a + ' ')

        summary.write(
            str(np.sqrt(error)) + ' ' +
            str(np.sqrt(error_train)) + ' ' +
            str(seed) + '\n'
        )