In [1]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()


import numpy as np


X = np.random.normal(size=(1000,1000))
X_observed = []
for row in X.tolist():
    current_row = []
    for val in row:
        val = val if np.random.rand() > 0.1 else np.nan
        current_row.append(val)
    X_observed.append(current_row)
X_observed = np.array(X_observed)


class MCNNM(object):
    def __init__(self):
        self.train_op = None
        
    def build_tf_graph(self, X, num_factors=3, lambd=0.1):
        self.lambd = lambd
        #tf.reset_default_graph()
        input_shape = X.shape
        observed = ~np.isnan(X_observed)
        self.observed =  tf.constant(observed, dtype=tf.float64)
        self.num_observed = float(observed.sum())
        X = tf.constant(X, tf.float64)
        self.X_completed_tensor = tf.get_variable("x_completed", input_shape, dtype=tf.float64)
        self.l = self.loss(X, self.X_completed_tensor)
        return tf.compat.v1.train.AdagradOptimizer(0.1).minimize(self.l)
        
    def loss(self, X_observed, X_completed):
        delta = self.project_A(X_observed - X_completed)
        normalized_frobenius = tf.norm(delta) / self.num_observed
        s, _, _ = tf.linalg.svd(X_completed)
        nuclear_norm = tf.reduce_sum(s)
        return normalized_frobenius + self.lambd * nuclear_norm
    
    def project_A(self, A):
        return tf.multiply(tf.where(tf.math.is_nan(A), tf.zeros_like(A), A), self.observed)
    
    def fit_predict(self, X, iterations=1500):
        if not self.train_op:
            self.train_op = self.build_tf_graph(X)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                _, loss = sess.run([self.train_op, self.l])
                if i > 0 and i % 100 == 0:
                    print("current loss at step {} is: {}".format(i, loss))
            self.X_completed = sess.run(self.X_completed_tensor)
            return self.X_completed

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
model = MCNNM()


In [4]:
X_completed = model.fit_predict(X_observed, iterations=5000)

current loss at step 100 is: 1.572396136643584
current loss at step 200 is: 1.5671785388216313
current loss at step 300 is: 1.5611518231808421
current loss at step 400 is: 1.5538877093316967
current loss at step 500 is: 1.5460135915043738
current loss at step 600 is: 1.5385067224004088
current loss at step 700 is: 1.5315371487831275
current loss at step 800 is: 1.5246496579741697
current loss at step 900 is: 1.5177108274031395
current loss at step 1000 is: 1.5108200904703337
current loss at step 1100 is: 1.5040326712695293
current loss at step 1200 is: 1.497374451139773
current loss at step 1300 is: 1.4908459024371528
current loss at step 1400 is: 1.4843699416701646
current loss at step 1500 is: 1.4779478844449367
current loss at step 1600 is: 1.471627916430177
current loss at step 1700 is: 1.4653972707406322
current loss at step 1800 is: 1.4593065561035075
current loss at step 1900 is: 1.4531919427375803
current loss at step 2000 is: 1.4472193212446738
current loss at step 2100 is: 1.

In [5]:
X_completed

array([[ 9.94259665e-05,  4.07840940e-04, -1.30798784e-04, ...,
         9.04067911e-04, -7.26013675e-05,  5.36076915e-05],
       [-7.14563635e-04,  1.91946214e-04, -1.66775343e-04, ...,
         8.55743477e-05, -5.56134477e-04,  1.00696187e-04],
       [-5.98727080e-05, -4.02993535e-04, -9.14863674e-05, ...,
         1.35717464e-04, -8.70166292e-04, -3.10259825e-04],
       ...,
       [ 2.74072632e-04,  8.70875996e-04, -3.91994135e-04, ...,
        -6.45257607e-04, -1.11821963e-03, -9.33014311e-04],
       [ 2.69457735e-04, -5.85496296e-04,  1.47300791e-04, ...,
         8.26859886e-04,  3.37592680e-04,  8.84733846e-04],
       [-1.93775600e-04,  1.05594152e-03,  1.06387294e-03, ...,
        -4.16609183e-04, -4.43443171e-04, -5.71494957e-04]])

In [6]:
X_observed

array([[-0.29360759,  0.44342538, -1.08195743, ...,         nan,
        -0.3577256 , -1.27301085],
       [-1.78436206,         nan,  0.81836862, ..., -0.72992102,
        -1.2155056 ,  1.0641305 ],
       [ 0.47387099, -0.72983496, -0.67506123, ..., -0.68052207,
                nan,  0.72272822],
       ...,
       [        nan, -0.53226707,  0.30833222, ..., -0.65888083,
         0.90068056,  1.71551849],
       [ 0.9959976 , -0.15334836, -0.43478627, ..., -1.07284728,
        -1.03873886,  0.35471299],
       [ 0.84517386,  0.47074639,  0.06049391, ..., -0.43183899,
        -0.9932546 , -0.46910238]])

In [7]:
X

array([[-0.29360759,  0.44342538, -1.08195743, ..., -0.090673  ,
        -0.3577256 , -1.27301085],
       [-1.78436206, -0.60646428,  0.81836862, ..., -0.72992102,
        -1.2155056 ,  1.0641305 ],
       [ 0.47387099, -0.72983496, -0.67506123, ..., -0.68052207,
        -2.79233974,  0.72272822],
       ...,
       [ 0.17950515, -0.53226707,  0.30833222, ..., -0.65888083,
         0.90068056,  1.71551849],
       [ 0.9959976 , -0.15334836, -0.43478627, ..., -1.07284728,
        -1.03873886,  0.35471299],
       [ 0.84517386,  0.47074639,  0.06049391, ..., -0.43183899,
        -0.9932546 , -0.46910238]])