# CDL

In [1]:
import numpy as np
import pickle
import tensorflow as tf
import pandas as pd
from keras.preprocessing.text import Tokenizer, one_hot
from pandas.api.types import CategoricalDtype
from scipy.sparse import csr_matrix
#init random seed
np.random.seed(5)
from sklearn.model_selection import train_test_split
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_squared_error, mean_absolute_error
from collections import defaultdict, namedtuple
from experiment_out_utils import precision_recall_at_k_4df, write_to_csv, XPData, XPRow, write_row
import itertools as it
from matrix_factorization_with_als import MF
from model_out_utils import make_out_dirs

Using TensorFlow backend.


In [2]:
### create all necessary dirs for output ###

XP_PATH, U_V_PATH, MODEL_PATH = make_out_dirs(model_name='sdae', xp_name='sdae_optimized') 
print("Out dir of experiment: ", XP_PATH)
print("Out dir of U, V matricies: ", U_V_PATH)
print("Out dir of model parameters: ", MODEL_PATH)

Out dir of experiment:  D:/Models/thesis/sdae/sdae_optimized/
Out dir of U, V matricies:  D:/Models/thesis/sdae/sdae_optimized/pickles/
Out dir of model parameters:  D:/Models/thesis/sdae/sdae_optimized/tf/


## 1. Data Preprocessing

In [3]:
#df = pd.read_json('/home/neopux/UHH/datasets/Video_Games_5_proc.json')
df = pd.read_json(r'D:\Datasets\amazon_reviews\processed\reviews_Video_Games_5.json')

In [4]:
df.head(2)

Unnamed: 0,asin,helpful,overall,reviewText,reviewTextProc,reviewTime,reviewerID,reviewerName,summary,summaryProc,unixReviewTime
0,700099867,"[8, 12]",1,Installing the game was a struggle (because of...,instal game struggle game window live bugs).so...,"07 9, 2012",A2HD75EMZR8QLN,123,Pay to unlock content? I don't think so.,pay unlock content i not think,1341792000
1,700099867,"[0, 0]",4,If you like rally cars get this game you will ...,if like rally car game fun it orient 34;europe...,"06 30, 2013",A3UR8NLLY1ZHCX,"Alejandro Henao ""Electronic Junky""",Good rally game,good rally game,1372550400


In [5]:
df_train, df_test = train_test_split(df, test_size = 0.3, stratify=df['reviewerID'], random_state=42)

### Prepare Reviews

In [6]:
train_reviews = df_train.groupby('asin').reviewTextProc.agg(' '.join)
test_reviews = df_test.groupby('asin').reviewTextProc.agg(' '.join)

In [7]:
train_summaries = df_train.groupby('asin').summaryProc.agg(' '.join)
test_summaries = df_test.groupby('asin').summaryProc.agg(' '.join)

In [8]:
total_train_rev = train_summaries + train_reviews
total_test_rev = test_summaries + test_reviews

In [9]:
vectorizer = TfidfVectorizer(max_features=10000)
item_infomation_matrix = vectorizer.fit_transform(total_train_rev.values)

In [10]:
print(item_infomation_matrix.shape)

(10668, 10000)


In [11]:
sorted(vectorizer.vocabulary_.items(), reverse=True)[5940:5960]

[('ham', 4059),
 ('halt', 4058),
 ('halo2', 4057),
 ('halo', 4056),
 ('hallway', 4055),
 ('halloween', 4054),
 ('hallmark', 4053),
 ('hall', 4052),
 ('halfway', 4051),
 ('halftime', 4050),
 ('halflife', 4049),
 ('half', 4048),
 ('halen', 4047),
 ('hale', 4046),
 ('hairstyle', 4045),
 ('haired', 4044),
 ('hair', 4043),
 ('hail', 4042),
 ('hahaha', 4041),
 ('haha', 4040)]

In [12]:
list(vectorizer.stop_words_)[0:10]

['artowrk',
 'ganer',
 'dispite',
 'creat',
 'fighterbasicaly',
 'lk',
 'dsbasic',
 'bora',
 'gintaroo',
 'classicfrogger']

In [53]:
item_infomation_matrix = np.array(item_infomation_matrix.todense())

In [17]:
total_train_rev.index

Index(['0700099867', '6050036071', '7100027950', '7293000936', '8176503290',
       '907843905X', '9625990674', '9861019731', '9882155456', 'B000003SQQ',
       ...
       'B00J128FPA', 'B00J226358', 'B00J6DLPLK', 'B00J9P3KBS', 'B00JM3R6M6',
       'B00JQ8YH6A', 'B00JQHU9RC', 'B00JXW6GE0', 'B00KAI3KW2', 'B00KHECZXO'],
      dtype='object', name='asin', length=10668)

### Prepare Rating Matrix

In [18]:
pivoted = df_train.pivot(index = 'reviewerID', columns = 'asin', values = 'overall')
pivoted = pivoted.fillna(0.)

In [19]:
pivoted.head(5)

asin,0700099867,6050036071,7100027950,7293000936,8176503290,907843905X,9625990674,9861019731,9882155456,B000003SQQ,...,B00J128FPA,B00J226358,B00J6DLPLK,B00J9P3KBS,B00JM3R6M6,B00JQ8YH6A,B00JQHU9RC,B00JXW6GE0,B00KAI3KW2,B00KHECZXO
reviewerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A00263941WP7WCIL7AKWL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A005481137I9SCAWEF7ON,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A01588502N52TNG1BP7WG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A01803182IUSFNIFF5984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A023090719X7MTBCLM19B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
rating_matrix = pivoted.values

In [22]:
rating_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [27]:
sum(pivoted.columns == total_train_rev.index)

10668

## 2. Building Model

### Masking Noise 

In [58]:
def mask(corruption_level, shape):
    return np.random.binomial(1, 1 - corruption_level, shape)

def add_noise(x , corruption_level ):
    mask_ = mask(corruption_level , x.shape)
    print("Mask shape: " + str(mask_.shape))
    x = np.multiply(x, mask_)
    print("Noising completed..:" + str(x.shape))
    return x

### SDAE & CDL

In [60]:
class CDL():
    def __init__(self , rating_matrix , item_infomation_matrix, out_path = None, k=10, 
                 epochs=50, batch_size=32, lr=0.0001, hidden_size=25, matrix_noise = 0.3,
                drop_ratio=0.1, lambda_w = 1, lambda_n = 1, lambda_v = 1, lambda_u = 1):
        self.out_path = out_path
        
        self.k = k
        self.n_input = item_infomation_matrix.shape[1] # dimensionality of text representations - 1000
        self.n_hidden1 = hidden_size
        self.n_hidden2 = self.k
        
        # lambdas for loss calc
        self.lambda_w = lambda_w
        self.lambda_n = lambda_n
        self.lambda_v = lambda_v
        self.lambda_u = lambda_u
        
        self.drop_ratio = drop_ratio
        self.learning_rate = lr
        self.epochs = epochs
        self.batch_size = batch_size
        
        self.num_u = rating_matrix.shape[0]
        self.num_v = rating_matrix.shape[1]
        intializer = tf.variance_scaling_initializer()
        self.non_zero_idx = rating_matrix > 0
        
        self.Weights = { 
            'w1' : tf.Variable(intializer([self.n_input, self.n_hidden1]), dtype=tf.float32),
            'w2' : tf.Variable(intializer([self.n_hidden1, self.n_hidden2]), dtype=tf.float32),
            'w3' : tf.Variable(intializer([self.n_hidden2, self.n_hidden1]), dtype=tf.float32),
            'w4' : tf.Variable(intializer([self.n_hidden1, self.n_input]), dtype=tf.float32)   
        }
        self.Biases = {
            'b1' : tf.Variable(tf.random_normal( [self.n_hidden1] , mean=0.0, stddev=1 / self.lambda_w )),
            'b2' : tf.Variable(tf.random_normal( [self.n_hidden2] , mean=0.0, stddev=1 / self.lambda_w )),
            'b3' : tf.Variable(tf.random_normal( [self.n_hidden1] , mean=0.0, stddev=1 / self.lambda_w )),
            'b4' : tf.Variable(tf.random_normal( [self.n_input] , mean=0.0, stddev=1 / self.lambda_w ))
            #'b1' : tf.Variable(tf.zeros(self.n_hidden1)),
            #'b2' : tf.Variable(tf.zeros(self.n_hidden2)),
            #'b3' : tf.Variable(tf.zeros(self.n_hidden1)),
            #'b4' : tf.Variable(tf.zeros(self.n_input))
        }
        
        self.item_infomation_matrix = item_infomation_matrix
        self.item_infomation_matrix_noise = add_noise(self.item_infomation_matrix , matrix_noise)
        self.rating_matrix = rating_matrix
    
        self.build_model()
        self.saver = tf.train.Saver()
        
    def encoder(self , x , drop_ratio):
        w1 = self.Weights['w1']
        b1 = self.Biases['b1']
        L1 = tf.nn.relu(tf.matmul(x, w1) + b1)
        L1 = tf.nn.dropout( L1 , keep_prob= 1 - drop_ratio)
        
        w2 = self.Weights['w2']
        b2 = self.Biases['b2']
        L2 = tf.nn.relu(tf.matmul(L1, w2) + b2)
        L2 = tf.nn.dropout(L2 , keep_prob= 1 - drop_ratio)
        return L2
    
    def decoder(self , x , drop_ratio):
        w3 = self.Weights['w3']
        b3 = self.Biases['b3']
        L3 = tf.nn.relu(tf.matmul(x, w3) + b3)
        L3 = tf.nn.dropout(L3 , keep_prob= 1 - drop_ratio)

        w4 = self.Weights['w4']
        b4 = self.Biases['b4']
        L4 = tf.nn.relu(tf.matmul(L3, w4) + b4)
        L4 = tf.nn.dropout(L4 , keep_prob= 1 - drop_ratio)
        return L4
    
    def build_model(self):
        self.model_X_0 = tf.placeholder(tf.float32 , shape=(None , self.n_input))
        self.model_X_c = tf.placeholder(tf.float32 , shape=(None , self.n_input))
        
        self.model_V = tf.placeholder(tf.float32 , shape=(None , self.k))
        self.model_drop_ratio = tf.placeholder(tf.float32)
        
        self.V_sdae = self.encoder(self.model_X_0 , self.model_drop_ratio)
        self.y_pred = self.decoder(self.V_sdae , self.model_drop_ratio)
        
        self.Regularization = tf.reduce_sum([tf.nn.l2_loss(w) + tf.nn.l2_loss(b) 
                                             for w,b in zip(self.Weights.values() , self.Biases.values())])
        loss_r = 1/2 * self.lambda_w * self.Regularization
        self.loss_a = 1/2 * self.lambda_n * tf.reduce_sum(tf.pow(self.model_X_c - self.y_pred , 2))
        loss_v =1/2 * self.lambda_v * tf.reduce_sum(tf.pow(self.model_V - self.V_sdae , 2))
        
        self.Loss = loss_r + self.loss_a + loss_v
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.Loss)
        
    
    def training(self):
        sess = tf.Session()
        
        ## define dirs for tensorboard if needed
        if self.out_path != None:
            train_writer = tf.summary.FileWriter('%s/tf/train' % self.out_path, sess.graph)
            test_writer = tf.summary.FileWriter('%s/tf/test' % self.out_path, sess.graph)
        
        sess.run(tf.global_variables_initializer())
        mf = MF(self.rating_matrix, self.k, self.lambda_u, self.lambda_v)
        
        for epoch in range(0, self.epochs):
            print("EPOCH %s / %s" % (epoch + 1, self.epochs))
            
            V_sdae = sess.run(self.V_sdae , feed_dict={self.model_X_0 : self.item_infomation_matrix_noise , 
                                                       self.model_drop_ratio : self.drop_ratio})
            # calc and print ALS loss every N epochs
            U , V, beta_u, beta_v, err = mf.ALS_v3_weighted(V_sdae)
            V = V.T
            
            auto_losses = []
            model_losses = []
            for i in range(0 , self.item_infomation_matrix.shape[0] , self.batch_size):
                X_train_batch = self.item_infomation_matrix_noise[i : i+self.batch_size]
                y_train_batch = self.item_infomation_matrix[i : i+self.batch_size]
                
                V_batch = V[i : i + self.batch_size]
                _ , my_loss, auto_loss = sess.run([self.optimizer, self.Loss, self.loss_a] , 
                                       feed_dict={self.model_X_0: X_train_batch , 
                                                  self.model_X_c: y_train_batch, 
                                                  self.model_V: V_batch, 
                                                  self.model_drop_ratio : self.drop_ratio})
                auto_losses.append(auto_loss)
                model_losses.append(my_loss)
            
            print("ALS LOSS %s" % err) 
            print("MODEL LOSS %s" % np.mean(model_losses))
            print("AUTOENCODER LOSS %s" % np.mean(auto_losses))          
            
            # save log files
            if self.out_path != None:
                # dump summaries
                summary = tf.Summary();
                summary.value.add(tag='Autoencoder Loss', simple_value=np.mean(auto_losses))
                summary.value.add(tag='Model Loss', simple_value=np.mean(model_losses))
                summary.value.add(tag='ALS Loss', simple_value=err)
                train_writer.add_summary(summary, epoch + 1)
                
                # dump model and pickles
                if epoch % 5 == 0:
                    os.mkdir('%s/tf/epoch_%s/' % (self.out_path, epoch))
                    os.mkdir('%s/pickles/epoch_%s/' % (self.out_path, epoch))

                    # save tensorflow model
                    self.saver.save(sess, '%s/tf/epoch_%s/model_.ckpt' % (self.out_path, epoch))

                    # save U and V matricies from ALS
                    with open('%s/pickles/epoch_%s/U.pickle' % (self.out_path, epoch), 'wb') as handle:
                        pickle.dump(U, handle, protocol=pickle.HIGHEST_PROTOCOL)
                    with open('%s/pickles/epoch_%s/V.pickle' % (self.out_path, epoch), 'wb') as handle:
                        pickle.dump(V, handle, protocol=pickle.HIGHEST_PROTOCOL)
                
        
        sess.close()
        return U, V, beta_u, beta_v

In [61]:
item_infomation_matrix.shape

(10668, 10000)

In [62]:
rating_matrix.shape

(24303, 10668)

### Training

In [63]:
%reset Out
tf.reset_default_graph()

Once deleted, variables cannot be recovered. Proceed (y/[n])?  y


Flushing output cache (7 entries)


In [65]:
rating_matrix.shape

(24303, 10668)

In [66]:
cdl = CDL(rating_matrix, item_infomation_matrix, out_path=XP_PATH, k=50, hidden_size=150, 
          matrix_noise=0.3, drop_ratio=0.1, epochs=200, lambda_w=20, lambda_v=100, 
          lambda_u=0.1, lambda_n=10)
U, V, beta_u, beta_v = cdl.training() #188910

Mask shape: (10668, 10000)
Noising completed..:(10668, 10000)
EPOCH 1 / 200
ALS LOSS 3.1659064065156843
MODEL LOSS 39533.33
AUTOENCODER LOSS 1360.0447
EPOCH 2 / 200
ALS LOSS 0.8736638385130221
MODEL LOSS 20005.062
AUTOENCODER LOSS 227.5444
EPOCH 3 / 200
ALS LOSS 0.5354408584089498
MODEL LOSS 10214.416
AUTOENCODER LOSS 168.69966
EPOCH 4 / 200
ALS LOSS 0.4900248494778793
MODEL LOSS 5108.576
AUTOENCODER LOSS 152.96867
EPOCH 5 / 200
ALS LOSS 0.47711767256384413
MODEL LOSS 2513.5674
AUTOENCODER LOSS 145.79938
EPOCH 6 / 200
ALS LOSS 0.47193161435337044
MODEL LOSS 1249.1621
AUTOENCODER LOSS 142.03293
EPOCH 7 / 200
ALS LOSS 0.46824485147297806
MODEL LOSS 665.503
AUTOENCODER LOSS 140.12273
EPOCH 8 / 200
ALS LOSS 0.4656987390313292
MODEL LOSS 411.85226
AUTOENCODER LOSS 138.95624
EPOCH 9 / 200
ALS LOSS 0.46436917536735606
MODEL LOSS 306.28183
AUTOENCODER LOSS 138.41791
EPOCH 10 / 200
ALS LOSS 0.4633576799474289
MODEL LOSS 262.21158
AUTOENCODER LOSS 138.13274
EPOCH 11 / 200
ALS LOSS 0.462103796483

In [67]:
## dump U and V matricies to pickle files
print("U shape: %s x %s" % U.shape)
print("V shape: %s x %s" % V.shape)

print("beta_u shape: %s" % beta_u.shape)
print("beta_v shape: %s" % beta_v.shape)

with open(U_V_PATH + 'U_final.pickle', 'wb') as handle:
    pickle.dump(U, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open(U_V_PATH + 'V_final.pickle', 'wb') as handle:
    pickle.dump(V, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
with open(U_V_PATH + 'beta_u_final.pickle', 'wb') as handle:
    pickle.dump(beta_u, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
with open(U_V_PATH + 'beta_v_final.pickle', 'wb') as handle:
    pickle.dump(beta_v, handle, protocol=pickle.HIGHEST_PROTOCOL)

U shape: 24303 x 50
V shape: 10668 x 50
beta_u shape: 24303
beta_v shape: 10668


In [19]:
def test_run():
    rm = np.array(
        [[1, 2, 3, 7, 8],
         [1, 5, 2, 1, 3],
         [1, 7, 2, 1, 10],
         [1, 2, 3, 4, 50]])
    iim = np.array([[0.7, 0.8, 0.9],[ 1, 0.2, 0.3],[0.5, 0.6, 0.7],[0.5, 0.6, 0.7],[0.1, 0.2, 1.0]])
    cdl = CDL(rm, iim, k=3, hidden_size=20, lr=0.0001, epochs=50)
    U, V, beta_u, beta_v = cdl.training() #188910
    
    print('## U and V matricies ##')
    print(U)
    print(V)
    
    print('## U and V biased ##')
    print(beta_u)
    print(beta_v)
    
    print('## Predictions ##')
    predictions = np.dot(U, V.T) + beta_u.reshape(-1, 1) + beta_v.reshape(1, -1)
        
    print(predictions)
    
    print('## Initial matrix ##')
    print(rm)
    
    return rm, U, V

In [20]:
# test run
rating_matrix, U, V = test_run()

Mask shape: (5, 3)
Noising completed..:(5, 3)
EPOCH 1 / 50
ALS LOSS 1.2046472436371132
MODEL LOSS 70.18561
AUTOENCODER LOSS 3.908306
EPOCH 2 / 50
ALS LOSS 0.7107355986833231
MODEL LOSS 54.93246
AUTOENCODER LOSS 3.9942102
EPOCH 3 / 50
ALS LOSS 0.47965679562842073
MODEL LOSS 57.021286
AUTOENCODER LOSS 4.6897564
EPOCH 4 / 50
ALS LOSS 0.43347562379434734
MODEL LOSS 54.94312
AUTOENCODER LOSS 4.1818175
EPOCH 5 / 50
ALS LOSS 0.41045281871592165
MODEL LOSS 52.2169
AUTOENCODER LOSS 3.5133886
EPOCH 6 / 50
ALS LOSS 0.4222943511838082
MODEL LOSS 54.221687
AUTOENCODER LOSS 3.9598656
EPOCH 7 / 50
ALS LOSS 0.3965419194918399
MODEL LOSS 53.519455
AUTOENCODER LOSS 3.5970554
EPOCH 8 / 50
ALS LOSS 0.3636557769409453
MODEL LOSS 53.911064
AUTOENCODER LOSS 2.9565032
EPOCH 9 / 50
ALS LOSS 0.4192090592784522
MODEL LOSS 54.902596
AUTOENCODER LOSS 4.3583302
EPOCH 10 / 50
ALS LOSS 0.3871567019835682
MODEL LOSS 54.66372
AUTOENCODER LOSS 3.700183
EPOCH 11 / 50
ALS LOSS 0.39751770303337386
MODEL LOSS 54.620716
AUTO

## Optimization

In [23]:
if not os.path.isdir('./optimiz/'):
    os.mkdir('./optimiz/')
    
%reset Out

Once deleted, variables cannot be recovered. Proceed (y/[n])?  y


Flushing output cache (0 entries)


In [38]:
params = {
    "hidden_size": [150],
    "k": [50],
    "matrix_noise": [0.3],
    "drop_ratio": [0.1],
    "lambda_u": [0.01, 0.1],
    "lambda_v": [100],
    "lambda_w": [20, 35 ,50],
    "lambda_n": [10, 0.1]
}

#lr=0.0001, hidden_size=25, matrix_noise = 0.3, drop_ratio=0.1

In [39]:
sorted_keys = sorted(params)
combinations = list(it.product(*(params[key] for key in sorted_keys)))
df_test_val = df_test.copy()

In [40]:
print("Num of combinations: %s" % len(combinations))

Num of combinations: 12


In [41]:
write_row('./optimiz/cdl_sdae_7_10epochs.csv', sorted_keys + ['mse_train', 'mae_train', 'mse_test', 'mae_test'])
for ps in combinations:
    tf.reset_default_graph()
    hyper_params = dict(zip(sorted_keys, ps))
    
    print("Start testing hyper params: ", hyper_params)
    cdl = CDL(rating_matrix, item_infomation_matrix, out_path=None, epochs=10, **hyper_params)
    U, V, beta_u, beta_v = cdl.training() #188910
    
    predictions = np.dot(U, V.T) + beta_u.reshape(-1, 1) + beta_v.reshape(1, -1)
    
    # predictions on train set
    train_mse = mean_squared_error(rating_matrix[rating_matrix > 0], predictions[rating_matrix > 0]) ** 0.5
    train_mae = mean_absolute_error(rating_matrix[rating_matrix > 0], predictions[rating_matrix > 0])
    
    print("MSE (non zero, train set): %s" % train_mse)
    print("MAE (non zero, train set): %s" % train_mae)
    
    # predictions on test set
    preds_df_unmelt = pd.DataFrame(predictions, columns = pivoted.columns, index = pivoted.index)
    preds_df_unmelt.index.name = 'reviewerID'
    preds_df_unmelt.columns.name = 'asin'
    
    df_test_val['value'] = df_test_val.apply(get_val, axis = 1)
    mse = mean_squared_error(df_test[~df_test_val.value.isnull()].overall, df_test_val[~df_test_val.value.isnull()].value) ** 0.5
    mae = mean_absolute_error(df_test[~df_test_val.value.isnull()].overall, df_test_val[~df_test_val.value.isnull()].value)
    
    del preds_df_unmelt
    del predictions
    
    print("MSE (test set): %s" % mse)
    print("MAE (test set): %s" % mae)
    
    print("Srop testing hyper params: ", hyper_params)
    
    # write to file
    write_row('./optimiz/cdl_sdae_7_10epochs.csv', [hyper_params[k] for k in sorted_keys] + [train_mse, train_mae, mse, mae] )

Start testing hyper params:  {'k': 50, 'lambda_u': 0.01, 'lambda_v': 100, 'hidden_size': 150, 'drop_ratio': 0.1, 'matrix_noise': 0.3, 'lambda_n': 10, 'lambda_w': 20}
Mask shape: (10668, 10000)
Noising completed..:(10668, 10000)
EPOCH 1 / 10
ALS LOSS 3.2122202412205842
MODEL LOSS 39437.574
AUTOENCODER LOSS 1314.8956
EPOCH 2 / 10
ALS LOSS 0.5641612811289409
MODEL LOSS 19955.096
AUTOENCODER LOSS 222.55614
EPOCH 3 / 10
ALS LOSS 0.2654116926018575
MODEL LOSS 10176.011
AUTOENCODER LOSS 167.86546
EPOCH 4 / 10
ALS LOSS 0.22408585672816306
MODEL LOSS 5079.91
AUTOENCODER LOSS 152.37798
EPOCH 5 / 10
ALS LOSS 0.2119584844837788
MODEL LOSS 2489.6902
AUTOENCODER LOSS 145.43913
EPOCH 6 / 10
ALS LOSS 0.20476335668733642
MODEL LOSS 1226.5087
AUTOENCODER LOSS 141.91673
EPOCH 7 / 10
ALS LOSS 0.20002158483741656
MODEL LOSS 642.65576
AUTOENCODER LOSS 140.1111
EPOCH 8 / 10
ALS LOSS 0.19625925818535642
MODEL LOSS 387.84445
AUTOENCODER LOSS 139.0429
EPOCH 9 / 10
ALS LOSS 0.19262101184915964
MODEL LOSS 280.171