In [1]:
# Import some standard python libraries
import numpy as np
import os
import sys
import json
import scipy.sparse as sparse
from sklearn.utils import class_weight
import pandas as pd
from keras import backend as K
# Absolute path of the directory this notebook is executed from.
dirpath = os.getcwd()
# NOTE(review): keeps the basename of the cwd minus its last 23 characters and uses the
# result as a prefix for the sys.path entries added below. The 23 looks tuned to one
# specific directory name; for shorter names the slice is simply '' — TODO confirm intent.
dir_path = os.path.basename(dirpath)[:-23]
import tensorflow as tf

# Importing all our autoencoder models
sys.path.append(dir_path + '../../src/models/autoencoders')
import CDAE
import deep_1
import deep_2
import deep_3
import deep_4
import deep_5
import deep_6

sys.path.append(dir_path + '../../src/models/recommenders')
from cf_recommender import CFRecommender

# Importing our data models
sys.path.append(dir_path + '../../data/data_models')
from content_data import load_projects_doc2vec, load_projects_tfidf 
from cf_data import load_users_projects, load_new_users_projects, load_movies, load_profile_labels, load_new_profile_labels

# Input parameters for training our autoencoder.
# The values are hard-coded; the trailing comments record the sys.argv expression each
# one is a stand-in for.
batch_size = 32 #int(sys.argv[1])       # passed to model.fit as batch_size
epochs = 30 #int(sys.argv[2])           # number of training epochs passed to model.fit
embedding_size = 32 #int(sys.argv[3])   # passed to autoencoder.create as K
# autoencoder_type = 'deep6' #str(sys.argv[4])
dataSource = 'new_users_projects' #str(sys.argv[5])   # not referenced in the visible cells
q = 0.2 #float(sys.argv[6])             # passed to autoencoder.create as q

# Load the data
# Placeholder only: not referenced in the visible cells.
loadData = None

Using TensorFlow backend.


In [2]:
# Fetch the train / validation / test splits (labels and matrix for each).
(train_labels, train_x,
 val_labels, val_x,
 test_labels, test_x) = load_new_users_projects('../../')

# Model dimensions come straight from the training matrix:
# I is its first axis length, U its second.
I, U = train_x.shape

# Profile labels; their index is fed to the model as the second input during training.
labels = load_new_profile_labels('../../')
labels_index = labels.index

In [3]:
# Set Autoencoder
# Pick which of the imported autoencoder modules (CDAE, deep_1 ... deep_6) is used;
# its create() function is what builds the Keras model in the model-creation cell.
autoencoder = deep_3

In [4]:
def get_weighted_loss(neg_weight=1, pos_weight=99):
    """Build a class-weighted binary cross-entropy loss for Keras.

    Each element's cross-entropy is scaled by ``neg_weight`` where the target is 0
    and by ``pos_weight`` where the target is 1, then averaged over the last axis.

    Args:
        neg_weight: Multiplier applied to the loss of elements whose target is 0.
        pos_weight: Multiplier applied to the loss of elements whose target is 1.

    Returns:
        A ``weighted_loss(y_true, y_pred)`` function suitable for ``model.compile``.
    """
    def weighted_loss(y_true, y_pred):
        # The two class terms must be ADDED. Multiplying them (as was done before)
        # gives a weight of zero for every binary target, i.e. a loss that is always 0.
        weights = neg_weight * (1 - y_true) + pos_weight * y_true
        return K.mean(weights * K.binary_crossentropy(y_true, y_pred), axis=-1)
    return weighted_loss

In [5]:
# Build the network with the selected autoencoder module, then compile it.
# (get_weighted_loss() is available as an alternative loss but is not used here.)
model = autoencoder.create(
    I=I,
    U=U,
    K=embedding_size,
    hidden_activation='relu',
    output_activation='sigmoid',
    q=q,
    l=0.001,
)
model.compile(optimizer='adam', loss='mean_absolute_error')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


  h_item = Dense(1024, W_regularizer=l2(l), b_regularizer=l2(l))(h_item)
  h_user = Embedding(input_dim=U, output_dim=1024, input_length=1, W_regularizer=l2(l), name='embedding_layer')(x_user)
  return Model(input=[x_item, x_user], output=decoded)


In [6]:
# Transpose every split so that its rows and columns swap places.
# Note: this rebinds the same names, so re-running the cell transposes back again.
train_x, val_x, test_x = train_x.T, val_x.T, test_x.T

# Element-wise sums of the training split with the test / validation split.
train_test_x = train_x + test_x
train_val_x = train_x + val_x

In [7]:
# Train our model.
# Inputs are the (transposed) training matrix plus the profile label index; the target is
# the combined train + validation matrix.
# `epochs` replaces the legacy Keras 1 keyword `nb_epoch`, which Keras 2 only accepts
# with a deprecation warning.
# NOTE(review): validation_data repeats the training inputs and targets, so the reported
# val_loss is not a held-out metric — confirm this is intended.
# NOTE(review): Keras documents class_weight as a dict mapping class index to weight;
# confirm that the string 'balanced' has any effect here.
history = model.fit(x=[train_x, labels_index], y=train_val_x,
                    batch_size=batch_size, epochs=epochs, verbose=1, class_weight='balanced',
                    validation_data=[[train_x, labels_index], train_val_x])

Instructions for updating:
Use tf.cast instead.


  after removing the cwd from sys.path.


Train on 344 samples, validate on 344 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [8]:
# model_name = 'deep3-custom_loss-1-example.h5'
# model.save('../../data/autoencoders/' + model_name)

# Make Recommendations

In [9]:
# Size of the top-K list handed to the recommender.
k = 10

# Load the splits again; this replaces the transposed matrices used for training.
(train_labels, train_x,
 val_labels, val_x,
 test_labels, test_x) = load_new_users_projects('../../')

recommender = CFRecommender(k)

In [10]:
from sklearn.metrics import precision_recall_fscore_support, mean_squared_error, average_precision_score
from math import sqrt
import math

In [11]:
# Evaluate the trained model one profile (column of train_x) at a time and store the
# per-profile metrics as a JSON array.
fileName = '../../data/experiment-results/cf-autoencoder/' + 'deep3-best-performing-2' + '_' + str(k) + '.json'

# Second model input; it does not depend on the profile, so build it once.
# Note: this rebinds the notebook-level name `labels`.
labels = np.asarray(train_labels.index)

results = []
for profile_idx in range(0, train_x.shape[1]):
    # Dense row vector (1, n) holding this profile's column of the training matrix.
    profile_col = np.squeeze(np.asarray(train_x.getcol(profile_idx).todense())).reshape(1,-1)

    # Make a prediction for this profile
    predictions = model.predict([profile_col, labels])

    # Get the Top-K recommendations
    recommendations = recommender.top_projects(profile_col, predictions, train_labels)

    # Generate the y_pred and y_true for evaluation.
    # Identity check: `!= None` on a matrix is an element-wise comparison, not a None test.
    if val_x is not None:
        y_true, y_pred = recommender.generate_y(recommendations, train_labels, test_x.getcol(profile_idx), val_x=val_x.getcol(profile_idx))
    else:
        y_true, y_pred = recommender.generate_y(recommendations, train_labels, test_x.getcol(profile_idx))

    # Get precision and recall
    precision, recall, fscore, support = precision_recall_fscore_support(y_true, y_pred, average='binary', pos_label=1)
    avg_precision = float(average_precision_score(y_true, predictions.reshape(y_true.shape), average='weighted', pos_label=1))
    rmse = sqrt(mean_squared_error(y_true, predictions.reshape(y_true.shape)))

    # Undefined metrics are stored as 0.
    if math.isnan(avg_precision):
        avg_precision = 0
    if math.isnan(rmse):
        rmse = 0

    # y_pred / y_true are stored as the indices of their non-zero entries.
    results.append({
        "user_index": profile_idx,
        "precision": float(precision),
        "recall": float(recall),
        "y_pred": np.nonzero(y_pred)[0].tolist(),
        "y_true": np.nonzero(y_true)[0].tolist(),
        "avg_precision": avg_precision,
        "rmse": rmse,
    })

# Write everything in one go. json.dump always produces a valid array (also for zero
# profiles), and the context manager closes the file even if writing fails. The metrics
# stay available in `results` if the write has to be retried.
with open(fileName, "w") as f:
    json.dump(results, f)

print("-------TEST COMPLETE--------")

  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
 

  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
 

  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
 

  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
 

  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
 

  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
 

  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
 

  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
 

-------TEST COMPLETE--------


  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
  recall = tps / tps[-1]
  'recall', 'true', average, warn_for)
 