# Recommend Projects to Users

In [1]:
from keras.models import load_model
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
np.random.seed(0)
from time import gmtime, strftime
from keras.models import Model
from scipy.sparse import vstack

import os
import sys
module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from src.models.cdea.evaluate import evaluate
from src.models.cdea.recommender import Recommender
import src.models.cdea.load_data as load_data

Using TensorFlow backend.


In [2]:
# Load the projects dataframe
projects = pd.read_pickle("../../data/raw/project_data")

In [3]:
# Load the autoencoder to reduce the dimensionality of our TF-IDF vectors
model = load_model('../../autoencoder.h5')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


In [4]:
# Load the proejct data
train_projects, train_x, test_projects, test_x, train_project_ids, test_project_ids = load_data.load_projects()
train_x_projects = np.array(train_projects, dtype=np.int32).reshape(len(train_projects), 1)
test_x_projects = np.array(test_projects, dtype=np.int32).reshape(len(test_projects), 1)

# Prepare our variables to be used in recommendation
x = vstack([train_x, test_x])
x_projects = train_projects + test_projects
x_project_ids = train_project_ids + test_project_ids

# Load the users projects
users_projects = load_data.load_user()

In [5]:
# Create a model that we will use to extract the embedding layer output
embed_model = Model(inputs=model.input, outputs=model.get_layer('embedding_layer').output)

# The dimension that the TF-IDF vector will be reduced to
embedding_size = model.get_layer('embedding_layer').output_shape[2]

# Embed our TF-IDF Vectors for all Projects
embeddings = embed_model.predict(x=[x, np.array(x_projects, dtype=np.int32).reshape(len(x_projects), 1)]).reshape(len(x_projects), embedding_size)

### Make Recommendations for a User

In [6]:
# Initialise our recommender class
rec = Recommender(x_project_ids)

# Calculate our cosine similarity matrix between all projects
similarity_matrix = rec.similarity(embeddings)

# Select a user to recommend projects to

In [7]:
# Select a user that we will recommend projects to
user_idx = 0 # select any number between 0 and 4000

# Reshape our user_projects vector
user_projects = users_projects[user_idx].todense()
user_projects = np.array((user_projects.T))

# Count the number of projects this user has interacted with
ones_idx = np.nonzero(user_projects)
num_ones = len(np.array(ones_idx).flatten())
print("This user has interacted with " + str(num_ones) + " projects.")

# Get the top recommender projects
done_projects, top_projects = rec.top_projects(similarity_matrix, user_projects)

This user has interacted with 22 projects.


In [8]:
# Display the projects that we suggested the user take part in
recommended_project_ids = list(top_projects['project_id'])
recommender_projects = projects[projects['project_id'].isin(recommended_project_ids)]
recommender_projects

Unnamed: 0,UN_regions,country,description,error,guid,origin,regions,tags,title,topics,url,project_id
0,[],,Foldit is a revolutionary new computer game en...,,5f80760d-8398-5091-b3c6-f34c39216e88,scistarter,[],"[dna, protein]",Foldit,"[Computers & Technology, Biology, Chemistry]",https://scistarter.com/project/4-Foldit-Foldit,4
1182,[],,Test,,e51fb770-6bc2-5838-ba43-b72a433cfb2d,scistarter,"[{'geometry': {'type': 'MultiPolygon', 'coordi...",[dfds],This is my awesome citizen science project!,"[Geology & Earth Science, Biology]",https://scistarter.com/project/17118-This-is-m...,17118
1183,[],,Cities across the country are competing to doc...,,c1e003ee-8251-5f44-8fa8-67e9f0c7c521,scistarter,"[{'geometry': {'type': 'MultiPolygon', 'coordi...",[],City Nature Challenge 2017: The Wasatch Front.,[],https://scistarter.com/project/17119-City-Natu...,17119
1184,[],,Phendo is an app designed to be an observation...,,92499965-3476-579e-a94d-4e0bb7f98983,scistarter,[],"[angiogenesis, citizen science, data, data col...",Citizen Endo,"[Social Science, Health & Medicine, Food, Comp...",https://scistarter.com/project/17133-Citizen-E...,17133
1185,[],,Join University of Minnesota Cooperative Exten...,,e3dec51f-43cb-515a-808f-391101f26771,scistarter,"[{'geometry': {'type': 'MultiPolygon', 'coordi...",[],City Nature Challenge 2017: Minneapolis/St. Paul,[Nature & Outdoors],https://scistarter.com/project/17143-City-Natu...,17143
1186,[],,OA-Africa Network is organizing an ocean acidi...,,42ca0dcc-17c6-5ceb-96da-d4e7cb2ca19d,citsci,"[{'geometry': {'type': 'MultiPolygon', 'coordi...",[],OA South Africa,[],https://scistarter.com/project/17144-OA-South-...,17144
1187,[],United States,Immerse yourself in a public engagement experi...,,70de7019-0112-5381-a51f-271a230d46e0,scistarter,[],[],Citizen Science Festival!,[],https://scistarter.com/project/17159-Citizen-S...,17159
1188,[],United States,Join us at 5:30pm in the Ballroom for a fun an...,,89149880-8dce-59f3-b0d1-a83017297b83,scistarter,[],[],Project Slam!,[],https://scistarter.com/project/17161-Project-S...,17161
1189,[],,Citizen science volunteers will collect baseli...,,ff2fd413-7851-5d03-a4d1-860d6cf7a565,citsci,"[{'geometry': {'type': 'MultiPolygon', 'coordi...",[],Rocky Mountain Field Institute Restoration Mon...,[],https://scistarter.com/project/17165-Rocky-Mou...,17165
1190,[],,We would like to enlist your help in collectin...,,77b76428-2105-5653-86ed-a94bb4fa6100,scistarter,[],"[bacteria, dirt, drug discovery, environmental...",Got Dirt? we are seeking soil samples,"[Agriculture, Ecology & Environment, Geology &...",https://scistarter.com/project/17168-Got-Dirt-...,17168


In [9]:
# Get y_true (the actual projects done by the user) and y_pred (the projects we predicted they would do)
y_true, y_pred = rec.predictions(user_projects, done_projects, top_projects) 

# Evaluate the models precision and recall
precision, recall = evaluate(y_true, y_pred)

In [10]:
print('--------------------------------')
print('Model Results')
print('Precision: ' + str(precision))
print('Recall: ' + str(recall))
print('--------------------------------')

--------------------------------
Model Results
Precision: 0.0
Recall: 0.0
--------------------------------
