# Recommend Projects to Users

In [1]:
from keras.models import load_model
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
np.random.seed(0)
from time import gmtime, strftime
from keras.models import Model
from scipy.sparse import vstack

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from src.evaluate import evaluate
from src.recommender import Recommender
import src.load_data as load_data

Using TensorFlow backend.


In [2]:
# Load the projects dataframe
projects = pd.read_pickle("../data/raw/project_data")

In [3]:
# Load the autoencoder to reduce the dimensionality of our TF-IDF vectors
model = load_model('autoencoder.h5')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


In [4]:
# Load the proejct data
train_projects, train_x, test_projects, test_x, train_project_ids, test_project_ids = load_data.load_projects()
train_x_projects = np.array(train_projects, dtype=np.int32).reshape(len(train_projects), 1)
test_x_projects = np.array(test_projects, dtype=np.int32).reshape(len(test_projects), 1)

# Prepare our variables to be used in recommendation
x = vstack([train_x, test_x])
x_projects = train_projects + test_projects
x_project_ids = train_project_ids + test_project_ids

# Load the users projects
users_projects = load_data.load_user()

In [5]:
# Create a model that we will use to extract the embedding layer output
embed_model = Model(inputs=model.input, outputs=model.get_layer('embedding_layer').output)

# The dimension that the TF-IDF vector will be reduced to
embedding_size = model.get_layer('embedding_layer').output_shape[2]

# Embed our TF-IDF Vectors for all Projects
embeddings = embed_model.predict(x=[x, np.array(x_projects, dtype=np.int32).reshape(len(x_projects), 1)]).reshape(len(x_projects), embedding_size)

### Make Recommendations for a User

In [6]:
# Initialise our recommender class
rec = Recommender(x_project_ids)

# Calculate our cosine similarity matrix between all projects
similarity_matrix = rec.similarity(embeddings)

# Select a user to recommend projects to

In [7]:
# Select a user that we will recommend projects to
user_idx = 3999 # select any number between 0 and 4000

# Reshape our user_projects vector
user_projects = users_projects[user_idx].todense()
user_projects = np.array((user_projects.T))

# Count the number of projects this user has interacted with
ones_idx = np.nonzero(user_projects)
num_ones = len(np.array(ones_idx).flatten())
print("This user has interacted with " + str(num_ones) + " projects.")

# Get the top recommender projects
done_projects, top_projects = rec.top_projects(similarity_matrix, user_projects)

This user has interacted with 134 projects.


In [8]:
# Display the projects that we suggested the user take part in
recommended_project_ids = list(top_projects['project_id'])
recommender_projects = projects[projects['project_id'].isin(recommended_project_ids)]
recommender_projects

Unnamed: 0,UN_regions,country,description,error,guid,origin,regions,tags,title,topics,url,project_id
0,[],,Foldit is a revolutionary new computer game en...,,5f80760d-8398-5091-b3c6-f34c39216e88,scistarter,[],"[dna, protein]",Foldit,"[Computers & Technology, Biology, Chemistry]",https://scistarter.com/project/4-Foldit-Foldit,4
1131,[],,CMN aims to promote planning sustainable commu...,,8edfd05d-f2ba-5721-aa58-8f66e2f7b3db,scistarter,[],"[community mapping, csim, eelgrass, fiss, inva...",Community Mapping Network (CMN),"[Geography, Ecology & Environment, Birds, Biol...",https://scistarter.com/project/16825-Community...,16825
1132,[],,Help us track monarch and milkweed occurrences...,,15df56bc-5a3d-505e-aa52-6213ad14effc,scistarter,[],"[asclepias, danaus plexippus, mapper, milkweed...",Western Monarch Milkweed Mapper,"[Biology, Insects & Pollinators, Ecology & Env...",https://scistarter.com/project/16829-Western-M...,16829
1133,[],,"Welcome to GLOBE Observer, an international ci...",,f000f5fb-8b75-5166-bce0-7630104ba6a8,scistarter,[],"[app, clouds, girl scouts, globe, internal_gir...",GLOBE Observer: Clouds,"[Ecology & Environment, Astronomy & Space, Oce...",https://scistarter.com/project/16830-GLOBE-Obs...,16830
1134,[],,NECi's handheld photometer sends nitrate & pho...,,9e1e3584-4b44-5de2-942c-55af0a59ba85,scistarter,[],"[app, neci, nitrate, phosphate, photometer, pl...",NECi's handheld photometer,"[Ocean, Water, Marine & Terrestrial]",https://scistarter.com/project/16835-NECis-han...,16835
1135,[],,Global Fishing Watch is the product of a partn...,,bd5ce379-5112-5db4-ac6f-b70ff520844b,scistarter,[],"[data, exclusive economic zone, fish, fisherie...",Global Fishing Watch,"[Ocean, Water, Marine & Terrestrial, Ecology &...",https://scistarter.com/project/16848-Global-Fi...,16848
1136,[],,Participants will help by reporting bats found...,,500ce1c3-245c-5206-a535-7bf0febf9262,scistarter,"[{'geometry': {'type': 'MultiPolygon', 'coordi...","[bat, bat house, mosquito]",Got Bats?,"[Biology, Insects & Pollinators, Ecology & Env...",https://scistarter.com/project/16849-Got-Bats-...,16849
1137,[],,We are scientists at The California Academy of...,,eff36458-4418-54c2-923b-8c02fbc2bf18,scistarter,"[{'geometry': {'type': 'MultiPolygon', 'coordi...","[ants, backyard, backyard biodiversity, biodiv...",Backyard Biodiversity Project: Pools,"[Ecology & Environment, Biology, Insects & Pol...",https://scistarter.com/project/16855-Backyard-...,16855
1138,[],United States,"Since 1990, surveys have been conducted of hor...",,0d95f19a-14db-5fbf-a127-7a11f6e29421,scistarter,[],"[delaware bay, horseshoe crabs, internal_crowd...",Horseshoe Crab Spawning Survey,"[Ocean, Water, Marine & Terrestrial, Biology, ...",https://scistarter.com/project/16856-Horseshoe...,16856
1139,[],,GlobalXplorer° is an online platform that uses...,,7f25664e-a178-559f-bb16-de70249480ae,scistarter,[],"[culture, girl scouts, internal_girl_scouts, p...",Global Xplorer,[Archeology & Cultural],https://scistarter.com/project/16859-Global-Xp...,16859


In [9]:
# Get y_true (the actual projects done by the user) and y_pred (the projects we predicted they would do)
y_true, y_pred = rec.predictions(user_projects, done_projects, top_projects) 

# Evaluate the models precision and recall
precision, recall = evaluate(y_true, y_pred)

In [10]:
print('--------------------------------')
print('Model Results')
print('Precision: ' + str(precision))
print('Recall: ' + str(recall))
print('--------------------------------')

--------------------------------
Model Results
Precision: 0.018518518518518517
Recall: 0.11764705882352941
--------------------------------
