# Recommend projects to users using the time-consistent `Recommender` class

In [13]:
from keras.models import load_model
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
np.random.seed(0)
from time import gmtime, strftime
from keras.models import Model
from scipy.sparse import vstack

import os
import sys
# Put the directory two levels above the working directory on sys.path
# (once only) so that the `src.*` imports that follow can be resolved.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    
from src.models.cdea.evaluate import evaluate
from src.models.cdea.timerecommender import Recommender
import src.models.cdea.load_data as load_data

import math

from sklearn.metrics import precision_recall_fscore_support

In [2]:
# Load the pickled inputs (paths are relative to the notebook's working directory).
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
users_projects_list = pd.read_pickle('../../data/processed/profile_projects_time_consistent')
projects = pd.read_pickle('../../data/raw/project_data')
users_projects_matrix = pd.read_pickle('../../data/processed/active_profile_projects')
# Load the similarity matrix and normalise it in the same expression: (s + 1) / 2
# maps the range [-1, 1] onto [0, 1] (presumably the scores are cosine similarities).
similarity_matrix = (pd.read_pickle('../../data/processed/similarity_matrix') + 1) / 2

In [3]:
def num_uniques(x):
    """Count the distinct non-NaN values in ``x``.

    Parameters
    ----------
    x : iterable of float
        Values to count. Every element is passed to ``math.isnan``, so each
        one must be a real number (NaN entries are ignored).

    Returns
    -------
    int
        Number of unique values left after dropping NaNs.
    """
    # A set comprehension de-duplicates directly, without a temporary list.
    return len({i for i in x if not math.isnan(i)})
# Count each user's distinct projects, then attach the counts as a new column.
distinct_project_counts = users_projects_list['projects'].apply(num_uniques)
users_projects_list['num_projects'] = distinct_project_counts

In [4]:
# Preview only the first rows instead of rendering the whole frame.
users_projects_list.head(10)

Unnamed: 0,profile,projects,num_projects
0,000073c3675ea9a1d0fe0ee3ca57e2bf,"[413.0, 413.0]",1
1,0002b85e757486c6d80ed6f73f465eaa,[16864.0],1
2,0003a41bbdb3371df4c1829913f17537,[19794.0],1
3,000436aaa487e461e6e16e02ab3e89eb,[659.0],1
4,000476d8680db78d75b3b9edefc4a6d2,[1510.0],1
5,0004853e76f038acb207adaa98cc4afb,[846.0],1
6,00060488471cb4e5b085e50291d4a3e0,[720.0],1
7,00060e78dfb9091883494f288a1e20e4,[25.0],1
8,0006e4b3c3848d18512fdf38df7b0a60,[25.0],1
9,0007997ece92bc1a4ea4e570093da014,[25.0],1


In [5]:
# Set up our recommender from the raw project data and the active users' project matrix.
# NOTE(review): the role of each positional argument is inferred from the variable
# names and the pickle paths above; confirm against Recommender.__init__.
rec = Recommender(projects, users_projects_matrix)

In [6]:
# Pick out the user we want: an example user with a reasonably long history.
MIN_PROJECTS = 7      # keep users with strictly more than this many distinct projects
USER_POSITION = 1000  # positional index of the example user among the kept users
active_users = users_projects_list[users_projects_list['num_projects'] > MIN_PROJECTS]
# .iloc is positional, so this raises IndexError if too few users pass the filter.
user_projects_list = active_users.iloc[USER_POSITION]

In [7]:
# Get the top projects for the selected user.
# Judging by the cell output below, `after_cutoff` is a list of project ids and
# `similar_items` is a frame with `similarity_score` and `project_id` columns
# (presumably the user's post-cutoff projects and the recommended candidates — TODO confirm).
after_cutoff, similar_items = rec.top_projects(user_projects_list, similarity_matrix)

In [8]:
# Print the `after_cutoff` list, then show `similar_items` as the cell's rich output.
print(after_cutoff)
similar_items

[54.0, 113.0, 1014.0]


Unnamed: 0,similarity_score,project_id
1114,0.510002,15386.0
1165,0.509387,17002.0
1085,0.508914,11457.0
1350,0.508463,18304.0
1669,0.508274,20136.0


In [9]:
# Generate our y_true and y_pred from `after_cutoff` and `similar_items`.
# NOTE(review): both are used further down as label vectors (np.nonzero, sklearn
# metrics); presumably one entry per project — confirm in Recommender.predictions.
y_true, y_pred = rec.predictions(after_cutoff, similar_items)

In [10]:
# Evaluate our model: evaluate() returns three values, bound here as precision,
# recall and refined_precision.
# NOTE(review): the bare values in this cell's output (5, 1, (3, 5)) are not printed
# by this cell, so they presumably come from print statements inside evaluate().
precision, recall, refined_precision = evaluate(y_true, y_pred, similar_items, similarity_matrix)

5
1
(3, 5)


In [14]:
# Cross-check precision and recall with scikit-learn.
# The results are bound to sk_-prefixed names so they do not overwrite the
# `precision` and `recall` returned by evaluate(), which the final cell prints.
# With average='binary', scikit-learn returns None for `support`.
sk_precision, sk_recall, fscore, support = precision_recall_fscore_support(y_true, y_pred, average='binary', pos_label=1)

In [15]:
# Get the similarity matrix entries for the most similar items returned for our user:
# indexing with similar_items.index selects the columns carrying those labels.
# NOTE(review): the labels used are similar_items' index values (e.g. 1114), not its
# project_id column (e.g. 15386.0) — confirm the matrix columns are keyed that way.
pred_sim_matrix = similarity_matrix[similar_items.index]

In [16]:
# Preview only the first rows instead of rendering the whole matrix slice.
pred_sim_matrix.head(10)

Unnamed: 0,1114,1165,1085,1350,1669
4,0.864343,0.854669,0.958687,0.910340,0.954326
5,0.842162,0.866112,0.968375,0.920734,0.966418
6,0.843234,0.863065,0.966517,0.918774,0.964491
7,0.158914,0.136758,0.033827,0.080069,0.035619
8,0.843391,0.874446,0.972399,0.919870,0.970963
19,0.831739,0.893590,0.981979,0.930778,0.981150
20,0.843228,0.863159,0.966604,0.918813,0.964591
22,0.840871,0.860136,0.964656,0.919179,0.964379
23,0.843185,0.863168,0.966622,0.918909,0.964613
24,0.156429,0.135515,0.032176,0.079809,0.034226


In [17]:
# Get the positional indices of all the projects the user actually participated in
# after the cut-off time, i.e. the non-zero entries of y_true.
# np.nonzero returns a tuple holding one index array per dimension.
true_idx = np.nonzero(y_true)

In [18]:
# Show the tuple of index arrays produced by np.nonzero(y_true)
true_idx

(array([ 38,  58, 664]),)

In [19]:
# Keep only the rows at the positions held in true_idx.
# This should now mean we have a 2D matrix which has
#   len(similar_items) columns
#   len(true_idx[0]) rows (true_idx is a 1-tuple, so the row count is the length
#   of the index array inside it, not len(true_idx))
masked_pred_sim_matrix = pred_sim_matrix.iloc[true_idx]

In [20]:
# Inspect the masked matrix: one row per non-zero y_true entry, one column per similar item
masked_pred_sim_matrix

Unnamed: 0,1114,1165,1085,1350,1669
54,0.214877,0.099774,0.020666,0.064045,0.021312
113,0.843539,0.858113,0.96507,0.915394,0.960541
1014,0.159744,0.137553,0.031976,0.079158,0.032628


In [23]:
# For every similar item take its best similarity to any project the user really
# joined, then average those maxima. Compare with the refined_precision printed
# from evaluate().
masked_pred_sim_matrix.max(axis=0).mean()

0.90853155

In [11]:
# Report precision, recall and refined precision, one value per line
print(precision, recall, refined_precision, sep='\n')

0.0
0.0
0.9085315465927124
