We will build a **model-based Collaborative Filtering (CF) Recommender**. In this approach, models are developed using machine learning algorithms to recommend questions to professionals. There are many model-based CF algorithms, here we adopt a latent factor model, which compresses professional-donor matrix into a low-dimensional representation in terms of latent factors. A reduced presentation could be utilized for either professional-based or question-based neighborhood searching algorithms to find recommendations. Here we a use popular latent factor model named Singular Value Decomposition (SVD).

In [1]:
import gc
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import scipy
from scipy.sparse.linalg import svds
from tqdm import tqdm
import re
import xml
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
questions =  pd.read_csv("../input/questions.csv")
professionals = pd.read_csv("../input/professionals.csv")
answers = pd.read_csv("../input/answers.csv")

In [27]:
professionals.head()

Unnamed: 0,professionals_id,professionals_location,professionals_industry,professionals_headline,professionals_date_joined
0,9ced4ce7519049c0944147afb75a8ce3,,,,2011-10-05 20:35:19 UTC+0000
1,f718dcf6d2ec4cb0a52a9db59d7f9e67,,,,2011-10-05 20:49:21 UTC+0000
2,0c673e046d824ec0ad0ebe012a0673e4,"New York, New York",,,2011-10-18 17:31:26 UTC+0000
3,977428d851b24183b223be0eb8619a8c,"Boston, Massachusetts",,,2011-11-09 20:39:29 UTC+0000
4,e2d57e5041a44f489288397c9904c2b2,,,,2011-12-10 22:14:44 UTC+0000


In [3]:
prof_ans = pd.merge(professionals, answers, how = 'left' ,
                    left_on = 'professionals_id', right_on = 'answers_author_id')
prof_ans_q = pd.merge(prof_ans, questions, how = 'left' ,
                      left_on = 'answers_question_id', right_on = 'questions_id')

In [4]:
prof_ans_q["eventStrength"] = 1

In [5]:
prof_ans_q = prof_ans_q[['professionals_id','questions_id','eventStrength']]

In [6]:
prof_ans_q.questions_id = prof_ans_q.questions_id.fillna('NoQ')

In [7]:
professionals_questions_full_df = prof_ans_q \
                    .groupby(['professionals_id', 'questions_id'])['eventStrength'].sum().reset_index() 

In [8]:
professionals_questions_full_df = pd.DataFrame(professionals_questions_full_df)

In [9]:
professionals_questions_full_df.head()

Unnamed: 0,professionals_id,questions_id,eventStrength
0,00009a0f9bda43eba47104e9ac62aff5,4c6d71aaf2724b9f8d439ae086d4f3da,1
1,00009a0f9bda43eba47104e9ac62aff5,59c273334fb44c5089bf75dba2ea6e13,1
2,00009a0f9bda43eba47104e9ac62aff5,caca9ab7e13d4297a82b9abe8f11f0b8,1
3,000196ef8db54b9a86ae70ad31745d04,NoQ,1
4,0008138be908438e8944b21f7f57f2c1,NoQ,1


# Create the professional question matrix

We will first get the `professional-question matrix` and print the first five rows.

In [10]:
#Creating a sparse pivot table with professionals in rows and questions in columns
professionals_questions_pivot_matrix_df = professionals_questions_full_df.pivot(index='professionals_id', 
                                                          columns='questions_id', 
                                                          values='eventStrength').fillna(0)

# Transform the professionals-questions dataframe into a matrix
professionals_questions_pivot_matrix = professionals_questions_pivot_matrix_df.as_matrix()

professionals_id = list(professionals_questions_pivot_matrix_df.index)

# Print the first 5 rows of the professionals-questions matrix
professionals_questions_pivot_matrix[:5]

  import sys


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [11]:
professionals_questions_pivot_matrix_df.head()

questions_id,0003e7bf48f24b5c985f8fce96e611f3,0006609dd4da40dcaa5a83e0499aba14,000af224bc2f4e94a19f8b62ba279cc4,000b30fb534b41f7b716fa9ebf9c3f35,0018752e44b44e26bb74a0a43232b4d6,0025b917219c4a24ba5fe260ca66ca98,00269cdc24fd4a3c93aad485688c240b,002b07f6281a407ca49ee8b3538e5e45,002c5689a0b642a3940f99b68f5364b6,0031077b31454fadae1754e871e84457,0032d46529b3430688679e86de838e38,00361e5dda874e65abaeeceb12c8e10c,0037ccbe4467439490e660f606addf2b,0039bd579d1747d0a176dddf400b62fc,003c7e6c8e7045caad3449a79c65e23f,003d2cf801f2498183390a24302ca82d,003f3f45eb644e5595b9a37235ead724,0048de38f49b4c629c79e0c35508aa50,004aa9dc9507422eb57f278fa4de3d35,0055acec30e24b2f8880cf7eb1293df3,0059c69587f94ec3be5ee889c5c00764,005b667f039b4a6c91fa07cf47f8a80f,005c08c032854fff89671458b13dc95c,005f0a1ab7de46219e26ffdd43a9778e,0064ba8c0318478ea7f10e9ceb7c2eff,0065540c11a1433aa785ce8ea438dedd,006741be07684664bdf398568b3d8f74,006757027d1f4753b007399dc93c778d,00683da3fd6245898d7346780dfa2d63,006a68c16c7c4644bec7dd2d7d328d1f,006b9cf69cd0405ea0a8821c5f6641bb,006e23b28b5b4ae2b6b66ce5ae28d231,006eb8550f5049c48ee634020ca9f46a,00709e33dc9743edb9841e7501cfe0b0,007113a6f5e44b78bcd412d7d7772ea2,007333b935214f399483f7b1bd07d693,00738b1c3d2c4cf0a5306a527d855064,00769048c2894930bca0daa655bf916b,00780376199f43568f03b1b982929f09,00795b55345a44e8956fe214fab8e4bc,...,ff798f02dd91487bbe1db8f5cd369536,ff7b6cbb22e544ba9c4a0a1f3e86dca0,ff82d6711d9f4a9c98436e60d3027717,ff83b3ed7ec54020ad9caed704f0012c,ff88d2ebd009422ca02c98a99cab29ba,ff8b94f826a542f7932800c0030362b3,ff8ddba7c5e740d38dc018bf22da98a9,ff91b4d257d049d5b311dba555160f47,ff989f819f1c42f4b144ccd31a37c6d7,ffa15720a66d407aa90e99df7b8e6744,ffa4bf3f5e1f40c582d3417094d69517,ffa62dd2f402462fb71c823e9b5cc332,ffaef413bb0641419ab23b51e0ada930,ffbbae76b05e4eacbe7d358d041bbe19,ffbbcaf7e35b4511b6a031c3f74524e9,ffbf69b997ba474e820e7dccb4909a5f,ffc201f33712429a8e9feb51f106cdf8,ffc401dcce5d437c86f5f0576c5f3d69,ffc5779720ab4576847991b984013472,ffc75d5200e74412b50c64f13b71ceea,ffca3b38a2d6420d89f2891ab2d7cd14,ffcdc73110f84c42a20eebdce31ceebc,ffd14dee20d34559b619817d36aebfd4,ffd6aacb1f804b2594461dd3391fa1dc,ffd8a42cc1ec49d186e53589a66d8dfd,ffdb718bdd254c9fba426244b81fe13e,ffe07c9af69342c28215cfde711f8a52,ffe16aa5796d4b9fb68e0578acad611c,ffe22e4cc18c4d0e92d4f0687d01b562,ffe4bf409a1a48baa004654eb0d2e6f2,ffe4f998cf7c4bf5a38f5406db1ca746,ffe57b1a870f4a259b74006834f1d52a,ffe720a45708458cac9c6736949a204a,ffe772e9939c4bde84c72ca614699a0b,ffe8410c3a3d484592b37e481c096f63,ffec9bebe1b84a02a1614c7dee44760f,fff69ee35d574057aa9911e844e101b5,fff942d6b0b34a638ecf02428304cdb0,fffc471e892a4b4e826858426da79b7e,fffde8d0b28247b8a3dd635ba792df04
professionals_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
00009a0f9bda43eba47104e9ac62aff5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000196ef8db54b9a86ae70ad31745d04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0008138be908438e8944b21f7f57f2c1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000d4635e5da41e3bfd83677ee11dda4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000e2b5714444d79a672bf927905135c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Now we are ready to compresses professional-question matrix into a low-dimensional representation in terms of latent factors.

# Singular Value Decomposition (SVD)

Now we will use SVD to get latent factors. After the factorization, we will try to reconstruct the original matrix by multiplying its factors. The resulting matrix is not sparse any more. It is the generated predictions for questions the professional have not yet answered to, which we will exploit for recommendations.

In [12]:
# Performs matrix factorization of the original donor-project matrix
# Here we set k = 20, which is the number of factors we are going to get
# In the definition of SVD, an original matrix A is approxmated as a product A ≈ UΣV 
# where U and V have orthonormal columns, and Σ is non-negative diagonal.
U, sigma, Vt = svds(professionals_questions_pivot_matrix, k = 20)
sigma = np.diag(sigma)

In [13]:
# Reconstruct the matrix by multiplying its factors
all_professionals_predicted_ratings = np.dot(np.dot(U, sigma), Vt) 

#Converting the reconstructed matrix back to a Pandas dataframe
cf_preds_df = pd.DataFrame(all_professionals_predicted_ratings, 
                           columns = professionals_questions_pivot_matrix_df.columns,
                          index = professionals_id ).transpose()
cf_preds_df.head()

Unnamed: 0_level_0,00009a0f9bda43eba47104e9ac62aff5,000196ef8db54b9a86ae70ad31745d04,0008138be908438e8944b21f7f57f2c1,000d4635e5da41e3bfd83677ee11dda4,000e2b5714444d79a672bf927905135c,0018873fbf7742aba1bf13fff12cbfa4,00211ce6a146458b849283adf31ffc13,00270bae68ee49eba99cb63a2942b5cd,00271cc10e0245fba4a35e76e669c281,002d8cabcc74427cae05751c1fa8609a,002f7ee0dd1946f79714441c76684691,002fdedc782f404daa3bb75e42430982,0030069ff7ec43d19305bf410783768b,00344a57348b4fbbadba9e919672b49d,0035e45e1f914bc9aa273b2ad9daa9c4,00375a2ea44344e38c4cb78c7a45dc4f,003b004e59194b459561e2ff36607a18,003b9a29c12c4d9da5462635713d1bd9,003cc21be89d4e42bc4424131a378e86,0043412d89f54bb681de74ab7c61e738,00458835327245d7bce9103ff114eceb,0046ab8089c04b3a8df3f8c28621a818,004cb439b2fb4abcbf823380a1170e83,004d52842a894cd49d7b55c34ae7b6da,00500833ab73411a9c2fa25b8a5e91ba,0051705600a84ff8bc69c9188357fb81,0053becb71d94164b014a5a7d9673210,0056946f0157477d911335e32d070bd5,00594d1a266f4907ac93a689203dacd2,005b70b5124443a4971cee499eb8225a,005cbd7ae54949db98efaa35641a3f17,005d656602c64b8ab52e1df5b98eb47b,005dfab9314e4be689be072511e3c95e,005f69e5497c43d1b523057cf581d1b8,00605f901ee44f089b71589dc20e1d92,0065131184d7451aa3a6ddcdeb27199a,00680f924e8f49d4962876df5b38eb94,006cc69970144fa395aa6707b50b4326,007255008b1849cda45d671f0df02563,0074377f4c4f481aa4b15ee6a37b0771,...,ffa99446950e40609f46ac86b46432a7,ffaa5ac38da24e9196b7a74c9ce70988,ffab52ce6aa84e3a9b0e8b063b604b2a,ffb04826b6bb4cec912b3e546ec76fb2,ffb0f42469d9455b93bc85071e3030b4,ffb157a233c644f2835d15f2a9d68e80,ffb27753bcb647e5b596be0898a6a1bf,ffb4fe241b67446b8e236a424b91b9f7,ffba48ec9c234f918f563f2b93d4927a,ffbcfa4902b143c388d378296c1ba697,ffbd01d889ef4f689eead27c64b9fb4d,ffbf45e16f4d421ea7cf1d92001714e3,ffca7b070c9d41e98eba01d23a920d52,ffd096f22e4b4ab1a1bffec9ee20a144,ffd29a86f6cc43b488dc0caf98ac075a,ffd769225a5c4dbdaee355d1ddc8c556,ffda25eac8124ac5b499af1e5f8d230b,ffda6db035f4497593d6b37284e4091c,ffda74a4fd8841d385c349c242347381,ffda7c2657aa47dc909a72edb554dfd2,ffdcc03fc51c4621819601fa36acd354,ffdccaff893246519b64d76c3561d8c7,ffde84f8571441f1bebea626d1679480,ffdeaf7003a14c80aa237b35f6f636dc,ffdeed71f2d94b41a21f9787078e2cba,ffe2f26d5c174e13b565d026e1d8c503,ffeaf7114b59443581b8acb5a9bfb38a,ffeeee98a65c47b7b976aebe5e90f2f8,ffef670d44aa40e98f0532764d4a75bb,fff1d4319cf64008a88f07da53aeda75,fff1dd08197e47c9967a8832203f776c,fff7d0fa6b1e483b9ea46995583abb2a,fff86d822ca246b796bd960756298da3,fff86e0b4c2d4abbb47b100f8cd3b944,fff87053584d4f7d9bd89651b3ab0ffe,fff8a862a80b4181a1d2584f5bc3fdc3,fffa0ff907ff4a9a84cb1b142911eee1,fffb74900f994cb580448a28ff57a456,ffffbe8d854a4a5a8ab1a381224f5b80,ffffc3e5933d4e4eb9a33c535396c1b8
questions_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0003e7bf48f24b5c985f8fce96e611f3,2.453846e-05,-8.209981e-18,-8.209981e-18,-1.798629e-06,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-5.9e-05,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-2.422328e-06,-8.209981e-18,-8.209981e-18,-7.092574e-05,-5.81975e-05,-8.209981e-18,-8.209981e-18,-8.209981e-18,1.13383e-05,-8.209981e-18,-8.209981e-18,-8.209981e-18,-2.5114419999999996e-19,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,2.340009e-05,-8.209981e-18,-8.209981e-18,-8.209981e-18,...,-8.209981e-18,-8.209981e-18,-8.209981e-18,-1.325874e-07,3.165909e-05,-0.00027,-8.209981e-18,-8.209981e-18,2.358132e-05,-5.270626e-06,-2.575945e-06,-8.209981e-18,-2.906811e-06,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,3.258314e-08,-8.209981e-18,-8.209981e-18,3.231017e-08,-8.209981e-18,4.574346999999999e-19,-8.209981e-18,-3e-05,-2.9049179999999996e-19,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,-8.209981e-18,0.003334258,-8.209981e-18,-9.028994e-07,-8.209981e-18
0006609dd4da40dcaa5a83e0499aba14,-2.246401e-05,-1.302523e-18,-1.302523e-18,3.062766e-05,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-0.000285,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.798723e-05,-1.302523e-18,-1.302523e-18,-0.000406667,2.748455e-05,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.498002e-05,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.6032869999999999e-19,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.883677e-05,-1.302523e-18,-1.302523e-18,-1.302523e-18,...,-1.302523e-18,-1.302523e-18,-1.302523e-18,4.394667e-07,-1.403667e-05,-0.000232,-1.302523e-18,-1.302523e-18,1.645611e-05,-2.058122e-05,9.327852e-05,-1.302523e-18,-7.020392e-05,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-8.044326e-09,-1.302523e-18,-1.302523e-18,-3.597647e-07,-1.302523e-18,-2.2455009999999997e-19,-1.302523e-18,-3.8e-05,1.1094429999999998e-19,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-1.302523e-18,-0.0001604636,-1.302523e-18,-2.267785e-06,-1.302523e-18
000af224bc2f4e94a19f8b62ba279cc4,-4.737723e-06,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.801149e-06,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-0.000127,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-2.193814e-06,-1.0088740000000001e-17,-1.0088740000000001e-17,-9.671535e-05,-7.270963e-05,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-2.583309e-06,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,2.0897719999999997e-19,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-3.811784e-05,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,...,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-4.954364e-08,-2.534611e-06,-0.00032,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.571503e-06,-6.815976e-06,-3.766371e-06,-1.0088740000000001e-17,-2.342126e-05,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-7.577726e-09,-1.0088740000000001e-17,-1.0088740000000001e-17,-2.138192e-08,-1.0088740000000001e-17,3.0038749999999996e-20,-1.0088740000000001e-17,-3.8e-05,-2.268428e-20,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,-1.0088740000000001e-17,3.767688e-06,-1.0088740000000001e-17,5.15702e-07,-1.0088740000000001e-17
000b30fb534b41f7b716fa9ebf9c3f35,1.896251e-07,-1.291634e-18,-1.291634e-18,2.765228e-08,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,6e-06,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.224915e-08,-1.291634e-18,-1.291634e-18,2.437701e-07,3.835866e-06,-1.291634e-18,-1.291634e-18,-1.291634e-18,2.054758e-07,-1.291634e-18,-1.291634e-18,-1.291634e-18,-3.7100610000000004e-22,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.311341e-08,-1.291634e-18,-1.291634e-18,-1.291634e-18,...,-1.291634e-18,-1.291634e-18,-1.291634e-18,4.336105e-10,-2.769569e-08,5e-06,-1.291634e-18,-1.291634e-18,1.262379e-07,-1.119949e-07,5.622897e-08,-1.291634e-18,3.957961e-07,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,3.102625e-10,-1.291634e-18,-1.291634e-18,-1.859679e-10,-1.291634e-18,1.911755e-22,-1.291634e-18,2e-06,-1.07014e-22,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-1.291634e-18,-4.731435e-07,-1.291634e-18,8.61728e-09,-1.291634e-18
0018752e44b44e26bb74a0a43232b4d6,8.284574e-07,8.765115e-20,8.765115e-20,7.847259e-08,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,3e-05,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,1.316499e-07,8.765115e-20,8.765115e-20,5.28308e-06,-1.647813e-08,8.765115e-20,8.765115e-20,8.765115e-20,1.148878e-06,8.765115e-20,8.765115e-20,8.765115e-20,9.570605e-23,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,5.502408e-07,8.765115e-20,8.765115e-20,8.765115e-20,...,8.765115e-20,8.765115e-20,8.765115e-20,3.729108e-09,5.983626e-07,1.2e-05,8.765115e-20,8.765115e-20,7.255671e-07,7.254222e-07,5.395326e-07,8.765115e-20,7.585486e-07,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,2.355254e-09,8.765115e-20,8.765115e-20,-1.985225e-09,8.765115e-20,2.7338140000000003e-22,8.765115e-20,1.2e-05,-1.6598170000000001e-22,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,8.765115e-20,-7.854544e-07,8.765115e-20,-1.617289e-08,8.765115e-20


# Build the Collaborative Filtering Model

In [14]:
class CFRecommender:
    
    MODEL_NAME = 'Collaborative Filtering'
    
    def __init__(self, cf_predictions_df, questions_df=None):
        self.cf_predictions_df = cf_predictions_df
        self.questions_df = questions_df
        
    def get_model_name(self):
        return self.MODEL_NAME
        
    def recommend_projects(self, professionals_id, questions_to_ignore=[], topn=10):
        # Get and sort the donor's predictions
        sorted_donor_predictions = self.cf_predictions_df[professionals_id].sort_values(ascending=False) \
                                    .reset_index().rename(columns={professionals_id: 'recStrength'})

        # Recommend the highest predicted projects that the donor hasn't donated to
        recommendations_df = sorted_donor_predictions[~sorted_donor_predictions['questions_id'].isin(questions_to_ignore)] \
                               .sort_values('recStrength', ascending = False) \
                               .head(topn)
        
        recommendations_df = recommendations_df.merge(self.questions_df, how = 'left', 
                                                          left_on = 'questions_id', 
                                                          right_on = 'questions_id')


        return recommendations_df

In [15]:
professional1 = '9ced4ce7519049c0944147afb75a8ce3'

In [17]:
cfr_model = CFRecommender(cf_preds_df, questions)
recommended = cfr_model.recommend_projects(professional1)

In [18]:
len(recommended)

10

# Questions answered by a professional

In [19]:
professionals_questions = pd.merge(professionals, answers, 
                                   how = 'left' ,
                                   left_on = 'professionals_id', 
                                   right_on = 'answers_author_id')

In [20]:
#Function to get all questions answered by a professional
def get_questions(professional):
    questions = professionals_questions[professionals_questions.professionals_id == professional].answers_question_id
    questions = pd.DataFrame(questions).rename(index=str, columns={"answers_question_id": "question_id"})
    return(questions) 

In [21]:
questions_all = get_questions(professional1)

# Hit rate calculation -  Scenario 1

In [26]:
len(set(recommended.questions_id).intersection(questions_all.question_id))

0

# Scenario -2

In [28]:
professional2 ='0c673e046d824ec0ad0ebe012a0673e4'

In [29]:
recommended = cfr_model.recommend_projects(professional2)

In [30]:
len(set(recommended.questions_id).intersection(questions_all.question_id))

0