In [1]:
%matplotlib inline

import math
import pandas as pd
import numpy as np
import joblib

In [2]:
# Load the notification events and the user table from CSV.
# The id and timestamp columns are dropped right after loading; none of the
# later cells read them.
notifications = (
    pd.read_csv('data/notifications.csv')
    .drop(columns=['id', 'created_at', 'updated_at'])
)
users = pd.read_csv('data/users.csv')
notifications

Unnamed: 0,post_id,parent_comment_id,comment,sender_id,user_id,status,action,type
0,,,,4,7,1,Followed,Following
1,,,,1,4,1,Followed,Following
2,,,,3,45,0,Followed,Following
3,,,,1,4,1,Followed,Following
4,278.0,,Hey bro!,1,4,1,Commented,Post
...,...,...,...,...,...,...,...,...
185,999.0,,,1,4,1,Like,Reaction
186,1007.0,,,3,3,0,Like,Reaction
187,998.0,,,2,2,0,Like,Reaction
188,999.0,,,4,4,1,Like,Reaction


In [3]:
def smooth_user_preference(x):
    """Dampen a raw interaction score with a base-2 logarithm.

    Computes ``log2(1 + x)``, so a score of 0 maps to 0 and each doubling of
    ``1 + x`` adds one to the result.

    Parameters
    ----------
    x : int or float
        Raw (summed) interaction strength; must be greater than -1.

    Returns
    -------
    float
        The smoothed score.

    Raises
    ------
    ValueError
        If ``1 + x`` is not positive.
    """
    # math.log2 is used instead of math.log(..., 2): the two-argument form
    # divides two natural logarithms and can be off by one ulp on exact
    # powers of two.
    return math.log2(1 + x)

In [4]:
def recommend(user_id):
    """Return the most "popular" users, excluding ``user_id`` itself.

    A user's popularity is the sum of per-action weights over every row of
    the module-level ``notifications`` table whose ``user_id`` column equals
    that user, passed through ``smooth_user_preference``. Only users whose
    notifications cover at least two distinct posts are ranked.

    The shared ``notifications`` table is read but never modified.

    Parameters
    ----------
    user_id
        Id to leave out of the result (the user asking for recommendations).

    Returns
    -------
    pandas.DataFrame
        A single ``user_id`` column ordered from highest to lowest score.
        Index labels are each user's position in the ascending-``user_id``
        list of ranked users (they are not reset after sorting/filtering).

    Raises
    ------
    KeyError
        If ``notifications['action']`` holds a value that has no weight.
    """
    event_type_strength = {
        'Followed': 1.0,
        'Like': 1.0,
        'Love': 2.0,
        'Commented': 4.0,
        'Replied': 4.0
    }

    # Work on a private copy so the shared table does not silently gain an
    # ``eventStrength`` column every time recommendations are requested.
    events = notifications[['user_id', 'post_id', 'action']].copy()
    events['eventStrength'] = events['action'].map(event_type_strength)

    # ``map`` leaves NaN where an action has no weight; fail loudly and name
    # the offending values instead of raising from inside a lambda.
    unweighted = events.loc[events['eventStrength'].isna(), 'action']
    if not unweighted.empty:
        unknown = sorted({str(action) for action in unweighted})
        raise KeyError(f"no event strength defined for action(s): {unknown}")

    # Number of distinct posts per user; rows whose post_id is missing (NaN)
    # are ignored by ``nunique`` and so do not count towards the threshold.
    posts_per_user = events.groupby('user_id')['post_id'].nunique()
    eligible_ids = posts_per_user.index[posts_per_user >= 2]

    scores = (
        events[events['user_id'].isin(eligible_ids)]
        .groupby('user_id')['eventStrength']
        .sum()
        .apply(smooth_user_preference)
        .reset_index()
    )

    # Stable sort so users with equal scores come out in a reproducible order.
    ranked = scores.sort_values('eventStrength', ascending=False, kind='mergesort')

    # Keep everyone except the requesting user; expose only the id column.
    return ranked.loc[ranked['user_id'] != user_id, ['user_id']]

# Example: ranked popular users as seen by user 1 (user 1 itself is filtered out).
recommend(1)

Unnamed: 0,user_id
1,2
2,3
4,7
3,4


In [5]:
# Persist the `recommend` callable to disk with joblib.
# NOTE(review): pickle-based serializers normally store a module-level function
# by reference (module + name), not its code or the `notifications` data it
# reads. Confirm 'popular.sav' is only ever loaded where `recommend`,
# `smooth_user_preference` and `notifications` are already defined.
filename = 'popular.sav'
joblib.dump(recommend, filename)

['popular.sav']

In [6]:
# Load the pickled `recommend` callable back from disk and call it for user 3.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load files that come from a trusted source.
loaded_model = joblib.load(filename)
result = loaded_model(3)
print(result)

   user_id
1        2
4        7
3        4
0        1
