# Imports and Data Loading

In [1]:
# Standard library
import sys
import warnings

# Third-party
import numpy as np
import pandas as pd

# Add the parent directory to the import path so the `src.*` imports below
# resolve (presumably the notebook sits one level below the project root).
sys.path.append('../')

# Project modules -- these must stay after the sys.path tweak above.
from src.models.baselines import PopularRecommender, SimpleJaccard, CosineKNN
from src.models.models import NetStatKNN
from src.models.evaluator import Evaluator
import src.util.tigergraph_util as tgu

# Silence every warning for the remainder of the session.
warnings.filterwarnings('ignore')

The baselines are not graph-based, so they train directly on this user–subreddit data.

In [2]:
# Headerless CSV of user/subreddit rows; this frame is what every baseline
# recommender below is constructed from.
interactions_csv = '../data/out/user_subreddit.csv'
df = pd.read_csv(interactions_csv, header=None)

# Our user:
In this notebook, we will make some subreddit recommendations for user 'lowpass'.

In [3]:
# Username passed to each baseline's `recommend` call in the cells below.
user = 'lowpass'

# Evaluation:
Creating an evaluation helper:

In [4]:
# Build the shared evaluation helper from the held-out interactions file;
# every model in this notebook is scored through this one object.
test_interactions_csv = '../data/out/test_interactions.csv'
evaluator = Evaluator(test_interactions_csv)

# Baseline recommender models:

## Popularity recommender:
Recommends the most popular subreddits that the user does not already belong to.

In [5]:
# Construct the popularity baseline from the interaction frame, then ask it
# for five recommendations for the chosen user and display them.
pop_rec = PopularRecommender(df)
popular_picks = pop_rec.recommend(user, n=5)
popular_picks

['politics', 'IAmA', 'trees', 'worldnews', 'science']

In [6]:
# Score the popularity baseline on a 2000-sized subset and display the result.
# NOTE(review): this call passes 5, whereas every other model in this notebook
# is evaluated with 10 -- the scores are therefore not like-for-like.
pop_scores = evaluator.precision_recall(pop_rec, 5, subset_size=2000)
pop_scores

(0.2393999999999964, 0.3029679395837238)

precision: 0.02990719257540542

recall:    0.2524693404043752

## Jaccard similarity recommender:
Recommends the subreddits that similar users belong to, where user similarity is measured by Jaccard similarity.

In [7]:
# Construct the Jaccard baseline from the interaction frame and display its
# recommendations for the chosen user (using the model's default count).
jaccard_rec = SimpleJaccard(df)
jaccard_picks = jaccard_rec.recommend(user)
jaccard_picks

['learnprogramming']

In [8]:
# Score the Jaccard baseline on a 2000-sized subset and display the result.
jaccard_scores = evaluator.precision_recall(jaccard_rec, 10, subset_size=2000)
jaccard_scores

(0.07844999999999897, 0.1840926966688945)

precision: 0.009563066776587017

recall:    0.07598568418848715

## Cosine similarity KNN recommender:
Recommends the subreddits that similar users belong to, where similar users are the nearest neighbors found by a cosine-similarity KNN.

In [9]:
# Construct the cosine KNN baseline from the interaction frame and display its
# recommendations for the chosen user (using the model's default count).
knn_rec = CosineKNN(df)
knn_picks = knn_rec.recommend(user)
knn_picks

['beer']

In [10]:
# Score the cosine KNN baseline on a 2000-sized subset and display the result.
knn_scores = evaluator.precision_recall(knn_rec, 10, subset_size=2000)
knn_scores

(0.15758690476190415, 0.28880661565000765)

precision: 0.02346024330772864

recall:    0.12531798374749745

# Interaction graph recommender models:

In [11]:

# Open a TigerGraph connection through the project helper, configured from the
# JSON file under ../config.
conn = tgu.connection('../config/tigergraph.json')

# Fetch every `user` vertex with its `fastrp_embedding` attribute and expand
# that attribute into one column per component.
# NOTE(review): the attribute is named `fastrp_embedding`, yet its four
# components are labelled as network statistics below -- confirm that the
# stored values really are (pagerank, louvain, label_prop, degree) in this order.
#
# The result is bound to `user_stats` instead of re-using `df`, so the
# interaction frame the baseline models were built from is not overwritten and
# the baseline cells stay re-runnable after this cell has executed.
user_vertices = conn.getVertexDataFrame('user', select='fastrp_embedding')
user_stats = pd.concat(
    [
        user_vertices['v_id'].astype(str).to_frame(),
        user_vertices['fastrp_embedding'].apply(pd.Series),
    ],
    axis=1,
)
user_stats.columns = ['v_id', 'pagerank', 'louvain', 'label_prop', 'degree']

# Per-user rows from user.csv; column 0 is the user id, cast to str so it
# joins cleanly with the (string) vertex ids above.
embeddings = (
    pd.read_csv('../data/out/user.csv', header=None)
    .rename(columns={0: 'v_id'})
    .astype({'v_id': str})
)

# Inner join: only users present in both the graph and the CSV are kept.
user_data = user_stats.merge(embeddings, on='v_id', how='inner')

# Remaining inputs for the graph model, read as headerless CSVs.
subreddit_data = pd.read_csv('../data/out/subreddit.csv', header=None)
user_subreddit = pd.read_csv('../data/out/user_subreddit.csv', header=None)

# Graph-based recommender, bound to the TigerGraph connection.
reddit_graph = NetStatKNN(conn)

## Centrality KNN community recommender:
Recommends the subreddits that similar users belong to, where similar users are the nearest neighbors under the following graph statistics: PageRank, Louvain community, label-propagation community, and degree.

In [12]:
# Fit the graph-based recommender on the user features, the subreddit table and
# the user/subreddit interactions assembled in the previous cell.
# NOTE(review): the trailing positional 10 is presumably the neighbour count (k)
# -- confirm against NetStatKNN.fit and consider passing it by keyword.
reddit_graph.fit(user_data, subreddit_data, user_subreddit, 10)

In [13]:
# Score the graph-based recommender on a 2000-sized subset and display the result.
graph_scores = evaluator.precision_recall(reddit_graph, 10, subset_size=2000)
graph_scores

(0.07663645518630331, 0.2259847925853217)