# Imports and Data Loading

In [1]:
import sys
import numpy as np
sys.path.append('../')
from src.models.cosine_knn import CosineKNN
from src.models.jaccard import SimpleJaccard
from src.models.popular_recommender import PopularRecommender
import pandas as pd
from src.models.model import RedditGraph
from src.models.evaluator import Evaluator
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Build one (user, subreddit) row per user comment by joining two CSV exports:
#   comments.csv        -> comment id, subreddit, plus two fields that are ignored
#   users_comments.csv  -> user, comment id
users = []
with open('../data/out/users_comments.csv') as ucf, open('../data/out/comments.csv') as cf:
    # Each comments.csv row must split into exactly four comma-separated fields;
    # only the first two (comment id, subreddit) are kept.
    comments_to_subreddit = {
        comment_id: subreddit
        for comment_id, subreddit, _, _ in (row.split(',') for row in cf)
    }

    # The comment id is the last field on the line, so strip its trailing
    # newline before looking up the subreddit it was posted in.
    data = [
        (username, comments_to_subreddit[comment_id.strip()])
        for username, comment_id in (row.split(',') for row in ucf)
    ]
df = pd.DataFrame(data, columns=['user', 'subreddit'])

# Our user:
In this notebook, we will make some subreddit recommendations for user 'lowpass'.

In [3]:
# Username passed to each baseline recommender's recommend() call in the cells below.
user = 'lowpass'

# Evaluation:
Creating an evaluation helper:

In [4]:
# Shared evaluation helper, constructed from the test-interactions CSV.
# Its precision_recall() method is called once per model in the cells below.
evaluator = Evaluator('../data/out/test_interactions.csv')

# Baseline recommender models:

## Popularity recommender:
Recommends the most popular subreddits that the user does not already belong to.

In [5]:
# Construct the popularity baseline from the (user, subreddit) frame and
# request n=5 recommendations for the example user.
pop_rec = PopularRecommender(df)
pop_rec.recommend(user, n=5)

['politics', 'IAmA', 'trees', 'worldnews', 'science']

In [6]:
# Displays a (precision, recall) tuple for the popularity baseline.
# NOTE(review): the 10 is presumably the number of recommendations scored per user — confirm in Evaluator.
evaluator.precision_recall(pop_rec, 10)

(0.028386919483373657, 0.22462007703145379)

precision: 0.028386919483373657

recall:    0.22462007703145379

## Jaccard similarity recommender:
Recommends the subreddits that similar users belong to, where user similarity is measured by Jaccard similarity.

In [7]:
# Construct the Jaccard baseline from the (user, subreddit) frame and
# request recommendations for the example user with the model's default arguments.
jaccard_rec = SimpleJaccard(df)
jaccard_rec.recommend(user)

['learnprogramming']

In [8]:
# Displays a (precision, recall) tuple for the Jaccard baseline.
# NOTE(review): the 10 is presumably the number of recommendations scored per user — confirm in Evaluator.
evaluator.precision_recall(jaccard_rec, 10)

(0.009563066776587017, 0.07598568418848715)

precision: 0.009563066776587017

recall:    0.07598568418848715

## Cosine similarity KNN recommender:
Recommends the subreddits that similar users belong to, where similar users are the nearest neighbors found by a cosine-similarity KNN.

In [9]:
# Construct the cosine-KNN baseline from the (user, subreddit) frame and
# request recommendations for the example user with the model's default arguments.
knn_rec = CosineKNN(df)
knn_rec.recommend(user)

['beer']

In [10]:
# Displays a (precision, recall) tuple for the cosine-KNN baseline.
# NOTE(review): the 10 is presumably the number of recommendations scored per user — confirm in Evaluator.
evaluator.precision_recall(knn_rec, 10)

(0.02346024330772864, 0.12531798374749745)

precision: 0.02346024330772864

recall:    0.12531798374749745

# Interaction graph recommender models:

In [11]:
# Build the graph-backed model from the project config file.
# NOTE(review): the recorded output of this cell is a ConnectionError against
# ucsd-reddit.i.tgcloud.io:9000, so construction needs network access to that
# host; the graph cells below use `reddit_graph` and cannot run without it.
reddit_graph = RedditGraph('../config.json')

ConnectionError: HTTPSConnectionPool(host='ucsd-reddit.i.tgcloud.io', port=9000): Max retries exceeded with url: /echo/Reddit (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001880510CBB0>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))

## Louvain community recommender:
Recommends the subreddits that similar users belong to, where similarity is determined by each user's assigned Louvain community.

In [None]:
# Extra keyword arguments forwarded to precision_recall() via ** unpacking.
# NOTE(review): 'how' presumably selects the RedditGraph recommendation strategy — confirm in src.models.model.
mdl_params = {'how': 'louvain'}
evaluator.precision_recall(reddit_graph, 10, **mdl_params)

(0.0, 0.0)

## Centrality KNN community recommender:
Recommends the subreddits that similar users belong to, where similar users are the nearest neighbors computed over the following graph metrics: PageRank, Louvain, label propagation, and degree.

In [None]:
# NOTE(review): presumably fits a k=10 nearest-neighbour model over 'user'
# vertices, which the how='knn' evaluation then relies on — confirm in RedditGraph.
reddit_graph.fit_knn('user', k=10)

In [None]:
# Rebinds mdl_params (previously {'how': 'louvain'}) for the KNN variant;
# the dict is forwarded to precision_recall() via ** unpacking.
# NOTE(review): 'how' presumably selects the RedditGraph recommendation strategy — confirm in src.models.model.
mdl_params = {'how': 'knn'}
evaluator.precision_recall(reddit_graph, 10, **mdl_params)

(0.01, 0.1)