In [2]:
from dataset import Dataset
from model_selection import GridSearchCV

from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split, ranking_metrics_at_k

import pandas as pd
import numpy as np
import random
from tqdm import tqdm
from scipy.sparse import csr_matrix, coo_matrix
import matplotlib.pyplot as plt

In [3]:
# Load the interaction data from CSV, telling the loader which columns hold
# the user, the item (subreddit) and the rating (interaction count).
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it is pointed at a local copy of data.csv.
csv_path = "C:/Users/TS/PycharmProjects/DS1-RecommendationSystems/data.csv"
dataset = Dataset.from_csv(
    csv_path,
    user="user",
    item="subreddit",
    rating="count",
)

In [6]:
# The estimator class itself (not an instance) is handed to the grid search.
model = AlternatingLeastSquares

# Candidate values for each hyper-parameter. Only `iterations` lists more
# than one value, so the grid contains two combinations.
parameter = {
    "iterations": [10, 20],
    "factors": [60],
    "alpha": [1],
    "regularization": [0.01],
}

grid = GridSearchCV(
    algo=model,
    param_grid=parameter,
    cv=5,  # number of folds in cross-validation
    eval_k=10,  # number of top items to check in validation
    metrics=["map", "precision"],  # the metrics to use
)
print("Number of parameters combinations in grid:", len(grid))

Number of parameters combinations in grid: 2


In [7]:
# Run the grid search on the dataset's `item_user` matrix
# (presumably items x users -- confirm against the Dataset class).
interactions = dataset.item_user
grid.fit(interactions)

# Keep this as the trailing expression: the notebook displays its return value.
grid.get_result(show=False)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/23581 [00:00<?, ?it/s]

<model_selection.GridSearchCV at 0x1ca59354ac0>

In [10]:
# Take the entry stored under the 'map' metric in the grid's best results;
# element 1 of that entry is unpacked as keyword arguments for a fresh model.
best_per_metric = grid.get_best()
best_map_params = best_per_metric['map'][1]

best_model = AlternatingLeastSquares(**best_map_params)
# Refit on the full `item_user` matrix (no cross-validation split here).
best_model.fit(dataset.item_user)

  0%|          | 0/10 [00:00<?, ?it/s]

In [13]:
reddit = "nba"

# Ask the fitted model for the 10 items most similar to the chosen subreddit.
query_item_id = dataset.get_item_id(reddit)
related = best_model.similar_items(query_item_id, N=10)

# One line per neighbour: 1-based rank, item name, similarity value.
row_template = "{0:<3}{1:<20}{2:.3f}"
for rank, (item_idx, score) in enumerate(related, start=1):
    print(row_template.format(rank, dataset.get_item(item_idx), score))

1  nba                 1.000
2  nfl                 0.745
3  baseball            0.659
4  NBA2k               0.648
5  CollegeBasketball   0.642
6  nbadiscussion       0.641
7  ripcity             0.625
8  fantasybball        0.621
9  rockets             0.619
10 CFB                 0.618


In [20]:
user = "IncognitoCumShot"  # username whose recommendations are inspected

userid = dataset.get_user_id(user)

# Top-10 recommendations, excluding items the user already interacted with.
rec = best_model.recommend(
    userid,
    dataset.user_item,
    N=10,
    filter_already_liked_items=True,
)
rec_template = "{0:<3}{1:<20}{2:.3f}"
for rank, (item_idx, score) in enumerate(rec, start=1):
    print(rec_template.format(rank, dataset.get_item(item_idx), score))

print("-"*30)
print("True feedback:")

# Fetch the user's row once; `indices` are item ids and `data` the stored
# ratings for that row. List them from highest rating to lowest.
feedback_row = dataset.user_item.getrow(userid)
observed = sorted(
    zip(feedback_row.indices, feedback_row.data),
    key=lambda pair: pair[1],
    reverse=True,
)
for subreddit, rating in observed:
    print("{0:<23}{1:<3}".format(dataset.get_item(subreddit), rating))

1  Bitcoin             0.795
2  pennystocks         0.720
3  Superstonk          0.712
4  politics            0.650
5  StockMarket         0.610
6  worldnews           0.536
7  ethtrader           0.531
8  pics                0.530
9  news                0.510
10 gaming              0.509
------------------------------
True feedback:
wallstreetbets         635
CryptoCurrency         99 
stocks                 18 
options                10 
interestingasfuck      8  
HVAC                   5  
Wallstreetbetsnew      5  
runescape              4  
australia              3  
AskReddit              2  
dogecoin               2  
pcmasterrace           2  
SPACs                  1  
gifs                   1  
spacex                 1  
technology             1  
tifu                   1  
videos                 1  
