In [124]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import mean_average_precision_at_k, ranking_metrics_at_k
from implicit.nearest_neighbours import CosineRecommender, normalize

from dataset import Dataset
from model_selection import GridSearchCV, KFold
from model_wrapper import ModelWrapper

# Give every figure a white background so plots stay readable on dark notebook themes.
plt.rcParams.update({'figure.facecolor': 'white'})

In [3]:
# Location of the interaction CSV. The default is the original local path, so
# behaviour is unchanged on the author's machine; set the RECSYS_DATA_CSV
# environment variable to run the notebook elsewhere without editing this cell.
DATA_CSV = os.environ.get(
    "RECSYS_DATA_CSV",
    "C:/Users/TS/PycharmProjects/DS1-RecommendationSystems/data.csv",
)

# Build the project Dataset from the CSV.
# The keyword arguments presumably name the CSV columns holding the user, the
# item and the interaction count — verify against Dataset.from_csv.
dataset = Dataset.from_csv(DATA_CSV, user="user", item="subreddit", rating='count')

Loading csv.
Creating pivot.


In [78]:
# Grid search over implicit's ALS model. The class itself (not an instance) is
# handed to GridSearchCV.
model = AlternatingLeastSquares

# Candidate values per hyper-parameter; every list has one entry, so the grid
# contains a single combination.
parameter = {
    "iterations": [10],
    "factors": [64],
    "alpha": [1],
    "regularization": [0.1],
}

grid = GridSearchCV(
    algo=model,
    param_grid=parameter,
    cv=5,                          # number of cross-validation folds
    eval_k=20,                     # how many top-ranked items are checked during validation
    metrics=['map', 'precision'],  # metrics reported for each fold
    random_state=0,
)
print("Number of parameters combinations in grid:", len(grid))

Number of parameters combinations in grid: 1


In [77]:
# Run the cross-validated search on the dataset's item_user matrix.
grid.fit(dataset.item_user)
# Last expression of the cell, so the returned results are shown as the cell output.
# show=False presumably suppresses the helper's own printing — verify in model_selection.
grid.get_result(show=False)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

{'param_set_0': {'params': {'alpha': 1,
   'factors': 64,
   'iterations': 10,
   'regularization': 0.1},
  'Fold 0': {'map': 0.12847238237923747, 'precision': 0.23237576538129906},
  'Fold 1': {'map': 0.12816494833311556, 'precision': 0.23295454545454544},
  'Fold 2': {'map': 0.1311190490954621, 'precision': 0.23469793430963026},
  'Fold 3': {'map': 0.1283675372795771, 'precision': 0.232661375978387},
  'Fold 4': {'map': 0.13022042512699, 'precision': 0.2335801393728223},
  'mean': {'map': 0.12926886844287644, 'precision': 0.2332539520993368},
  'std': {'map': 0.001182713498233923, 'precision': 0.0008250436922770743}}}

In [52]:
# Fetch the search results restricted to the 'map' metric and tabulate them.
# NOTE(review): the structure returned by get_result is not visible here;
# presumably one record per parameter set — confirm.
inf = grid.get_result(show=False, filter_metric='map')
df = pd.DataFrame(inf)

Unnamed: 0,alpha,factors,iterations,regularization,Fold 0,Fold 1,Fold 2,Fold 3,Fold 4,mean,std
0,1,64,5,0.01,0.12584,0.124978,0.123817,0.126122,0.12597,0.125346,0.000861
1,1,64,5,1.0,0.126453,0.124971,0.124607,0.125668,0.126584,0.125657,0.000783
2,1,64,5,0.001,0.124098,0.120606,0.122643,0.123171,0.122291,0.122562,0.001152
3,1,64,5,0.1,0.12913,0.127581,0.127489,0.128568,0.12825,0.128203,0.000615


In [75]:
# Saved grid-search results. The default is the original path on the author's
# T: drive; set RECSYS_GRIDSEARCH_CSV to load the file from another location.
GRIDSEARCH_CSV = os.environ.get("RECSYS_GRIDSEARCH_CSV", "T:/Gridsearch_new.csv")
df = pd.read_csv(GRIDSEARCH_CSV)

# Pick the parameter set with the highest value in the 'mean' column.
# argmax returns a position, not a label, hence the positional iloc lookup.
best = df.iloc[df['mean'].argmax()]
print(best)

alpha              1.000000
factors           64.000000
iterations        10.000000
regularization     0.100000
Fold 0             0.130713
Fold 1             0.127383
Fold 2             0.128475
Fold 3             0.129593
Fold 4             0.129868
mean               0.129206
std                0.001159
Name: 213, dtype: float64


In [65]:
# Rebuild a model from the best grid-search row. That row is a float64 Series,
# so the integer-valued hyper-parameters are cast back to int before use.
# (Alternative left by the author, currently unused: unpack
# grid.get_best()['map'][1] instead of reading the values from `best`.)
best_params = {
    "alpha": best.alpha,
    "regularization": best.regularization,
    "iterations": int(best.iterations),
    "factors": int(best.factors),
}
best_model = ModelWrapper(dataset, **best_params)
best_model.fit()


  0%|          | 0/10 [00:00<?, ?it/s]

In [80]:
# Subreddit to look up. similar_items is asked for the 10 closest items; with
# show=True it also prints a ranked name/score table (see the cell output).
reddit = "nba"
best_model.similar_items(reddit, N=10, show=True)

1  nba                 1.000
2  nfl                 0.655
3  lakers              0.538
4  baseball            0.469
5  warriors            0.469
6  nbadiscussion       0.456
7  sports              0.449
8  fantasyfootball     0.429
9  NBA2k               0.415
10 CollegeBasketball   0.408


[(2549, 1.0000001),
 (2565, 0.65475285),
 (2387, 0.5382221),
 (1728, 0.4692666),
 (3126, 0.4686325),
 (2551, 0.4564415),
 (2915, 0.44869167),
 (2086, 0.42884588),
 (1014, 0.41529006),
 (334, 0.4081936)]

In [81]:
# User to generate the top-10 recommendations for; with show=True the helper
# prints the recommendations followed by the user's true feedback.
# NOTE(review): this is a real account name and the cell output lists that
# account's activity — consider anonymising before sharing the notebook.
user = "IncognitoCumShot"  # -ah IncognitoCumShot
best_model.recommend(user, N=10, show=True)

1  Bitcoin             0.743
2  pennystocks         0.694
3  GME                 0.679
4  Superstonk          0.652
5  StockMarket         0.586
6  ethtrader           0.534
7  RobinHoodPennyStocks0.499
8  WallStreetbetsELITE 0.489
9  politics            0.478
10 amcstock            0.472
------------------------------
True feedback:
wallstreetbets         635
CryptoCurrency         99 
stocks                 18 
options                10 
interestingasfuck      8  
HVAC                   5  
Wallstreetbetsnew      5  
runescape              4  
australia              3  
AskReddit              2  
AssassinsCreedValhala  2  
dogecoin               2  
investing              2  
pcmasterrace           2  
SPACs                  1  
gifs                   1  
leangains              1  
spacex                 1  
technology             1  
tifu                   1  
videos                 1  


[(219, 0.74302477),
 (2652, 0.6936183),
 (603, 0.6790528),
 (1365, 0.65227216),
 (1344, 0.58603936),
 (2059, 0.5336151),
 (1232, 0.49921656),
 (1534, 0.4886002),
 (2692, 0.47838312),
 (1628, 0.47233248)]

Testing Cosine Neighbours

In [105]:

# Grid search over implicit's cosine nearest-neighbour recommender. As with the
# ALS search, the class itself is passed to GridSearchCV.
neighbour_model = CosineRecommender

# Neighbourhood sizes 10, 20, ..., 200 (20 candidates).
parameter = {"K": list(range(10, 201, 10))}

# NOTE(review): eval_k=10 here, whereas the ALS search earlier in this notebook
# uses eval_k=20, so the scores of the two searches are not directly
# comparable — confirm this is intended.
grid = GridSearchCV(
    algo=neighbour_model,
    param_grid=parameter,
    cv=5,                          # number of cross-validation folds
    eval_k=10,                     # how many top-ranked items are checked during validation
    metrics=['map', 'precision'],  # metrics reported for each fold
    random_state=0,
)
print("Number of parameters combinations in grid:", len(grid))

Number of parameters combinations in grid: 20


In [106]:
# Run the cosine-recommender search; the item_user matrix is cast to float
# before being handed to the search.
grid.fit(dataset.item_user.astype(float))
# Last expression of the cell, so the returned results are shown as the cell output.
grid.get_result(show=False)


  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

  0%|          | 0/3224 [00:00<?, ?it/s]

{'param_set_0': {'params': {'K': 10},
  'Fold 0': {'map': 0.060827422185572126, 'precision': 0.11812794010106853},
  'Fold 1': {'map': 0.06024520089947615, 'precision': 0.11799151805132667},
  'Fold 2': {'map': 0.06212789100424185, 'precision': 0.1183592111927415},
  'Fold 3': {'map': 0.059437631790677826, 'precision': 0.11650226315818141},
  'Fold 4': {'map': 0.06115497648291812, 'precision': 0.11698061846689896},
  'mean': {'map': 0.060758624472577204, 'precision': 0.11759231019404341},
  'std': {'map': 0.0008994825520485767, 'precision': 0.0007206625787244749}},
 'param_set_1': {'params': {'K': 20},
  'Fold 0': {'map': 0.06893146627574132, 'precision': 0.13620785627749096},
  'Fold 1': {'map': 0.066850969055892, 'precision': 0.1322314049586777},
  'Fold 2': {'map': 0.06902671765585729, 'precision': 0.13340667360131578},
  'Fold 3': {'map': 0.06648815836530694, 'precision': 0.13220547624364765},
  'Fold 4': {'map': 0.06859279055784072, 'precision': 0.13456010452961673},
  'mean': {'m

In [107]:
# Tabulate the 'map' results of the cosine search and save them as CSV without
# the DataFrame index column.
# NOTE(review): the output filename is spelled "nearst" — left unchanged here
# because other code may read this exact path.
inf = grid.get_result(show=False, filter_metric='map')
df = pd.DataFrame(inf)
df.to_csv("nearst_neigh_grid.csv", index=False)

In [118]:
# Fit one cosine recommender with K=20 on the float-cast item_user matrix.
# neighbour_model is the recommender class assigned in the grid-search setup cell.
m = neighbour_model(K=20)
m.fit(dataset.item_user.astype(float))

  0%|          | 0/3224 [00:00<?, ?it/s]

In [122]:
# Inspect the fitted model's similarity attribute. Per the repr shown as output
# it is a 3224x3224 sparse matrix with 64480 stored elements (3224 * 20, which
# matches K=20 entries per row).
m.similarity

<3224x3224 sparse matrix of type '<class 'numpy.float64'>'
	with 64480 stored elements in Compressed Sparse Row format>

In [125]:
# Apply implicit's normalize helper to the float-cast item_user matrix and
# display the repr of what it returns (a COO sparse matrix, per the output).
normalize(dataset.item_user.astype(float))

<3224x34563 sparse matrix of type '<class 'numpy.float64'>'
	with 1284519 stored elements in COOrdinate format>

In [115]:
import numpy as np
from scipy.sparse import csr_matrix, coo_matrix

# Toy check of row-wise L2 normalisation on a sparse COO matrix.
x = [[10, 1, 0], [4, 0, 0]]

X = coo_matrix(x)

# Sum of the squared stored values per row: bincount accumulates the weights
# under each entry's row index.
row_sq_sums = np.bincount(X.row, weights=X.data ** 2)
row_norms = np.sqrt(row_sq_sums)

# Divide every stored entry by the norm of the row it belongs to.
X.data = X.data / row_norms[X.row]
X.todense()

matrix([[0.99503719, 0.09950372, 0.        ],
        [1.        , 0.        , 0.        ]])