In [32]:
import pyximport
import numpy as np
pyximport.install(setup_args={"include_dirs": np.get_include()},
                  reload_support=True)
from algorithms.knn_neighborhood import UserKNN
from surprise import Dataset, Reader, accuracy
from surprise.model_selection import train_test_split, KFold
import matplotlib.pyplot as plt
import pandas as pd
from collections import defaultdict, Counter
from datetime import datetime as dt
from scipy.stats import spearmanr
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import gaussian_kde
from sklearn.neighbors import KernelDensity
import networkx as nx

In [33]:
data_df = pd.read_csv("data/ml-100k/u.data", sep="\t")
data_df.columns = ["user_id", "item_id", "rating", "timestamp"]
data_df.drop(columns=["timestamp"], axis=1, inplace=True)
data_df["user_id"] = data_df["user_id"].map({b: a for a, b in enumerate(data_df["user_id"].unique())})
data_df["item_id"] = data_df["item_id"].map({b: a for a, b in enumerate(data_df["item_id"].unique())})

reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(data_df, reader=reader)
trainset, testset = train_test_split(dataset, 0.2)

In [45]:
model = UserKNN(k=10, tau_2=0.5)
model.fit(trainset)
_ = model.test(testset)

In [48]:
top_neighbors = sorted(model.exposure_u.items(), key=lambda t: t[1])[::-1][:10]
top_neighbors = [uid for uid, _ in top_neighbors]
top_neighbors

[186, 69, 137, 73, 304, 350, 315, 261, 214, 375]

In [55]:
model = UserKNN(k=10, tau_4=0.5)
model.fit(trainset)
_ = model.test(testset)

In [56]:
top_neighbors = sorted(model.exposure_u.items(), key=lambda t: t[1])[::-1][:10]
top_neighbors = [uid for uid, _ in top_neighbors]
top_neighbors

[186, 69, 73, 137, 304, 261, 315, 350, 375, 54]

In [52]:
user_popularities = UserKNN.compute_popularities(trainset)

In [38]:
V = list(trainset.all_users())

G = nx.DiGraph()
for u_i in V:
    for u_j in V:
        w_ij = R[u_i, u_j]
        G.add_edge(u_i, u_j, weight=w_ij)

In [40]:
pr_scores = nx.algorithms.link_analysis.pagerank_alg.pagerank(G)

In [49]:
pr_top_neighbors = sorted(pr_scores.items(), key=lambda t: t[1])[::-1][:10]
pr_top_neighbors = [uid for uid, _ in pr_top_neighbors]
pr_top_neighbors

[147, 43, 104, 101, 182, 108, 177, 22, 40, 7]

In [51]:
set(top_neighbors).intersection(pr_top_neighbors)

set()