In [17]:
import glob

# Merge every "*.rating" file in the working directory into a single
# "test_data.rating" file.
output_path = "test_data.rating"

# The merged file itself matches "*.rating". After a first run it would be
# picked up as an input while simultaneously being truncated and rewritten,
# so it is excluded here. glob returns names in arbitrary order; sorting makes
# the concatenation order reproducible.
read_files = sorted(f for f in glob.glob("*.rating") if f != output_path)

with open(output_path, "w") as outfile:
    for f in read_files:
        with open(f, "r") as infile:
            content = infile.read()
        outfile.write(content)
        # Keep the last record of one file from fusing with the first record
        # of the next when an input lacks a trailing newline.
        if content and not content.endswith("\n"):
            outfile.write("\n")

In [2]:
from collections import defaultdict
import numpy as np
import scipy, scipy.sparse, scipy.sparse.linalg
import matplotlib.pyplot as plt

In [13]:
# Build an undirected adjacency list from the tab-separated links file and
# collect every node id that appears in it.
graph = defaultdict(list)
users_set = set()
with open('test_data.links') as file:
    for line in file:
        record = line.strip()
        if not record:
            # Blank lines (e.g. an extra newline at end of file) carry no
            # edge; without this guard int('') raises ValueError.
            continue
        # Each record must hold exactly three integer fields; the third one
        # is parsed but not used.
        node_1, node_2, _ = map(int, record.split('\t'))
        # Store the edge in both directions.
        graph[node_1].append(node_2)
        graph[node_2].append(node_1)
        users_set.update((node_1, node_2))
# NOTE(review): later cells use num_users as the first index handed out to
# items and use raw ids as matrix indices, which presumably assumes the user
# ids are exactly 0..num_users-1 -- confirm against the data.
num_users = len(users_set)

In [15]:
# Report how many distinct node ids were collected into num_users.
print(
    "For this dataset, there are:",
    num_users,
    "users.",
)

For this dataset, there are: 3 users.


In [18]:
# Parse the tab-separated ratings file once, keeping the (user, item) pairs.
# The third column is parsed but not used.
ratings = []
with open('test_data.rating') as file:
    for line in file:
        record = line.strip()
        if not record:
            # Blank lines carry no rating; without this guard int('') raises
            # ValueError.
            continue
        user, item, _ = map(int, record.split('\t'))
        ratings.append((user, item))

items_set = {item for _, item in ratings}
num_items = len(items_set)

# Items receive the indices num_users .. num_users + num_items - 1, handed out
# in ascending order of their raw item id.
item_idx_encode = {
    raw_item: idx
    for idx, raw_item in enumerate(sorted(items_set), start=num_users)
}

# Add one undirected user<->item edge per rating, using the encoded item index.
# Note: this appends to the existing graph, so re-running the cell adds the
# same edges again.
for user, item in ratings:
    item = item_idx_encode[item]
    graph[user].append(item)
    graph[item].append(user)

In [19]:
# Report how many distinct item ids were found in the ratings file.
print(
    "For this dataset, there are:",
    num_items,
    "items.",
)

For this dataset, there are: 5 items.


In [20]:
# Dense square 0/1 adjacency matrix with one row/column per node index
# (side length num_users + num_items).
A = np.zeros((num_users + num_items,) * 2)
for node, neighbors in graph.items():
    for neighbor in neighbors:
        # Mark the edge in both directions so the matrix is symmetric.
        A[node, neighbor] = 1
        A[neighbor, node] = 1

In [21]:
# Convert the dense adjacency matrix to SciPy's compressed sparse column
# format, which is what the sparse eigen-solver below is given.
A_sparse = scipy.sparse.csc_matrix(A)

In [22]:
# Leading eigenpair of the adjacency matrix.
# The matrix is real and symmetric by construction (every edge is written to
# both A[i][j] and A[j][i]), so the symmetric solver eigsh is used instead of
# the general eigs: it returns real-valued arrays rather than complex ones.
# which='LA' selects the largest algebraic eigenvalue; for a non-negative
# matrix that is the spectral radius, and it avoids picking -lambda when the
# spectrum is symmetric around zero (e.g. a bipartite graph).
# eig_val has shape (1,), eig_vec has shape (n, 1).
eig_val, eig_vec = scipy.sparse.linalg.eigsh(A_sparse, k=1, which='LA')

In [23]:
# Replace every entry by its magnitude. An eigenvector is only defined up to a
# scalar factor, so this removes the arbitrary sign the solver may return.
eig_vec = np.absolute(eig_vec)

In [24]:
import pickle

# Persist the real part of the eigenvector and the item-id -> index mapping.
# Context managers guarantee the files are flushed and closed; the previous
# inline open() calls left both handles open.
with open('eigen_vector.p', 'wb') as eigen_file:
    pickle.dump(eig_vec.real, eigen_file)
with open('item_idx_encode.p', 'wb') as encode_file:
    pickle.dump(item_idx_encode, encode_file)

In [40]:
# Load the array stored in 'item_vector.npy' and display it as the cell output.
# NOTE(review): no cell visible here writes 'item_vector.npy'; presumably it is
# produced by a separate step -- confirm its provenance.
np.load('item_vector.npy')

array([[0.62243465, 0.04657814],
       [0.82466717, 0.36608914],
       [0.88808316, 0.45206357],
       [0.84555485, 0.96098431],
       [0.38744332, 0.75583058]])