In [None]:
%run Functions.ipynb
%matplotlib inline
import numpy as np

# Seed NumPy's global RNG so that later np.random draws repeat between runs.
np.random.seed(seed=42)

# Scores gathered by the experiments below, keyed by a short method label.
result = dict()

# Loading and preparing the data

## Preparing the relations

In [None]:
# Generate the synthetic data set.
# NOTE(review): `intra` and `inter` are not defined in any visible cell of this
# notebook; presumably they are provided by Functions.ipynb via %run — confirm.
unique_users, unique_tags, unique_items, pairs, train_set, test_set = synthetic_data(intra, inter)

# Relation matrices, filled from the entries of train_set only.
user_item = np.zeros((len(unique_users), len(unique_items)))
user_tag = np.zeros((len(unique_users), len(unique_tags)))
item_tag = np.zeros((len(unique_items), len(unique_tags)))


def _first_positions(values):
    """Map every value to the position of its first occurrence in `values`.

    Gives the same answer as `values.index(v)` for each v, but is built in a
    single pass so each later lookup is a dict access instead of a list scan.
    """
    positions = {}
    for position, value in enumerate(values):
        positions.setdefault(value, position)
    return positions


user_pos = _first_positions(unique_users)
item_pos = _first_positions(unique_items)
tag_pos = _first_positions(unique_tags)

for rating in train_set:
    # rating[0] is looked up among the users, rating[1] among the items.
    user_row = user_pos[rating[0]]
    item_row = item_pos[rating[1]]
    user_item[user_row, item_row] = 1
    # Every tag attached to this (user, item) pair counts once for the user
    # and once for the item.
    for entry in pairs[(rating[0], rating[1])]:
        tag_col = tag_pos[entry]
        user_tag[user_row, tag_col] += 1
        item_tag[item_row, tag_col] += 1

# Recommendation

In [None]:
# Baseline scores: a random matrix with the same shape as user_item.
sizeU, sizeI = user_item.shape[0], user_item.shape[1]
random_matrix = np.random.rand(sizeU, sizeI)

In [None]:
# predicted[u, i]: sum over tags of (user u's tag count) * (item i's tag count).
predicted = np.dot(user_tag, item_tag.T)

# paths[u, i]: number of tags with a non-zero count for both user u and item i,
# i.e. the same product computed on 0/1 indicator matrices.
user_has_tag = (user_tag > 0) * 1.
tag_has_item = (item_tag.T > 0) * 1.
paths = np.dot(user_has_tag, tag_has_item)

# normalize is defined outside this cell; presumably it rescales `predicted`
# using `paths` — confirm in Functions.ipynb.
pred_normalized = normalize(predicted, paths)

# Each score matrix is evaluated against test_set, stored in `result` under its
# label, and printed. `predicted` and `paths` are passed as fresh copies (*1.).
r_base = result["Baseline"] = recall(evaluate(random_matrix, test_set), 20)
print("Baseline: ", r_base)

r_multi = result["Multiplication"] = recall(evaluate(predicted*1., test_set), 20)
print("Multiplication: ", r_multi)

r_paths = result["Paths"] = recall(evaluate(paths*1., test_set), 20)
print("Paths: ", r_paths)

r_norm = result["Normalized"] = recall(evaluate(pred_normalized, test_set), 20)
print("Normalized: ", r_norm)

## Pareto
We can use both the predicted value and the number of paths together to get the Pareto frontier.

In [None]:
# The path counts are handed to evaluate as a third argument, alongside the
# predicted scores, so both criteria are available to it.
r_pareto = result["Pareto"] = recall(evaluate(predicted, test_set, paths), 20)
print("Pareto: ", r_pareto)

# DFMF
Using DFMF, we can fill in the missing entries of the matrices and use the completed matrices to make predictions.

We can use factor matrices to approximate the original data.

In [None]:
# Factorise the three relation matrices with DFMF.
# NOTE(review): the leading 5 is presumably the factorisation rank — confirm
# against dfmf in Functions.ipynb.
G, S = dfmf(5, user_item, user_tag, item_tag)
(G_users, G_items, G_tags), (S_user_tag, S_tag_item, S_user_item) = G, S

# Approximate user-item scores as G_users * S_user_item * G_items^T,
# multiplying right-to-left.
pred_dfmf = np.dot(G_users, np.dot(S_user_item, G_items.T))

r_dfmf = result["GSG"] = recall(evaluate(pred_dfmf, test_set), 20)
print("GSG: ", r_dfmf)

In [None]:
# GSSG: G_users * S_user_tag * S_tag_item * G_items^T, accumulated from the
# right-most factor outwards so the multiplication order is fixed.
chain_dfmf = G_items.T
for left_factor in (S_tag_item, S_user_tag, G_users):
    chain_dfmf = np.dot(left_factor, chain_dfmf)
r_gssg = result["GSSG"] = recall(evaluate(chain_dfmf, test_set), 20)
print("GSSG: ", r_gssg)

# GSGGSG: the same chain, but passing through G_tags^T * G_tags between the
# two S factors; again accumulated right-to-left.
chain_dfmf = G_items.T
for left_factor in (S_tag_item, G_tags, G_tags.T, S_user_tag, G_users):
    chain_dfmf = np.dot(left_factor, chain_dfmf)
r_gsg = result["GSGGSG"] = recall(evaluate(chain_dfmf, test_set), 20)
print("GSGGSG: ", r_gsg)

## Paths with threshold

Since DFMF returns full matrices, we cannot simply count the number of connections, because all objects are connected. Instead, we filter these connections according to how relevant / strong they are. We try this first with a single global filter and then with local filters for each element.

In [None]:
# b = G_users * S_user_tag * G_tags^T and d = G_tags * S_tag_item * G_items^T,
# each multiplied right-to-left; both are inputs to the filter functions below.
b = np.dot(G_users, np.dot(S_user_tag, G_tags.T))
d = np.dot(G_tags, np.dot(S_tag_item, G_items.T))

In [None]:
# matrixFilter (defined outside this notebook's visible cells) returns four values.
# NOTE(review): the label is printed after the call, so any output produced by
# matrixFilter itself appears above it — confirm this is intended.
e, f, g, h = matrixFilter(b, d)
print("Matrix filter")

In [None]:
# Store the four matrixFilter outputs under their MF_* labels, in this order.
result.update({"MF_Pat": e, "MF_FV": f, "MF_Par": g, "MF_PF": h})

In [None]:
# rowFilter (defined outside this notebook's visible cells) returns four values;
# they overwrite e, f, g, h from the matrix-filter run.
# NOTE(review): the label is printed after the call, so any output produced by
# rowFilter itself appears above it — confirm this is intended.
e, f, g, h = rowFilter(b, d)
print("Row filter")

In [None]:
# Store the four rowFilter outputs under their RF_* labels, in this order.
result.update({"RF_Pat": e, "RF_FV": f, "RF_Par": g, "RF_PF": h})