In [6]:
import os
import pandas as pd


In [7]:
df_recommender_path = os.path.join("exports/df_recommender.pkl")
df_recommender = pd.read_pickle(df_recommender_path)
df_recommender['subject_id'] = df_recommender['subject_id'].astype(str).str.strip()
df_recommender['node label'] = df_recommender['node label'].astype(str).str.strip()
df_recommender['director'] = df_recommender['director'].astype(str).str.strip()
df_recommender['performer'] = df_recommender['performer'].astype(str).str.strip()
df_recommender['genre'] = df_recommender['genre'].astype(str).str.strip()
df_recommender['screenwriter'] = df_recommender['screenwriter'].astype(str).str.strip()


In [9]:
print(df_recommender.head())
print(len(df_recommender))

predicate_label                               subject_id  \
0                http://www.wikidata.org/entity/Q1000825   
1                http://www.wikidata.org/entity/Q1000826   
2                http://www.wikidata.org/entity/Q1001759   
3                http://www.wikidata.org/entity/Q1001777   
4                http://www.wikidata.org/entity/Q1001943   

predicate_label                                        cast member  \
0                wd:Q16305292,pimprapa tangprabhaporn,santisuk ...   
1                wende wagner,fernando rey,frank silvera,michae...   
2                billy chow,shinobu nakayama,toshimichi takahas...   
3                silk smitha,poornam vishwanathan,sridevi,kamal...   
4                burt lancaster,pat mccormick,denver pyle,geral...   

predicate_label                  director performer  \
0                         nonzee nimibutr       nan   
1                            paul wendkos       nan   
2                gordon chan,yuen wooping       nan  

In [11]:
import os
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize
import numpy as np

# Load data
df_recommender_path = os.path.join("exports/df_recommender.pkl")
df_recommender = pd.read_pickle(df_recommender_path)
df_recommender['subject_id'] = df_recommender['subject_id'].astype(str).str.strip()
df_recommender['node label'] = df_recommender['node label'].astype(str).str.strip()
df_recommender['director'] = df_recommender['director'].astype(str).str.strip()
df_recommender['performer'] = df_recommender['performer'].astype(str).str.strip()
df_recommender['genre'] = df_recommender['genre'].astype(str).str.strip()
df_recommender['screenwriter'] = df_recommender['screenwriter'].astype(str).str.strip()

# Define the weight parameters for each type of metadata
weight_params = {
    'director': 0.2,
    'performer': 0.3,
    'genre': 0.25,
    'screenwriter': 0.15,
    'node label': 0.1,
}

# Create a unique mapping for each metadata attribute
all_items = pd.unique(df_recommender[['subject_id', 'node label', 'director', 'performer', 'genre', 'screenwriter']].values.ravel())
item_to_index = {item: idx for idx, item in enumerate(all_items)}
index_to_item = {idx: item for item, idx in item_to_index.items()}

# Construct a weighted interaction matrix
rows, cols, data = [], [], []

for _, row in df_recommender.iterrows():
    subject_id_idx = item_to_index[row['subject_id']]
    for field, weight in weight_params.items():
        for item in row[field].split(','):
            item = item.strip()
            if item in item_to_index:
                item_idx = item_to_index[item]
                rows.append(subject_id_idx)
                cols.append(item_idx)
                data.append(weight)

# Build the sparse interaction matrix
interaction_matrix = csr_matrix((data, (rows, cols)), shape=(len(all_items), len(all_items)))

# Normalize the interaction matrix
interaction_matrix = normalize(interaction_matrix, norm='l1', axis=1)

# Implement the RP3β algorithm with β parameter (assume β = 0.85)
beta = 0.85
num_items = interaction_matrix.shape[0]

# Personalized PageRank with Restart
def rp3beta_recommendations(interaction_matrix, item_idx, beta=0.85, top_k=10):
    scores = np.zeros(num_items)
    scores[item_idx] = 1
    for _ in range(20):  # Run for a fixed number of iterations or until convergence
        scores = beta * interaction_matrix.T.dot(scores) + (1 - beta) * scores
    top_recommended_idx = np.argsort(scores)[::-1][:top_k]
    return [(index_to_item[idx], scores[idx]) for idx in top_recommended_idx if idx != item_idx]

# Example usage
example_item = "Avatar"  # Replace with actual item you want recommendations for
if example_item in item_to_index:
    item_idx = item_to_index[example_item]
    recommendations = rp3beta_recommendations(interaction_matrix, item_idx)
    print("Top recommendations:")
    for rec, score in recommendations:
        print(f"Item: {rec}, Score: {score}")
else:
    print("Item not found in index.")


Top recommendations:
Item: First Daughter, Score: 0.0
Item: http://www.wikidata.org/entity/Q997850, Score: 0.0
Item: tia lessin,carl deal, Score: 0.0
Item: Trouble the Water, Score: 0.0
Item: spike lee, Score: 0.0
Item: romantic comedy,crime film,fantasy film, Score: 0.0
Item: http://www.wikidata.org/entity/Q1001777, Score: 0.0
Item: Moondram Pirai, Score: 0.0
Item: http://www.wikidata.org/entity/Q997694, Score: 0.0
