In [None]:
# Test the library

import networkx as nx
import operator

# Toy directed "person -> fruit" graph used to sanity-check nx.pagerank.
G = nx.DiGraph()

people = ["Alice", "Bob", "Charlie", "Diana"]
fruits = ["Orange", "Apple", "Banana", "Pineapple", "Raspberry"]
# People first, then fruits: same node insertion order as a single flat list.
G.add_nodes_from(people + fruits)

# Each person points at the fruits they like.
likes = {
    "Alice": ["Orange", "Apple"],
    "Bob": ["Orange", "Apple", "Banana"],
    "Charlie": ["Orange", "Apple", "Banana"],
    "Diana": ["Banana", "Pineapple", "Raspberry"],
}
G.add_edges_from(
    (person, fruit) for person, liked in likes.items() for fruit in liked
)

# PageRank personalized on Bob and Alice with equal weight, then listed
# from the highest score to the lowest.
ppr = list(nx.pagerank(G, personalization={"Bob": 1, "Alice": 1}).items())
ppr.sort(key=operator.itemgetter(1), reverse=True)
for item, score in ppr:
    print(item, score)

In [None]:
# Test visualization

# Print the graph: nodes and edges together with their attribute dicts

print(G.nodes(data=True))
print(G.edges(data=True))

# Visualize the graph

import matplotlib.pyplot as plt

plt.figure(figsize=(10, 10))
# Fixed seed: spring_layout starts from random node positions, so without it
# the picture changes on every run of the cell.
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, with_labels=True, node_size=2000, node_color="skyblue")
plt.show()

Implementazione di https://medium.com/eni-digitalks/a-simple-recommender-system-using-pagerank-4a63071c8cbf

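Da https://grouplens.org/datasets/movielens/ scaricare [ml-latest-small.zip](https://files.grouplens.org/datasets/movielens/ml-latest-small.zip) ed estrarre `ratings.csv` e `movies.csv` nella stessa cartella del notebook (le celle seguenti li leggono con un percorso relativo).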

## Recommended for education and development
 
### MovieLens Latest Datasets

These datasets will change over time, and are not appropriate for reporting research results. We will keep the download links stable for automated downloads. We will not archive or make available previously released versions.

_Small_: 100,000 ratings and 3,600 tag applications applied to 9,000 movies by 600 users. Last updated 9/2018.

    README.html
    ml-latest-small.zip (size: 1 MB)

_Full_: approximately 33,000,000 ratings and 2,000,000 tag applications applied to 86,000 movies by 330,975 users. Includes tag genome data with 14 million relevance scores across 1,100 tags. Last updated 9/2018.

    README.html
    ml-latest.zip (size: 335 MB)

Permalink: https://grouplens.org/datasets/movielens/latest/

In [None]:
import pandas as pd

# Load the ratings table from ratings.csv (path relative to the working directory)
ratings = pd.read_csv(filepath_or_buffer="ratings.csv")

In [None]:
# Load the movies table from movies.csv (path relative to the working directory)
movies = pd.read_csv(filepath_or_buffer="movies.csv")

In [None]:
# Merge ratings and movies on movieId so every rating row also carries the
# movie's title (and the other movies.csv columns).
# `ratings` is overwritten, so the merge is guarded: re-running this cell would
# otherwise join a second time and rename the columns to title_x / title_y,
# breaking the later row["title"] lookups.
if "title" not in ratings.columns:
    ratings = pd.merge(ratings, movies, on="movieId")
print(ratings.head())

In [None]:
# Count how many rating rows each title has (one row per title, column "count")
common_movies = ratings.groupby("title").size().rename("count").reset_index()
# Show the first ten titles that have more than one rating
rated_more_than_once = common_movies.loc[common_movies["count"] > 1]
print(rated_more_than_once.head(10))

In [4]:
# Translate a star rating (0.5 to 5, in half-star steps) into the signed
# score that is later stored as the edge weight of the user-movie graph.
mapping_score = {
    # low ratings -> negative score
    0.5: -1,
    1: -1,
    1.5: -0.5,
    # middle ratings -> no contribution
    2: 0,
    2.5: 0,
    3: 0,
    # high ratings -> positive score, with a small bonus above 4 stars
    3.5: 0.5,
    4: 1,
    4.5: 1.1,
    5: 1.2,
}

In [None]:
import networkx as nx

# Build an undirected bipartite graph: user nodes (bipartite=0) on one side,
# movie-title nodes (bipartite=1) on the other.
user_movie_graph = nx.Graph()

# Walk the three needed columns in lockstep instead of DataFrame.iterrows(),
# which builds a full Series object for every rating row.
for user_id, title, rating in zip(ratings["userId"], ratings["title"], ratings["rating"]):
    user_movie_graph.add_node(user_id, bipartite=0)
    user_movie_graph.add_node(title, bipartite=1)
    # The edge weight is the mapped score, not the raw rating; a rating that
    # is missing from mapping_score raises KeyError.
    user_movie_graph.add_edge(user_id, title, weight=mapping_score[rating])

# Debug print to check the graph construction
print(f"Nodes in the graph: {list(user_movie_graph.nodes(data=True))[:10]}")
print(f"Edges in the graph: {list(user_movie_graph.edges(data=True))[:10]}")

In [None]:
# Collect the user side of the bipartite graph (nodes tagged bipartite == 0)
users = set()
for node, attrs in user_movie_graph.nodes(data=True):
    if attrs["bipartite"] == 0:
        users.add(node)

user_preview = list(users)[:10]
print(f"Users: {user_preview}")
print(f"Number of users: {len(users)}")

In [None]:
# Collect the movie side of the bipartite graph (nodes tagged bipartite == 1).
# NOTE: this rebinds `movies`, which until now held the movies.csv DataFrame,
# to a set of movie-title nodes.
movies = set()
for node, attrs in user_movie_graph.nodes(data=True):
    if attrs["bipartite"] == 1:
        movies.add(node)

movie_preview = list(movies)[:10]
print(f"Movies: {movie_preview}")
print(f"Number of movies: {len(movies)}")

In [None]:
# Sanity checks on the user-movie graph: first whether it is bipartite,
# then whether it is connected.
graph_is_bipartite = nx.is_bipartite(user_movie_graph)
print(graph_is_bipartite)
graph_is_connected = nx.is_connected(user_movie_graph)
print(graph_is_connected)

In [None]:
# Project the bipartite graph onto its user nodes.
# NOTE(review): per the networkx docs, weighted_projected_graph weights each
# projected edge by the number of shared neighbours, not by the rating-derived
# "weight" attribute of the original edges -- confirm this is what is wanted.
user_user_graph = nx.bipartite.weighted_projected_graph(user_movie_graph, users)

# Debug print to check the projected graph
user_node_preview = list(user_user_graph.nodes(data=True))[:10]
user_edge_preview = list(user_user_graph.edges(data=True))[:10]
print(f"Nodes in user_user_graph: {user_node_preview}")
print(f"Edges in user_user_graph: {user_edge_preview}")

In [None]:
# Project the bipartite graph onto its movie nodes.
# NOTE(review): per the networkx docs, weighted_projected_graph weights each
# projected edge by the number of shared neighbours, not by the rating-derived
# "weight" attribute of the original edges -- confirm this is what is wanted.
movie_movie_graph = nx.bipartite.weighted_projected_graph(user_movie_graph, movies)

# Debug print to check the projected graph
movie_node_preview = list(movie_movie_graph.nodes(data=True))[:10]
movie_edge_preview = list(movie_movie_graph.edges(data=True))[:10]
print(f"Nodes in movie_movie_graph: {movie_node_preview}")
print(f"Edges in movie_movie_graph: {movie_edge_preview}")

Nota: l'articolo usa `filter_nodes()` ma non ne riporta il codice; la definiamo qui sotto.

In [9]:
# 0: User, 1: Movie
def filter_nodes(graph: "nx.Graph", node_type: int):
    """Return the nodes of ``graph`` whose ``bipartite`` attribute equals ``node_type``.

    Args:
        graph: graph whose nodes may carry a ``bipartite`` attribute.
        node_type: attribute value to select (0 for users, 1 for movies).

    Returns:
        A list of matching nodes, in the graph's node iteration order.
        Nodes that have no ``bipartite`` attribute are skipped instead of
        raising ``KeyError``.
    """
    return [
        node
        for node, attrs in graph.nodes(data=True)
        # Membership test first: a node created without attributes (e.g.
        # implicitly by add_edge) has no "bipartite" key at all.
        if "bipartite" in attrs and attrs["bipartite"] == node_type
    ]

Nell'articolo la funzione è scritta così, ma `"movie"` non esiste come tipo di nodo: nel nostro grafo i tipi sono 0 (utente) e 1 (film).

```python 
def create_preference_vector(user_id: int, user_movie_graph: nx.Graph):    
    edges = {m: v for _, m, v in user_movie_graph.edges(user_id, data="weight")}    
    tot = sum(edges.values())    
    if tot > 0:        
        return {        
            movie: edges.get(movie, 0) / tot        
            for movie in filter_nodes(user_movie_graph, "movie")        
        }    
    else:        
        return {
            movie: 1 for movie in filter_nodes(user_movie_graph, "movie")
        }
```

In [11]:
def create_preference_vector(user_id: int, user_movie_graph: nx.Graph):
    """Build the per-movie preference vector of one user.

    Args:
        user_id: node of ``user_movie_graph`` identifying the user.
        user_movie_graph: graph whose edges from ``user_id`` carry a
            ``weight`` attribute and whose movie nodes are tagged
            ``bipartite == 1``.

    Returns:
        A dict with one entry per movie node. When the user's edge weights
        sum to more than zero, each value is that movie's weight divided by
        the sum (0 for movies without an edge from the user); otherwise
        every movie gets the value 1.
    """
    rated = {}
    for _, movie, weight in user_movie_graph.edges(user_id, data="weight"):
        rated[movie] = weight
    print(f"Edges for user {user_id}: {list(rated)[:10]}")  # Debug print

    total_weight = sum(rated.values())
    print(f"Total for user {user_id}: {total_weight}")  # Debug print

    movie_nodes = filter_nodes(user_movie_graph, 1)  # 1 : Movie
    if total_weight > 0:
        # Individual entries can still be negative when a movie's own
        # weight is negative.
        return {movie: rated.get(movie, 0) / total_weight for movie in movie_nodes}
    # No net positive preference: fall back to the same value for every movie.
    return dict.fromkeys(movie_nodes, 1)

`nx.pagerank_numpy` (usata nell'articolo) non esiste più nelle versioni recenti di NetworkX: al suo posto usiamo `nx.pagerank`.

```python 
def predict_user(user_id, user_movie_graph: nx.Graph, movie_movie_graph: nx.Graph):    
    p_vec = create_preference_vector(user_id,user_movie_graph)
    already_seen = [movie for movie,p in p_vec.items() if p>0]    
    if len(already_seen)<1:
        return []    
    item_rank = nx.pagerank_numpy(movie_movie_graph, personalization=p_vec, alpha=0.95, weight="weight")    
    s_t = [x 
        for x in
        sorted(movie_movie_graph.nodes(),key=lambda x: item_rank[x]          
    if x in item_rank else 0, reverse=True) 
        if x not in already_seen
        ]
    
    return s_t
```

In [14]:
def predict_user(user_id, user_movie_graph: nx.Graph, movie_movie_graph: nx.Graph):
    """Rank the movies ``user_id`` has not rated yet with personalized PageRank.

    Args:
        user_id: node of ``user_movie_graph`` identifying the user.
        user_movie_graph: user-movie graph; the user's neighbours are the
            movies they rated.
        movie_movie_graph: graph over movie nodes on which PageRank is run
            (alpha=0.95, edge attribute ``weight``).

    Returns:
        The nodes of ``movie_movie_graph`` sorted by descending PageRank
        score, without the movies the user already rated. An empty list
        when the user has rated nothing.
    """
    p_vec = create_preference_vector(user_id, user_movie_graph)
    print(f"Preference vector for user {user_id}: {list(p_vec)[:10]}")  # Debug print

    # "Already seen" is every movie the user has an edge to, whatever its
    # score. Deriving it from p > 0 (as before) let movies rated with a
    # score <= 0 come back as recommendations, and marked *every* movie as
    # seen whenever the preference vector fell back to the uniform value.
    already_seen = list(user_movie_graph.neighbors(user_id))
    print(f"Already seen movies for user {user_id}: {list(already_seen)[:10]}")  # Debug print
    if len(already_seen) < 1:
        return []

    # NOTE(review): p_vec can hold negative entries when the user has
    # negatively scored movies -- confirm nx.pagerank treats them as intended.
    item_rank = nx.pagerank(movie_movie_graph, personalization=p_vec, alpha=0.95, weight="weight")
    print(f"Item rank for user {user_id}: {list(item_rank)[:10]}")  # Debug print

    seen_lookup = set(already_seen)  # constant-time membership while filtering
    ranked_movies = sorted(
        movie_movie_graph.nodes(),
        key=lambda movie: item_rank.get(movie, 0),
        reverse=True,
    )
    return [movie for movie in ranked_movies if movie not in seen_lookup]

In [None]:
# Recommend movies for one user and show the ten best-ranked ones
user = 10
s_t = predict_user(
    user_id=user,
    user_movie_graph=user_movie_graph,
    movie_movie_graph=movie_movie_graph,
)
top_predictions = s_t[:10]
print(f"Predicted movies for user {user}: {top_predictions}")