In [1]:
import pandas as pd
import networkx as nx
from collections import defaultdict

# Load the MovieLens "latest-small" tables. `movies` is read first so the
# ratings table can be joined to it in a single chained expression.
movies = pd.read_csv("ml-latest-small/ml-latest-small/movies.csv")

# Attach each movie's columns to every rating row (join key: movieId).
ratings = pd.read_csv("ml-latest-small/ml-latest-small/ratings.csv").merge(
    movies, on="movieId"
)
print(ratings.head())

   userId  movieId  rating   timestamp             title  \
0       1        1     4.0   964982703  Toy Story (1995)   
1       5        1     4.0   847434962  Toy Story (1995)   
2       7        1     4.5  1106635946  Toy Story (1995)   
3      15        1     2.5  1510577970  Toy Story (1995)   
4      17        1     4.5  1305696483  Toy Story (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1  Adventure|Animation|Children|Comedy|Fantasy  
2  Adventure|Animation|Children|Comedy|Fantasy  
3  Adventure|Animation|Children|Comedy|Fantasy  
4  Adventure|Animation|Children|Comedy|Fantasy  


In [2]:
# Sanity check: count the rating rows per title and show the first titles
# that appear in more than one row.
common_movies = (
    ratings.groupby("title")
    .size()
    .reset_index(name="count")
)
print(common_movies.loc[common_movies["count"].gt(1)].head(10))  # Debug

                             title  count
2           'Round Midnight (1986)      2
4        'Til There Was You (1997)      2
6               'burbs, The (1989)     17
8      (500) Days of Summer (2009)     42
9   *batteries not included (1987)      7
11   ...And Justice for All (1979)      3
14                       10 (1979)      4
15           10 Cent Pistol (2015)      2
16      10 Cloverfield Lane (2016)     14
17         10 Items or Less (2006)      3


In [3]:
# Score mapping: converts a star rating (0.5 .. 5 in half-star steps) into the
# signed weight stored on the corresponding user-movie edge.
mapping_score = {
    # low ratings -> negative weight
    0.5: -1, 1: -1, 1.5: -0.5,
    # middle ratings -> zero weight
    2: 0, 2.5: 0, 3: 0,
    # high ratings -> positive weight, increasing with the rating
    3.5: 0.5, 4: 1, 4.5: 1.1, 5: 1.2,
}

# ratings["weight"] = ratings["rating"].map(mapping_score)  # not present in the original code
# print(ratings.head())

In [4]:
# Build the bipartite user-movie graph: user ids on side 0, movie titles on
# side 1, and one edge per rating weighted through `mapping_score`.
user_movie_graph = nx.Graph()

# Walk the three needed columns in lockstep instead of DataFrame.iterrows(),
# which materialises a full Series object for every rating row. Nodes and
# edges are still inserted in row order, so the graph is the same as before.
for user_id, title, rating in zip(ratings["userId"], ratings["title"], ratings["rating"]):
    user_movie_graph.add_node(user_id, bipartite=0)
    user_movie_graph.add_node(title, bipartite=1)
    # A rating value missing from `mapping_score` raises KeyError here.
    user_movie_graph.add_edge(user_id, title, weight=mapping_score[rating])

# Debug
print(f"Nodes in the graph: {list(user_movie_graph.nodes(data=True))[:10]}")
print(f"Edges in the graph: {list(user_movie_graph.edges(data=True))[:10]}")

Nodes in the graph: [(1, {'bipartite': 0}), ('Toy Story (1995)', {'bipartite': 1}), (5, {'bipartite': 0}), (7, {'bipartite': 0}), (15, {'bipartite': 0}), (17, {'bipartite': 0}), (18, {'bipartite': 0}), (19, {'bipartite': 0}), (21, {'bipartite': 0}), (27, {'bipartite': 0})]
Edges in the graph: [(1, 'Toy Story (1995)', {'weight': 1}), (1, 'Grumpier Old Men (1995)', {'weight': 1}), (1, 'Heat (1995)', {'weight': 1}), (1, 'Seven (a.k.a. Se7en) (1995)', {'weight': 1.2}), (1, 'Usual Suspects, The (1995)', {'weight': 1.2}), (1, 'From Dusk Till Dawn (1996)', {'weight': 0}), (1, 'Bottle Rocket (1996)', {'weight': 1.2}), (1, 'Braveheart (1995)', {'weight': 1}), (1, 'Rob Roy (1995)', {'weight': 1.2}), (1, 'Canadian Bacon (1995)', {'weight': 1.2})]


In [5]:
# Project the bipartite graph onto each of its two sides.
# NOTE(review): `movies` is rebound here from the movies DataFrame loaded in
# the first cell to a set of title nodes; later cells read this set.
users, movies = set(), set()
for node, attrs in user_movie_graph.nodes(data=True):
    side = attrs["bipartite"]
    if side == 0:
        users.add(node)
    elif side == 1:
        movies.add(node)

# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so the
# projections appear to be expensive on the full dataset.
user_user_graph = nx.bipartite.weighted_projected_graph(user_movie_graph, users)
movie_movie_graph = nx.bipartite.weighted_projected_graph(user_movie_graph, movies)

# Debug output for the projections
print(f"Nodes in user_user_graph: {list(user_user_graph.nodes(data=True))[:10]}")
print(f"Edges in user_user_graph: {list(user_user_graph.edges(data=True))[:10]}")
print(f"Nodes in movie_movie_graph: {list(movie_movie_graph.nodes(data=True))[:10]}")
print(f"Edges in movie_movie_graph: {list(movie_movie_graph.edges(data=True))[:10]}")

KeyboardInterrupt: 

In [32]:
# Function that computes a user's preference vector
def create_preference_vector(user_id: int, user_movie_graph: "nx.Graph"):
    """Return a ``{movie: preference}`` dict covering every movie node.

    Args:
        user_id: Node of ``user_movie_graph`` whose rating edges are read.
        user_movie_graph: Bipartite graph whose movie nodes carry the node
            attribute ``bipartite == 1`` and whose edges carry ``weight``.

    Returns:
        If the user's edge weights sum to a positive total, each movie maps
        to ``weight / total`` (``0`` for movies the user has no edge to;
        entries are negative for negatively weighted edges). Otherwise every
        movie maps to ``1``.
    """
    # Read the movie side from the graph that was passed in rather than from
    # the notebook-level name `movies`, which holds a DataFrame after the
    # loading cell and a set of titles only after the projection cell.
    movie_nodes = [
        node
        for node, side in user_movie_graph.nodes(data="bipartite")
        if side == 1
    ]

    # Edges are reported as (user_id, movie, weight).
    rated = {
        movie: weight
        for _, movie, weight in user_movie_graph.edges(user_id, data="weight")
    }
    total = sum(rated.values())

    if total > 0:
        return {movie: rated.get(movie, 0) / total for movie in movie_nodes}
    # No edges, or the weights cancel out / are negative overall: fall back
    # to a uniform vector.
    return {movie: 1 for movie in movie_nodes}


*Link Prediction*

In [None]:
# Link Prediction function based on the Adamic-Adar index
def predict_links(user_id, watched_movies, movie_movie_graph, verbose=False):
    """Score unwatched movies by their Adamic-Adar index to watched ones.

    Args:
        user_id: Not used by the computation; kept for the caller's signature.
        watched_movies: Iterable of movie nodes present in
            ``movie_movie_graph``; each one is used as a seed.
        movie_movie_graph: Graph whose nodes are movies.
        verbose: When true, print the running score after every scored pair.
            Off by default because the print sits in the innermost loop.

    Returns:
        List of ``(movie, score)`` tuples sorted by descending score, where
        ``score`` is the sum of the Adamic-Adar indices between that movie
        and every seed it is adjacent to. Seeds themselves are never scored.
    """
    # Materialise once so a one-shot iterable is not exhausted, and keep a
    # set so the membership test below is not a linear scan of a list.
    watched_movies = list(watched_movies)
    watched = set(watched_movies)

    predictions = defaultdict(float)
    for movie in watched_movies:
        candidate_pairs = [
            (movie, neighbor)
            for neighbor in movie_movie_graph.neighbors(movie)
            if neighbor not in watched
        ]
        for _, candidate, score in nx.adamic_adar_index(movie_movie_graph, candidate_pairs):
            predictions[candidate] += score
            if verbose:
                print(f"Debug - {movie} -> {candidate}: Adamic-Adar Index = {predictions[candidate]}")  # Debug
    return sorted(predictions.items(), key=lambda x: x[1], reverse=True)


In [33]:
# # Funzione per la Link Prediction
# def predict_links(user_id, watched_movies, movie_movie_graph):
#     predictions = defaultdict(float)
#     for movie in watched_movies:
#         for neighbor in movie_movie_graph.neighbors(movie):
#             if neighbor not in watched_movies:
#                 predictions[neighbor] += predict_links(movie, neighbor, movie_movie_graph)
#                 print(f"Debug - {movie} -> {neighbor}: Similarity = {predictions[neighbor]}")  # Debug
#     return sorted(predictions.items(), key=lambda x: x[1], reverse=True)

In [42]:
# from matplotlib import pyplot as plt


# def calculate_and_plot_similarities(movie_movie_graph: nx.Graph):
#     nodes = list(movie_movie_graph.nodes())
#     similarities = []  # Lista per memorizzare i punteggi di similarità
#     predicted_edges = []  # Lista per memorizzare gli archi con la similarità predetta

#     # Calcolare la similarità per tutte le coppie di film non collegate
#     for i, movie1 in enumerate(nodes):
#         for movie2 in nodes[i + 1:]:
#             if not movie_movie_graph.has_edge(movie1, movie2):  # Solo coppie non collegate
#                 similarity = common_neighbors_similarity(movie1, movie2, movie_movie_graph)
#                 similarities.append(similarity)
#                 predicted_edges.append((movie1, movie2, similarity))  # Aggiungi gli archi predetti

#     # Plot dell'istogramma che mostra la frequenza dei punteggi di similarità
#     plt.hist(similarities, bins=30, density=True, alpha=0.6, color='g')
#     plt.xlabel('Common Neighbors Similarity')
#     plt.ylabel('Frequency')
#     plt.title('Histogram of Predicted Edge Similarities')
#     plt.show()

#     return predicted_edges  # Restituisce la lista degli archi con i punteggi di similarità


In [None]:
def predict_user(user_id, user_movie_graph: nx.Graph, movie_movie_graph: nx.Graph, alpha=0.5, pagerank_alpha=0.95):
    """Recommend unrated movies by blending link prediction and PageRank.

    Args:
        user_id: User node in ``user_movie_graph``.
        user_movie_graph: Bipartite user-movie graph with weighted edges.
        movie_movie_graph: Movie-side projection used for both scores.
        alpha: Blend factor; the final score is
            ``alpha * link_prediction + (1 - alpha) * pagerank`` after each
            score has been divided by its maximum.
        pagerank_alpha: Damping factor passed to ``nx.pagerank``.

    Returns:
        List of ``(movie, combined_score)`` sorted by descending score,
        containing only movies the user has no rating edge to.
    """
    # Personalised preference vector
    # NOTE(review): entries are negative for negatively weighted ratings;
    # confirm that is the intended input for PageRank personalization.
    p_vec = create_preference_vector(user_id, user_movie_graph)

    # Every movie the user has rated, read from the graph itself. Inferring it
    # from `p > 0` misses movies rated with a zero/negative weight (they could
    # then be recommended again) and, when the vector is the uniform fallback,
    # marks every movie as seen so that nothing is left to recommend.
    already_seen = list(user_movie_graph.neighbors(user_id))
    seen_lookup = set(already_seen)
    print(f"Already seen movies for user {user_id}: {already_seen} \n")  # Debug

    # Link prediction is seeded only with rated movies that have a positive
    # preference, which is the seed set the `p > 0` test selects.
    liked_movies = [movie for movie in already_seen if p_vec.get(movie, 0) > 0]
    link_predictions = predict_links(user_id, liked_movies, movie_movie_graph)
    print(f"Link predictions for user {user_id}: {link_predictions[:10]} \n")  # Debug

    # Personalised PageRank on the movie projection
    pagerank_scores = nx.pagerank(movie_movie_graph, personalization=p_vec, alpha=pagerank_alpha, weight="weight")
    print(f"PageRank scores for user {user_id}: {list(pagerank_scores.items())[:10]} \n")  # Debug

    # Normalise both scores by their maximum; `or 1` keeps the division safe
    # when a score list is empty or its maximum is exactly zero.
    max_lp = max((score for _, score in link_predictions), default=0) or 1
    max_pr = max(pagerank_scores.values(), default=0) or 1

    normalized_lp = {movie: score / max_lp for movie, score in link_predictions}
    normalized_pr = {movie: score / max_pr for movie, score in pagerank_scores.items()}

    # Blend the two scores for every movie the user has not rated yet
    combined_scores = {
        movie: alpha * normalized_lp.get(movie, 0) + (1 - alpha) * pr_score
        for movie, pr_score in normalized_pr.items()
        if movie not in seen_lookup
    }

    # Highest combined score first
    return sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)


In [None]:
# Produce recommendations for one specific user and show the top ten
user = 10  # Example user id
recommended_movies = predict_user(
    user_id=user,
    user_movie_graph=user_movie_graph,
    movie_movie_graph=movie_movie_graph,
)
print(f"Recommended movies for user {user}: {recommended_movies[:10]}")