In [None]:
# import libraries
from pyvis.network import Network  # Interactive network visualization
import pandas as pd # Data Frame
import networkx as nx # Network analysis
from tqdm.notebook import tqdm # Progress bar
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import community as community_louvain

In [None]:
# Read the three BX CSV files; they share the same delimiter and encoding
csv_format = {'sep': ';', 'encoding': 'latin-1'}
ratings_df = pd.read_csv('data/BX-Book-Ratings.csv', **csv_format)
users_df = pd.read_csv('data/BX-Users.csv', **csv_format)
# Lines of the books file that cannot be parsed are skipped instead of raising
books_df = pd.read_csv('data/BX-Books.csv', on_bad_lines='skip', **csv_format)

In [None]:
# Normalise the book column headers: lower-case, hyphens replaced by underscores
books_df.columns = books_df.columns.str.lower().str.replace('-', '_')
books_df.head()

In [None]:
# Normalise the user column headers: lower-case, hyphens replaced by underscores
users_df.columns = users_df.columns.str.lower().str.replace('-', '_')
users_df.head()

In [None]:
# Normalise the rating column headers: lower-case, hyphens replaced by underscores
ratings_df.columns = ratings_df.columns.str.lower().str.replace('-', '_')
ratings_df.head()

In [None]:
# Fit a min-max scaler on the rating column, then store the rescaled ratings
# in a new 'normalized_rating' column
scaler = MinMaxScaler().fit(ratings_df[['book_rating']])
ratings_df['normalized_rating'] = scaler.transform(ratings_df[['book_rating']])

In [None]:
# Creating a new, empty undirected graph; user and book nodes are added to it in a later cell
G = nx.Graph()

In [None]:
# Sampling from ratings dataframe (fixed seed, so the same rows are drawn on every run)
sample_fraction = 0.1
ratings_df_sample = ratings_df.sample(frac=sample_fraction, random_state=42)

# Preparing hover titles for users and books.
# Missing text fields are replaced with 'Unknown' before concatenation: adding NaN to a
# string yields NaN, which would otherwise leave the whole title missing for that row.
users_df['title'] = (
    users_df['user_id'].astype(str)
    + ", " + users_df['location'].fillna('Unknown').astype(str)
    + ", Age: " + users_df['age'].fillna('Unknown').astype(str)
)
user_titles = users_df.set_index('user_id')['title'].to_dict()

books_df['title'] = (
    books_df['isbn'].astype(str)
    + ", " + books_df['book_title'].fillna('Unknown').astype(str)
    + ", " + books_df['book_author'].fillna('Unknown').astype(str)
)
book_titles = books_df.set_index('isbn')['title'].to_dict()

# Adding one user node, one book node and one weighted edge per sampled rating.
# The three needed columns are iterated directly, which visits the rows in the same
# order as iterrows without building a Series object for every row.
rating_rows = zip(
    ratings_df_sample['user_id'],
    ratings_df_sample['isbn'],
    ratings_df_sample['normalized_rating'],
)
for user_id, isbn, weight in tqdm(rating_rows, total=len(ratings_df_sample)):
    # A rating may reference a user or a book that is absent from the lookup tables;
    # both fall back to the same placeholder instead of raising KeyError.
    user_title = user_titles.get(user_id, 'undefined')
    book_title = book_titles.get(isbn, 'undefined')
    G.add_node(user_id, bipartite=0, title=user_title, color='#A7BED3', label='user', type='user')
    G.add_node(isbn, bipartite=1, title=book_title, color='#FFCAAF', label='book', type='book')
    G.add_edge(user_id, isbn, weight=weight)
print(f"Number of nodes in G: {len(G.nodes)}")

In [None]:
# Detect communities with the Louvain method and store each node's community id
# as the 'community' node attribute.
# The seed is pinned (same value as the one used for sampling) so that the partition
# is reproducible across kernel restarts.
# NOTE(review): best_partition reads the 'weight' edge attribute by default, so edges whose
# normalized rating is 0 contribute nothing to modularity — confirm this is intended.
partition = community_louvain.best_partition(G, random_state=42)
nx.set_node_attributes(G, partition, 'community')

In [None]:
# Degree centrality of every node in G, as a dict keyed by node id
degree_centrality = nx.degree_centrality(G)

In [None]:
threshold = 0.001  # Minimum degree centrality a node needs to be kept; adjust as needed

# Show only the highest-scoring nodes: printing the whole dictionary would emit one
# entry per node in the graph and bury the rest of the notebook output.
top_central_nodes = sorted(degree_centrality.items(), key=lambda item: item[1], reverse=True)[:10]
print(f"Top 10 nodes by degree centrality: {top_central_nodes}")

# Filter nodes based on degree centrality
filtered_nodes = [node for node, centrality in degree_centrality.items() if centrality >= threshold]

# Create a subgraph with filtered nodes and their edges
# (G is rebound to the restricted graph, so later cells only see the filtered nodes)
G = G.subgraph(filtered_nodes)
print(f"Number of nodes in filtered_nodes: {len(filtered_nodes)}")

In [None]:
# Re-display the number of nodes that passed the degree-centrality threshold
print(f"Number of nodes in filtered_nodes: {len(filtered_nodes)}")

In [None]:
# Despite the variable name, this keeps the nodes that have MORE than one connection
# in the filtered graph; nodes left with degree 0 or 1 are dropped.
nodes_with_one_connection = [node_id for node_id in G.nodes if G.degree(node_id) > 1]
G = G.subgraph(nodes_with_one_connection)
print(f"Number of nodes in nodes_with_one_connection: {len(nodes_with_one_connection)}")

In [None]:
# Create the pyvis network in notebook mode and select the Barnes-Hut physics model
net = Network(notebook=True)
net.barnes_hut()

In [None]:
# Replace the node ids with consecutive integers starting at 1.
# The previous ids are not kept as a node attribute (label_attribute=None).
# The relabelling is a single blocking call, so no progress bar is used: a bar could only
# jump from 0% to 100% after the work has already finished.
G = nx.relabel.convert_node_labels_to_integers(G, first_label=1, ordering='default', label_attribute=None)

In [None]:
# Display the number of nodes in the relabelled graph
len(G.nodes)

In [None]:
# Load the networkx graph into the pyvis network
net.from_nx(G)

In [None]:
# Write the interactive visualisation to graph.html and open it in the browser
net.write_html('graph.html',open_browser=True)