# Notebook di test per sistema item based

In [30]:
import psycopg2
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
import numpy as np

In [31]:
DB_HOST = "localhost"
DB_PORT = "5432"
DB_NAME = "bookstore"
DB_USER = "user"
DB_PASSWORD = "password"

In [32]:
# funzione per connettersi al database
def connect_to_db():
    connection = psycopg2.connect(
        host=DB_HOST,
        port=DB_PORT,
        dbname=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD
    )
    return connection

In [33]:
# funzione per ottenere i dati dal database e caricarli in una user_book matrix
def load_data(connection):
    query_books = """
        SELECT 
            id, 
            title, 
            author, 
            category, 
            description, 
            price 
        FROM books;
    """
    book_info = pd.read_sql(query_books, connection).set_index('id')

    query_reviews = """
        SELECT 
            user_id, 
            book_id AS id,  -- Mantenendo l'associazione con il nome della colonna nel db
            rating AS review_score
        FROM reviews;
    """
    reviews_data = pd.read_sql(query_reviews, connection)
    
    user_book_matrix = reviews_data.pivot(index='user_id', columns='id', values='review_score').fillna(0)
    
    return user_book_matrix, book_info

In [34]:
def recommend_books(book_id, similarity_matrix, num_recommendations=10):
    if book_id not in similarity_matrix.index:
        print(f"Book ID {book_id} not found in similarity matrix.")
        return []
    
    similar_scores = similarity_matrix[book_id]
    similar_books = similar_scores.sort_values(ascending=False).drop(book_id)
    
    return similar_books.head(num_recommendations).index.tolist()

In [35]:
def recommend_books_svd(book_id, similarity_matrix, num_recommendations=10):
    if book_id not in similarity_matrix.index:
        print(f"Book ID {book_id} not found in similarity matrix.")
        return []
    
    similar_scores = similarity_matrix[book_id]
    similar_books = similar_scores.sort_values(ascending=False).drop(book_id)
    
    return similar_books.head(num_recommendations).index.tolist()

In [36]:
connection = connect_to_db()

In [37]:
#user_book_matrix, book_info = load_data(connection) # da errore

In [38]:
query_books = """
    SELECT 
        id AS Unified_Id, 
        title AS Title, 
        author AS authors, 
        category AS categories, 
        description
    FROM books;
"""

# Esegui la query e carica i risultati in un DataFrame
book_info = pd.read_sql(query_books, connection)

# Mostra le colonne e le prime righe per verificare i dati
print("Columns in book_info:", book_info.columns)
print(book_info.head())

Columns in book_info: Index(['unified_id', 'title', 'authors', 'categories', 'description'], dtype='object')
   unified_id                                              title  \
0         879  1491: new revelations of the americas before c...   
1       63125           go tell it on the mountain (a dell book)   
2      180730                            the third galaxy reader   
3       25375                                         brick lane   
4       63126  go tell it on the mountain (a dell contemporar...   

               authors           categories  \
0  ['Charles C. Mann']          ['History']   
1              Unknown              Unknown   
2   ['Horace L. Gold']  ['Science fiction']   
3       ['Monica Ali']          ['Fiction']   
4    ['James Baldwin']              Unknown   

                                         description  
0  Hoe de wereld zich ontwikkelde na de ontdekkin...  
1                                              Empty  
2                                 

  book_info = pd.read_sql(query_books, connection)


In [39]:
user_book_matrix, book_info = load_data(connection)

  book_info = pd.read_sql(query_books, connection).set_index('id')
  reviews_data = pd.read_sql(query_reviews, connection)


In [40]:
print("Matrice utente-libro:")
print(user_book_matrix.head())
print("\nInformazioni sui libri:")
print(book_info.head())

Matrice utente-libro:
id              10      25      388     879     920     1000    1031    \
user_id                                                                  
A106016KSI0YQ      0.0     0.0     0.0     0.0     0.0     0.0     0.0   
A107C4RVRF0OP      0.0     0.0     0.0     0.0     0.0     0.0     0.0   
A10872FHIJAKKD     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
A10A1S5NAQBT21     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
A10BZSGALQPS0V     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

id              1062    1108    1179    ...  202294  202295  202297  202339  \
user_id                                 ...                                   
A106016KSI0YQ      0.0     0.0     0.0  ...     0.0     0.0     0.0     0.0   
A107C4RVRF0OP      0.0     0.0     0.0  ...     0.0     0.0     0.0     0.0   
A10872FHIJAKKD     0.0     0.0     0.0  ...     0.0     0.0     0.0     0.0   
A10A1S5NAQBT21     0.0     0.0     0.0  ...     0.0     0.0     

In [41]:
book_similarity = cosine_similarity(user_book_matrix.T)
book_similarity_df = pd.DataFrame(book_similarity, index=user_book_matrix.columns, columns=user_book_matrix.columns)

print("Matrice di similarità semplice creata.")

Matrice di similarità semplice creata.


In [42]:
k = 50
user_book_sparse = csr_matrix(user_book_matrix.T)

# Esegui SVD
U, sigma, Vt = svds(user_book_sparse.T, k=k)
sigma_matrix = np.diag(sigma)
book_factors = np.dot(sigma_matrix, Vt)

# Calcola la similarità tra i libri basata sui fattori SVD
book_similarity_svd = cosine_similarity(book_factors.T)
book_similarity_svd_df = pd.DataFrame(book_similarity_svd, index=user_book_matrix.columns, columns=user_book_matrix.columns)

print("Matrice di similarità basata su SVD creata.")

Matrice di similarità basata su SVD creata.


In [43]:
book_id = 35847

recommendations_simple = recommend_books(book_id, book_similarity_df, num_recommendations=10)
print("Recommended books (simple) for Book ID", book_id, ":", recommendations_simple)

recommendations_svd = recommend_books_svd(book_id, book_similarity_svd_df, num_recommendations=10)
print("Recommended books (SVD) for Book ID", book_id, ":", recommendations_svd)

Recommended books (simple) for Book ID 35847 : [35851, 137236, 83293, 180520, 2311, 28676, 146248, 10041, 47747, 119261]
Recommended books (SVD) for Book ID 35847 : [35851, 137236, 28676, 2311, 180520, 83293, 10041, 146248, 47747, 1373]


In [44]:
# Dettagli del libro originale
original_book = book_info.loc[book_id]
print("\nDettagli del libro originale:")
display(pd.DataFrame([original_book]).rename(index={book_id: "Original"}))

# Dettagli dei libri raccomandati (similarità semplice)
recommended_books_simple = book_info.loc[recommendations_simple]
print("\nLibri raccomandati (similarità semplice):")
display(recommended_books_simple.reset_index().rename(columns={"index": "Unified_Id"}))

# Dettagli dei libri raccomandati (SVD)
recommended_books_svd = book_info.loc[recommendations_svd]
print("\nLibri raccomandati (SVD):")
display(recommended_books_svd.reset_index().rename(columns={"index": "Unified_Id"}))


Dettagli del libro originale:


Unnamed: 0,title,author,category,description,price
Original,congo,['Dirk Jan Koch'],['Political Science'],Als Dirk-Jan Koch bij Buitenlandse Zaken aange...,10.0



Libri raccomandati (similarità semplice):


Unnamed: 0,id,title,author,category,description,price
0,35851,congo: the movie storybook,"['Ron Fontes', 'Justine Korman', 'Michael Cric...",['Fiction'],"On an expedition to return Amy, his talking go...",20.0
1,137236,sphere,['Michael Crichton'],['Fiction'],"From the author of Jurassic Park, Timeline, an...",22.0
2,83293,jurassic park,['Michael Crichton'],['Fiction'],An American bioengineering research firm erect...,11.0
3,180520,the terminal man.,['Michael Crichton'],['Fiction'],"From the bestselling author of Jurassic Park, ...",24.0
4,2311,a case of need,Unknown,Unknown,Empty,15.0
5,28676,case of need,['Michael Crichton'],['Abortion'],Was it murder? Was it horribly botched surgery...,13.0
6,146248,the andromeda strain,['Michael Crichton'],['Fiction'],"From the author of Jurassic Park, Timeline, an...",24.0
7,10041,airframe,['Michael Crichton'],['Fiction'],A fatal mid-air collision involving a commerci...,9.0
8,47747,eaters of the dead,['Michael Crichton'],['Fiction'],An ambassador of the tenth-century Caliph of B...,13.0
9,119261,prey,['Michael Chinery'],['Predation (Biology)'],Tropical rainforests cover only about 6% of th...,4.0



Libri raccomandati (SVD):


Unnamed: 0,id,title,author,category,description,price
0,35851,congo: the movie storybook,"['Ron Fontes', 'Justine Korman', 'Michael Cric...",['Fiction'],"On an expedition to return Amy, his talking go...",20.0
1,137236,sphere,['Michael Crichton'],['Fiction'],"From the author of Jurassic Park, Timeline, an...",22.0
2,28676,case of need,['Michael Crichton'],['Abortion'],Was it murder? Was it horribly botched surgery...,13.0
3,2311,a case of need,Unknown,Unknown,Empty,15.0
4,180520,the terminal man.,['Michael Crichton'],['Fiction'],"From the bestselling author of Jurassic Park, ...",24.0
5,83293,jurassic park,['Michael Crichton'],['Fiction'],An American bioengineering research firm erect...,11.0
6,10041,airframe,['Michael Crichton'],['Fiction'],A fatal mid-air collision involving a commerci...,9.0
7,146248,the andromeda strain,['Michael Crichton'],['Fiction'],"From the author of Jurassic Park, Timeline, an...",24.0
8,47747,eaters of the dead,['Michael Crichton'],['Fiction'],An ambassador of the tenth-century Caliph of B...,13.0
9,1373,3001: the final odyssey (g k hall large print ...,['Arthur Charles Clarke'],['Fiction'],In the year 2110 technology has cured most of ...,4.0


In [45]:
def combined_recommendation(book_id, simple_similarity_matrix, svd_similarity_matrix, num_recommendations=5):
    # Primi 5 consigli con similarità semplice
    simple_recommendations = recommend_books(book_id, simple_similarity_matrix, num_recommendations=num_recommendations)
    
    # Consigli aggiuntivi con similarità SVD, escludendo quelli già raccomandati
    svd_recommendations = recommend_books_svd(book_id, svd_similarity_matrix, num_recommendations=num_recommendations*2) # Prende extra per sicurezza
    svd_recommendations = [rec for rec in svd_recommendations if rec not in simple_recommendations][:num_recommendations]
    
    combined_recommendations = simple_recommendations + svd_recommendations
    
    return combined_recommendations

In [46]:
book_id = 1108

combined_recommendations = combined_recommendation(book_id, book_similarity_df, book_similarity_svd_df)

In [47]:
original_book = book_info.loc[book_id]
print("\nDettagli del libro originale:")
display(pd.DataFrame([original_book]).rename(index={book_id: "Original"}))

print("Raccomandazioni combinate per il libro ID", book_id)
display(book_info.loc[combined_recommendations].reset_index())


Dettagli del libro originale:


Unnamed: 0,title,author,category,description,price
Original,2001: a space odyssey,['Arthur C. Clarke'],Unknown,Empty,17.0


Raccomandazioni combinate per il libro ID 1108


Unnamed: 0,id,title,author,category,description,price
0,1179,2010: odyssey two,['Arthur Charles Clarke'],['Human-computer interaction'],"To the spaceship Discovery, floating in the si...",21.0
1,1373,3001: the final odyssey (g k hall large print ...,['Arthur Charles Clarke'],['Fiction'],In the year 2110 technology has cured most of ...,4.0
2,124762,rendezvous with rama,['Arthur C. Clarke'],['Rama (Imaginary space vehicle)'],Empty,15.0
3,180953,"the time machine,: an invention,","['H. G. Wells', 'Leon E. Stover']",['Biography & Autobiography'],The Time Machine is one of the most enduring w...,11.0
4,180951,the time machine : an invention (literary text...,['Frederick Leypoldt'],['American literature'],Empty,18.0
5,163223,the illustrated man (corgi science fiction),Unknown,Unknown,Empty,4.0
6,163222,the illustrated man,['Ray Bradbury'],['Fiction'],Eighteen science fiction stories deal with lov...,10.0
7,75413,"i, robot",['Isaac Asimov'],['Robots'],Earth is ruled by master-machines but the Thre...,12.0
8,167865,the martian chronicles (time reading program),['Ray Bradbury'],['Fiction'],The tranquility of Mars is disrupted by humans...,4.0
9,167864,the martian chronicles,['Ray Bradbury'],['Fiction'],The tranquility of Mars is disrupted by humans...,20.0
