In [1]:
from rdflib import Graph, Literal, RDF, RDFS, Namespace

# Create the graph that will hold both the schema and the instance data.
g = Graph()

# Single namespace shared by every class, property and individual.
NS = Namespace("http://example.org/ns#")

# --- Classes ---------------------------------------------------------------
# Classes declared without a superclass.
for class_name in (
    "Book",
    "Author",
    "Genre",
    "ReadInstance",
    "Reader",
    "GenreAssignment",
    "Rating",
):
    g.add((NS[class_name], RDF.type, RDFS.Class))

# Contributor roles: each one is a class and a subclass of Author.
for role_name in ("Writer", "Illustrator", "Contributor", "Editor", "Translator"):
    g.add((NS[role_name], RDF.type, RDFS.Class))
    g.add((NS[role_name], RDFS.subClassOf, NS.Author))

# --- Properties ------------------------------------------------------------
# One entry per property: (local name, rdfs:domain, rdfs:range).
property_specs = [
    # Book
    ("title", NS.Book, RDFS.Literal),  # used when loading books; was undeclared
    ("hasPages", NS.Book, RDFS.Literal),
    ("isSimilarTo", NS.Book, NS.Book),
    ("hasGenre", NS.Book, NS.GenreAssignment),
    ("publishedOn", NS.Book, RDFS.Literal),
    ("writtenBy", NS.Book, NS.Writer),
    ("illustratedBy", NS.Book, NS.Illustrator),
    ("contributedBy", NS.Book, NS.Contributor),
    ("editedBy", NS.Book, NS.Editor),
    ("translatedBy", NS.Book, NS.Translator),
    ("ratingCount", NS.Book, RDFS.Literal),
    # Author
    ("hasName", NS.Author, RDFS.Literal),
    ("hasAvgRating", NS.Author, RDFS.Literal),
    # NOTE(review): the loading cells use ratedBy with ReadInstance subjects
    # (object: a Reader) and with Author subjects (object: a literal count).
    # Neither matches this declaration; confirm the intended model.
    ("ratedBy", NS.Author, NS.Reader),
    # GenreAssignment
    ("genreType", NS.GenreAssignment, NS.Genre),
    ("hasProbability", NS.GenreAssignment, RDFS.Literal),
    # Reader
    ("hasReadingInstance", NS.Reader, NS.ReadInstance),
    # ReadInstance
    # aboutBook points from a reading instance to the book it concerns, which
    # is how the interactions cell emits it. It was previously declared with
    # domain and range swapped (Book -> ReadInstance).
    ("aboutBook", NS.ReadInstance, NS.Book),
    ("hasRating", NS.ReadInstance, RDFS.Literal),
]

for property_name, domain_term, range_term in property_specs:
    property_uri = NS[property_name]
    g.add((property_uri, RDF.type, RDF.Property))
    g.add((property_uri, RDFS.domain, domain_term))
    g.add((property_uri, RDFS.range, range_term))


# To inspect the schema, serialize the graph as RDF/XML and print it:
# print(g.serialize(format='xml'))

<Graph identifier=Ndfd963435c844b85a200fee295032516 (<class 'rdflib.graph.Graph'>)>

In [2]:
import pandas as pd
from rdflib import Literal
import ast

# Load the book catalogue from CSV.
books_df = pd.read_csv('Data/books_final.csv')

# CSV column holding a list of person ids -> (predicate, URI prefix for that role).
# NOTE(review): people referenced here get "<Role>_<id>" URIs, whereas the
# authors cell describes people under "Author_<id>" URIs, so names and ratings
# are not attached to the nodes books point at. Confirm whether that is intended.
role_columns = {
    'writers': (NS.writtenBy, 'Writer'),
    'illustrators': (NS.illustratedBy, 'Illustrator'),
    'contributors': (NS.contributedBy, 'Contributor'),
    'editors': (NS.editedBy, 'Editor'),
    'translators': (NS.translatedBy, 'Translator'),
}


def _parse_id_list(cell):
    """Parse a CSV cell containing a Python list literal.

    Returns an empty list for a missing cell (None / NaN) instead of letting
    ``ast.literal_eval`` raise on it.
    """
    if cell is None or (isinstance(cell, float) and pd.isna(cell)):
        return []
    return ast.literal_eval(cell)


def _clean_ids(raw_ids):
    """Yield each non-empty identifier as a whitespace-stripped string."""
    for raw_id in raw_ids:
        if not raw_id:
            continue
        identifier = str(raw_id).strip()
        if identifier:
            yield identifier


# Emit the triples describing each book.
for index, row in books_df.iterrows():
    book_uri = NS[f"Book_{row['book_id']}"]  # one URI per book
    g.add((book_uri, RDF.type, NS.Book))

    # Title. Item access avoids any clash between the local name and the
    # attributes Namespace inherits from str.
    if not pd.isna(row['title']):
        g.add((book_uri, NS["title"], Literal(row['title'])))

    # Writers, illustrators, contributors, editors and translators.
    for column, (predicate, prefix) in role_columns.items():
        for person_id in _clean_ids(_parse_id_list(row[column])):
            g.add((book_uri, predicate, NS[f"{prefix}_{person_id}"]))

    # Publication year, when present.
    if not pd.isna(row['publication_year']):
        g.add((book_uri, NS.publishedOn, Literal(int(row['publication_year']))))

    # Similar books. The referenced book is typed as a Book as well; the
    # original re-typed the current book here, which added nothing.
    for similar_book_id in _clean_ids(_parse_id_list(row['similar_books'])):
        similar_book_uri = NS[f"Book_{similar_book_id}"]
        g.add((similar_book_uri, RDF.type, NS.Book))
        g.add((book_uri, NS.isSimilarTo, similar_book_uri))

In [3]:
# Load the author table from CSV.
authors_df = pd.read_csv('Data/authors_final.csv')

# Columns that are emitted only when the cell is not NaN: (CSV column, predicate).
# NOTE(review): ratings_count is stored under ratedBy as a literal, while the
# schema cell declares ratedBy with range Reader and the interactions cell uses
# it to point at reader URIs. Confirm which meaning is intended.
optional_author_fields = (
    ('average_rating', NS.hasAvgRating),
    ('ratings_count', NS.ratedBy),
)

# Emit the triples describing each author.
for _, author_row in authors_df.iterrows():
    # One URI per author.
    author_uri = NS[f"Author_{author_row['author_id']}"]
    g.add((author_uri, RDF.type, NS.Author))

    # The name is always emitted.
    g.add((author_uri, NS.hasName, Literal(author_row['name'])))

    # Average rating and ratings count, when available.
    for column, predicate in optional_author_fields:
        value = author_row[column]
        if pd.notna(value):
            g.add((author_uri, predicate, Literal(value)))

In [4]:
import pandas as pd
from rdflib import Literal, URIRef

# Load the reader/book interactions from CSV.
interactions_df = pd.read_csv('Data/interactions_final.csv')

# Iterate over the needed columns directly rather than with iterrows().
# iterrows() builds one Series per row and coerces all of its values to a
# common dtype, so an integer id can come back as a float ("Book_12.0") and
# numeric values come back as numpy scalars. Column iteration keeps each
# column's own dtype, yields plain Python scalars, and is much cheaper.
interaction_records = zip(
    interactions_df.index,
    interactions_df['book_id'],
    interactions_df['user_id'],
    interactions_df['rating'],
)

for index, book_id, user_id, rating in interaction_records:
    book_uri = NS[f"Book_{book_id}"]
    user_uri = NS[f"Reader_{user_id}"]
    # One URI per interaction row, keyed by the DataFrame index.
    read_instance_uri = NS[f"ReadInstance_{index}"]

    g.add((user_uri, RDF.type, NS.Reader))

    # The reading instance links the reader to the book.
    g.add((read_instance_uri, RDF.type, NS.ReadInstance))
    g.add((read_instance_uri, NS.aboutBook, book_uri))
    g.add((read_instance_uri, NS.ratedBy, user_uri))

    # A rating of 0 is treated as "not rated"; NaN also fails this comparison.
    if rating > 0:
        g.add((read_instance_uri, NS.hasRating, Literal(rating)))

In [5]:
# Load page counts and per-genre probabilities from CSV.
genres_df = pd.read_csv('Data/genres_pages_x_books.csv')

# Genre columns of the CSV; each one also names a Genre individual.
genre_columns = ['Fantasy', 'Fiction', 'Mystery', 'Poetry', 'History', 'Romance', 'Non-fiction', 'Children', 'Young-adult', 'Comics']

# Declare every genre once, up front.
genre_uris = {genre: NS[genre] for genre in genre_columns}
for genre_uri in genre_uris.values():
    g.add((genre_uri, RDF.type, NS.Genre))

# Iterate column-wise instead of with iterrows(). iterrows() coerces each row
# to a single dtype, so in a frame of numeric columns an integer book_id is
# upcast to float and the URI becomes "Book_<id>.0", which does not match the
# "Book_<id>" URIs created when loading books. Column iteration keeps book_id
# in its own dtype.
probability_rows = genres_df[genre_columns].itertuples(index=False, name=None)
genre_records = zip(
    genres_df.index,
    genres_df['book_id'],
    genres_df['num_pages'],
    probability_rows,
)

for index, book_id, num_pages, probabilities in genre_records:
    book_uri = NS[f"Book_{book_id}"]

    # Page count, when present.
    if not pd.isna(num_pages):
        g.add((book_uri, NS.hasPages, Literal(int(num_pages))))

    # One GenreAssignment node per genre with a positive probability.
    for genre, probability in zip(genre_columns, probabilities):
        if probability > 0:
            genre_assignment_uri = NS[f"GenreAssignment_{index}_{genre}"]
            g.add((genre_assignment_uri, RDF.type, NS.GenreAssignment))
            g.add((genre_assignment_uri, NS.genreType, genre_uris[genre]))
            g.add((genre_assignment_uri, NS.hasProbability, Literal(probability)))
            g.add((book_uri, NS.hasGenre, genre_assignment_uri))

In [6]:
# Write the whole graph (schema and instance data) to disk in Turtle format.
turtle_path = 'Data/graph_final.ttl'
g.serialize(destination=turtle_path, format='turtle')

<Graph identifier=Ndfd963435c844b85a200fee295032516 (<class 'rdflib.graph.Graph'>)>

In [7]:
import pandas as pd

# Flatten the graph into plain-string (subject, predicate, object) rows.
triples = [(str(s), str(p), str(o)) for s, p, o in g]

# Name the columns in the constructor so an empty graph still produces a
# well-formed, three-column frame instead of a length-mismatch error.
data = pd.DataFrame(triples, columns=['subject', 'predicate', 'object'])
data.to_csv('Data/final_data.csv', index=False)