In [1]:
# File names for the per-user "cases" table (casos.*) and the books table (llibres.*).
# json_name is not referenced in the visible cells.
json_name = 'data.json'
pkl_name = 'casos.pkl'
csv_name = 'casos.csv'
# Prefix joined to file names by plain string concatenation (e.g. carpeta+pkl_name),
# so a non-empty value needs its own trailing path separator.
carpeta = ''
pkl_name_ll = 'llibres.pkl'
csv_name_ll = 'llibres.csv'

In [2]:
import requests
import gzip
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# np seed = 0
np.random.seed(0)

In [3]:
# Reuse the cached cases table when its pickle is already on disk.
# `get` tells the following cells whether the raw reviews must be downloaded
# and processed (True) or whether every build step can be skipped (False).
try:
    casos = pd.read_pickle(carpeta+pkl_name)
    get = False
except FileNotFoundError:
    # Only a missing cache triggers a rebuild. Any other failure (corrupt
    # pickle, Ctrl-C, ...) now surfaces instead of being silently swallowed.
    get = True

In [4]:
if get:
    # URL of the gzip-compressed, line-delimited JSON reviews dump
    url = 'https://datarepo.eng.ucsd.edu/mcauley_group/gdrive/goodreads/goodreads_reviews_dedup.json.gz'

    # Number of leading review records to keep (the file is only streamed up to here)
    max_ressenyes = 500000

    # Defined up front so the name exists even when the download fails
    primeras_500_filas = []

    # Stream the response; the context manager releases the connection afterwards
    with requests.get(url, stream=True) as response:
        # Only decode the body when the request succeeded (status code 200)
        if response.status_code == 200:
            # Decompress on the fly and parse one JSON document per line.
            # zip() stops at whichever ends first, so a file shorter than
            # max_ressenyes no longer raises StopIteration, and the decoded line
            # is parsed whole (json.loads tolerates the trailing newline).
            with gzip.GzipFile(fileobj=response.raw) as f:
                primeras_500_filas = [
                    json.loads(linia.decode('utf-8'))
                    for _, linia in zip(range(max_ressenyes), f)
                ]

            print("JSON creat.")
        else:
            print(f"Error al descargar el archivo. Código de estado: {response.status_code}")

In [5]:
if get:
    # Turn the downloaded records into a frame and keep only the three
    # columns used by the rest of the notebook.
    columnes_ressenya = ['user_id', 'book_id', 'rating']
    df = pd.DataFrame(primeras_500_filas)[columnes_ressenya]

In [6]:
if get:
    # Bar chart of how many reviews carry each rating value, saved as
    # rating_distribution.png under the `carpeta` prefix.
    sns.set_style('darkgrid')
    fig_ratings, ax_ratings = plt.subplots(figsize=(10, 6))
    sns.countplot(x='rating', data=df, ax=ax_ratings)
    ax_ratings.set_xlabel('Rating')
    ax_ratings.set_ylabel('Count')
    ax_ratings.set_title('Rating Distribution')
    fig_ratings.savefig(f'{carpeta}rating_distribution.png')

In [7]:
if get:
    # Distinct user ids, in order of first appearance
    unique_users = pd.unique(df['user_id'])

In [8]:
if get:
    # One row per user with the list of books they rated and the matching list
    # of ratings. A single groupby replaces the former per-user filter +
    # pd.concat loop, which rescanned the whole frame for every user and
    # re-copied the growing result each time (quadratic).
    # sort=False keeps users in order of first appearance, and rows inside each
    # group keep their original order, so books[i] still pairs with ratings[i].
    # NOTE: groupby drops rows whose user_id is missing.
    df_aux = (
        df.groupby('user_id', sort=False)
          .agg(books=('book_id', list), ratings=('rating', list))
          .reset_index()
    )

    print("Dataset joined. Unique users:", len(df_aux))

In [9]:
if get:
    # Line plot of the number of rated books per user (x: user, y: count),
    # saved as books_rated_before.png under the `carpeta` prefix.
    llibres_per_usuari = df_aux['books'].map(len)
    fig_abans, ax_abans = plt.subplots(figsize=(10, 6))
    ax_abans.set_xlabel('user_id')
    ax_abans.set_ylabel('Number of books rated')
    ax_abans.set_title('Number of books rated by each user')
    ax_abans.plot(df_aux['user_id'], llibres_per_usuari)
    fig_abans.savefig(f'{carpeta}books_rated_before.png')

In [10]:
if get:
    min_books = 10
    max_books = 20

    # Keep only users whose number of rated books lies in
    # [min_books, max_books] (both bounds inclusive), then renumber the rows.
    n_valorats = df_aux['books'].map(len)
    dins_rang = (n_valorats >= min_books) & (n_valorats <= max_books)
    df_aux = df_aux[dins_rang].reset_index(drop=True)

    print(f"Dataset filtered with users with more than {min_books} and less than {max_books} books reviewed. Unique users:", len(df_aux))

In [11]:
if get:
    # Same plot as before the filter (x: user, y: number of rated books),
    # saved as books_rated_after.png under the `carpeta` prefix.
    llibres_per_usuari = df_aux['books'].map(len)
    fig_despres, ax_despres = plt.subplots(figsize=(10, 6))
    ax_despres.set_xlabel('user_id')
    ax_despres.set_ylabel('Number of books rated')
    ax_despres.set_title('Number of books rated by each user')
    ax_despres.plot(df_aux['user_id'], llibres_per_usuari)
    fig_despres.savefig(f'{carpeta}books_rated_after.png')

In [12]:
if get:
    # For every user, move the last 3 books (and their ratings) into the new
    # columns "llibres_recomanats" / "puntuacions_llibres" and drop them from
    # the original lists.
    parells = (('books', 'llibres_recomanats'), ('ratings', 'puntuacions_llibres'))
    for _, columna_nova in parells:
        columna_origen = 'books' if columna_nova == 'llibres_recomanats' else 'ratings'
        df_aux[columna_nova] = df_aux[columna_origen].apply(lambda valors: valors[-3:])
    for columna_origen, _ in parells:
        df_aux[columna_origen] = df_aux[columna_origen].apply(lambda valors: valors[:-3])

    print("Done creating new columns.")

    # Rename the shortened lists: "books" -> "llibres_usuari", "ratings" -> "val_llibres"
    nous_noms = {'books': 'llibres_usuari', 'ratings': 'val_llibres'}
    df_aux = df_aux.rename(columns=nous_noms)

In [13]:
# Persist the freshly built cases table (only when it was rebuilt) and reload it.
# The files are written under the same `carpeta` prefix they are read from;
# previously the writes ignored the prefix, so a non-empty `carpeta` made the
# read below miss the file that had just been written.
if get:
    df_aux.to_pickle(carpeta+pkl_name)
    df_aux.to_csv(carpeta+csv_name, index=False)
casos = pd.read_pickle(carpeta+pkl_name)

In [14]:
# Reuse the cached books table when its pickle exists; otherwise set
# get = True so the following cells rebuild it.
try:
    llibres = pd.read_pickle(carpeta+pkl_name_ll)
    get = False
except FileNotFoundError:
    # Only a missing file means "rebuild"; other errors are no longer hidden.
    get = True

In [15]:
if get:
    # Gather every book id that appears in "llibres_usuari" or
    # "llibres_recomanats" of any case.
    set_llibres = set()
    for _, cas in casos.iterrows():
        set_llibres.update(cas['llibres_usuari'])
        set_llibres.update(cas['llibres_recomanats'])

    # Later cells expect a list
    set_llibres = list(set_llibres)
    print(len(set_llibres))

In [16]:
if get:
    # NOTE(review): absolute, machine-specific path — consider moving it to the config cell.
    fitxer = "/Users/ucemarc/Downloads/goodreads_books.json"

    # Fields copied from every matching record (also the output column order)
    columnes_llibre = ['isbn', 'book_id', 'similar_books', 'average_rating', 'description', 'authors', 'isbn13', 'num_pages', 'publication_year', 'title', 'language_code', 'format', 'series']

    # set_llibres is a list; a set makes the per-line membership test O(1)
    ids_buscats = set(set_llibres)

    # Collect plain dicts and build the frame once at the end; growing a
    # DataFrame with pd.concat inside the loop is quadratic.
    registres = []
    with open(fitxer, 'r', encoding='utf-8') as file:
        # The file holds one JSON document per line
        for line in file:
            book = json.loads(line)
            if book['book_id'] not in ids_buscats:
                continue
            registre = {k: book[k] for k in columnes_llibre}
            # Keep only the author ids, not the full author dicts
            registre['authors'] = [author['author_id'] for author in book['authors']]
            registres.append(registre)

    df_llibres = pd.DataFrame(registres, columns=columnes_llibre)
    print(len(df_llibres))

    # Same files as before ("llibres.csv" / "llibres.pkl"), now spelled through the
    # config names so they stay in sync with the cells that read them back.
    df_llibres.to_csv(carpeta+csv_name_ll, index=False)
    df_llibres.to_pickle(carpeta+pkl_name_ll)

In [17]:
# The books pickle only counts as complete when it already has a "genres"
# column; in that case get = False and the genre cells are skipped.
try:
    llibres = pd.read_pickle(carpeta+pkl_name_ll)
    llibres['genres']
    get = False
except (FileNotFoundError, KeyError):
    # Missing pickle or missing column: rebuild the genres. Other errors are
    # no longer swallowed by a bare except.
    get = True
    # NOTE(review): this assumes the CSV exists; list-valued columns presumably
    # come back from CSV as strings — confirm before relying on them.
    df_llibres = pd.read_csv(carpeta+csv_name_ll)

In [18]:
if get:
    # NOTE(review): absolute, machine-specific path — consider moving it to the config cell.
    fitxer = "/Users/ucemarc/Downloads/goodreads_book_genres_initial.json"

    # NOTE(review): set_llibres is only assigned in an earlier `if get:` cell;
    # it may be undefined here if that branch was skipped.
    ids_buscats = set(set_llibres)  # O(1) membership instead of scanning a list

    # Accumulate records and build the frame once (no pd.concat inside the loop)
    registres = []
    with open(fitxer, 'r', encoding='utf-8') as file:
        # One JSON document per line
        for line in file:
            book = json.loads(line)
            if book['book_id'] in ids_buscats:
                # Keep the book id and only the genre names (the dict keys)
                registres.append({'book_id': book['book_id'], 'genres': list(book['genres'].keys())})

    df_genres = pd.DataFrame(registres, columns=['book_id', 'genres'])

    # Write the CSV once after the scan; it used to be rewritten in full on
    # every match. As before, nothing is written when no book matched.
    if registres:
        df_genres.to_csv("genres.csv", index=False)

In [19]:
if get:
    # Cast book_id to int in both tables so the join keys are comparable,
    # then keep only books present in both (inner join) and save the result.
    for taula in (df_llibres, df_genres):
        taula['book_id'] = taula['book_id'].astype(int)
    df_llibres = df_llibres.merge(df_genres, on='book_id', how='inner')
    df_llibres.to_csv("llibres.csv", index=False)

In [20]:
if get:
    # Count and list the distinct genre labels across all books
    unique_genres = set()
    for generes_llibre in df_llibres['genres']:
        unique_genres.update(generes_llibre)
    print(len(unique_genres))
    print(unique_genres)

In [21]:
if get:
    # Collapse the compound genre labels into a single short name each;
    # labels not listed here are left untouched.
    nom_curt = {
        'history, historical fiction, biography': 'history',
        'fantasy, paranormal': 'fantasy',
        'mystery, thriller, crime': 'mystery',
        'comics, graphic': 'comics',
    }
    df_llibres['genres'] = df_llibres['genres'].apply(
        lambda generes: [nom_curt.get(genere, genere) for genere in generes]
    )
    df_llibres.to_csv(carpeta+csv_name_ll, index=False)
    df_llibres.to_pickle(carpeta+pkl_name_ll)

In [22]:
if get:
    # Recount the distinct genre labels after the renaming step
    unique_genres = set()
    for generes_llibre in df_llibres['genres']:
        unique_genres.update(generes_llibre)
    print(len(unique_genres))
    print(unique_genres)

In [23]:
# Load the books table and the cases table from their pickle files.
llibres = pd.read_pickle(carpeta+pkl_name_ll)
casos = pd.read_pickle(carpeta+pkl_name)

In [24]:
# Allowed values for each synthetic book attribute; make_vector looks the
# value list up by attribute name.
# NOTE(review): "detall_cientific" ends in "alta" while
# "desenvolupament_del_personatge" uses "alt" — confirm the mismatch is intended.
categories = {
    "estil_literari": ["realisme", "romanticisme", "naturalisme", "simbolisme", "modernisme", "realisme magico", "postmodernisme"],
    "complexitat": ["baixa", "mitjana", "alta"],
    "caracteristiques": ["simples", "complexes"],
    "desenvolupament_del_personatge": ["baix", "mitja", "alt"],
    "accio_o_reflexio": ["accio", "reflexio"],
    "epoca": ["actual", "passada", "futura"],
    "detall_cientific": ["baix", "mitja", "alta"]
}

In [25]:
def make_vector(length1, length2, unique_min, unique_max, categorie):
    """Draw two random value lists for one attribute from a shared small palette.

    A palette size is drawn with ``np.random.randint(unique_min, unique_max)``
    (``unique_max`` is exclusive), that many distinct values are picked from
    ``categories[categorie]``, and both returned lists are filled by sampling
    from the palette with replacement.

    Args:
        length1: number of entries in the first list.
        length2: number of entries in the second list.
        unique_min: smallest palette size (inclusive).
        unique_max: upper bound for the palette size (exclusive).
        categorie: key into the module-level ``categories`` dict.

    Returns:
        Tuple ``(vector1, vector2)`` of lists with ``length1`` and ``length2`` values.
    """
    # How many distinct values this call may use
    mida_paleta = np.random.randint(unique_min, unique_max)

    # Pick that many distinct values, without repetition
    paleta = np.random.choice(categories[categorie], size=mida_paleta, replace=False)

    # Fill both lists one draw at a time (first list entirely, then the second)
    vector1 = []
    for _ in range(length1):
        vector1.append(np.random.choice(paleta))
    vector2 = []
    for _ in range(length2):
        vector2.append(np.random.choice(paleta))
    return vector1, vector2

In [26]:
# Helper that records one vote in the per-book counters
def actualitzar_diccionaris(llibre_id, valor, diccionari):
    """Increment the count of ``valor`` in ``diccionari[llibre_id]``.

    Args:
        llibre_id: index into ``diccionari`` selecting the book's counter.
        valor: value being voted for; its count starts at 1 on first sight.
        diccionari: indexable collection of dicts, mutated in place.
    """
    recompte = diccionari[llibre_id]
    recompte[valor] = recompte.get(valor, 0) + 1

# (unique_min, unique_max) passed to make_vector for each synthetic attribute.
# The dict order is the order in which the attributes are drawn for every
# case, which keeps the seeded random stream identical to the unrolled version.
limits_atributs = {
    "estil_literari": (2, 4),
    "complexitat": (1, 3),
    "caracteristiques": (1, 3),
    "desenvolupament_del_personatge": (1, 3),
    "accio_o_reflexio": (1, 2),
    "epoca": (1, 3),
    "detall_cientific": (1, 3),
}

# One vote counter (value -> count) per book and per attribute
recomptes = {nom: [{} for _ in range(len(llibres))] for nom in limits_atributs}

# book_id -> index label of its first row in `llibres`, built once so the loop
# below does a dict lookup instead of scanning the whole frame for every book.
# The label is used as a list position, so `llibres` is assumed to have a
# default 0..n-1 index — TODO confirm.
posicio_llibre = {}
for etiqueta, book_id in zip(llibres.index, llibres["book_id"]):
    posicio_llibre.setdefault(int(book_id), etiqueta)

for index, row in casos.iterrows():
    # A book missing from `llibres` raises KeyError here (it used to be an IndexError)
    pos_usuari = [posicio_llibre[int(llibre)] for llibre in row['llibres_usuari']]
    pos_recomanats = [posicio_llibre[int(llibre)] for llibre in row['llibres_recomanats']]

    for nom, (unique_min, unique_max) in limits_atributs.items():
        # One random value per book of this case, for read and held-out books
        vots_usuari, vots_recomanats = make_vector(len(pos_usuari), len(pos_recomanats), unique_min, unique_max, nom)
        for posicio, valor in zip(pos_usuari, vots_usuari):
            actualitzar_diccionaris(posicio, valor, recomptes[nom])
        for posicio, valor in zip(pos_recomanats, vots_recomanats):
            actualitzar_diccionaris(posicio, valor, recomptes[nom])

# Expose the per-attribute counters under the names the following cells use
estil_literari = recomptes["estil_literari"]
complexitat = recomptes["complexitat"]
caracteristiques = recomptes["caracteristiques"]
desenvolupament_del_personatge = recomptes["desenvolupament_del_personatge"]
accio_o_reflexio = recomptes["accio_o_reflexio"]
epoca = recomptes["epoca"]
detall_cientific = recomptes["detall_cientific"]

In [27]:
# Replace every non-empty vote counter by its most voted value.
# Books that received no vote keep their empty dict.
for vots_atribut in (
    estil_literari,
    complexitat,
    caracteristiques,
    desenvolupament_del_personatge,
    accio_o_reflexio,
    epoca,
    detall_cientific,
):
    for i in range(len(llibres)):
        if len(vots_atribut[i]) > 0:
            vots_atribut[i] = max(vots_atribut[i], key=vots_atribut[i].get)

In [28]:
# Print the length of one attribute list next to the number of books.
print(len(estil_literari))
print(len(llibres))

15666
15666


In [29]:
# Attach each attribute list to the books table as a new column
columnes_noves = {
    "estil_literari": estil_literari,
    "complexitat": complexitat,
    "caracteristiques": caracteristiques,
    "desenvolupament_del_personatge": desenvolupament_del_personatge,
    "accio_o_reflexio": accio_o_reflexio,
    "epoca": epoca,
    "detall_cientific": detall_cientific,
}
for nom_columna, valors in columnes_noves.items():
    llibres[nom_columna] = valors

In [30]:
# Save the books table as pickle and CSV.
# NOTE(review): these paths omit the `carpeta` prefix used when the pickle is read — confirm.
llibres.to_pickle(pkl_name_ll)
llibres.to_csv(csv_name_ll, index=False)

In [43]:
# Remove from "similar_books" the books that are not in the database.
# Load the data from the CSV
df = pd.read_csv(csv_name_ll)

# Parse a stringified list of similar-book ids and keep only the known ones
def neteja_similars(similars, ids_valids):
    """Return the ids listed in ``similars`` that are present in ``ids_valids``.

    Args:
        similars: usually the text form of a list, e.g. ``"['12', '34']"``.
            A real list/tuple/set is also accepted (items are compared as
            strings); any other value, such as NaN from an empty CSV cell,
            yields an empty list instead of raising.
        ids_valids: container of valid ids as strings.

    Returns:
        List of id strings, in their original order, that are in ``ids_valids``.
    """
    if isinstance(similars, str):
        # Drop the surrounding brackets, split on commas and strip quotes and
        # spaces from each token, so "['1','2']" and "['1', '2']" parse alike.
        candidats = [
            tros.strip().strip("'\"")
            for tros in similars.strip().strip("[]").split(",")
        ]
    elif isinstance(similars, (list, tuple, set)):
        candidats = [str(element) for element in similars]
    else:
        return []
    # Keep only the ids that exist in the database
    return [llibre_id for llibre_id in candidats if llibre_id in ids_valids]

# Collect the book_id values (as strings) in a set for fast membership tests
ids_valids = set(df['book_id'].astype(str))
df['similar_books'] = df['similar_books'].apply(lambda x: neteja_similars(x, ids_valids))

# Placeholder for missing ISBN values
def assigna_noisbn(valor):
    """Return ``'noisbn'`` when ``valor`` is missing or the text ``'NaN'``, else ``valor``."""
    falta = pd.isna(valor) or valor == 'NaN'
    return 'noisbn' if falta else valor

# Apply the function to the isbn and isbn13 columns
df['isbn'] = df['isbn'].apply(assigna_noisbn)
df['isbn13'] = df['isbn13'].apply(assigna_noisbn)


# Normalise a language_code value
def assigna_noidentificat(valor):
    """Map English variants to ``'en'`` and missing codes to ``'no_identificat'``.

    ``'eng'``, ``'en-US'``, ``'en-GB'``, ``'en-CA'`` and ``'en'`` become ``'en'``;
    NaN/None, the text ``'NaN'`` and ``'--'`` become ``'no_identificat'``;
    every other value is returned unchanged.
    """
    variants_angles = ('eng', 'en-US', 'en-GB', 'en-CA', 'en')
    if valor in variants_angles:
        return 'en'
    sense_codi = pd.isna(valor) or valor in ('NaN', '--')
    return 'no_identificat' if sense_codi else valor

# Apply the function to the language_code column
df['language_code'] = df['language_code'].apply(assigna_noidentificat)

# Save the cleaned table as CSV and pickle
df.to_csv(csv_name_ll, index=False)
df.to_pickle(pkl_name_ll)

In [32]:
# Reload the books table from its pickle
df = pd.read_pickle(pkl_name_ll)
# Print the distinct values of "format"
print(df['format'].unique())

['ebook' 'tapa blanda' 'audio' 'tapa dura']


In [33]:
# Raw "format" labels grouped under the four canonical names.
# Comic-like labels are filed under 'tapa blanda'.
etiquetes_per_format = {
    'audio': ['Audible Audio', 'Audio CD', 'Audio Cassette', 'Audio', 'Audiobook', 'audio cd', 'MP3 CD'],
    'ebook': ['ebook', 'Kindle Edition', 'HTML', 'Kindle', 'chapbook/ebook', 'Serialized Digital Download'],
    'tapa blanda': [
        'Paperback', 'Mass Market Paperback', 'paper', 'Trade Paperback', 'pocket', 'Softcover', 'Trade paperback', 'Paper Back', 'Perfect Paperback', 'paperback', 'Trade Paper', 'Paberback', 'Softcover with Flap', 'Tapa blanda con solapas',
        'comics', 'Thirteen interactive chapters.', 'Graphic Novel', 'Digital comic', 'Comic Book',
    ],
    'tapa dura': ['Hardcover', 'Board book', 'Board Book', 'Hardback', 'hardcover', 'issue', 'Broche', 'Klappenbroschur', 'Nook', 'Library Binding', 'Gebunden', 'Wen Ku', 'Leather Bound', 'Musc. Supplies', 'Podiobook', 'Brossura', 'Nook Book', 'Spiral-bound', 'Novelty Book', 'Glf `dy,', 'Misc. Supplies', 'Broschiert', 'Unknown Binding'],
}
# Flatten to raw label -> canonical name; labels not listed stay as they are
format_canonic = {
    etiqueta: nom_format
    for nom_format, etiquetes in etiquetes_per_format.items()
    for etiqueta in etiquetes
}
df['format'] = df['format'].apply(lambda x: format_canonic.get(x, x))
# Fill missing formats with one of three categories picked uniformly at random
df['format'] = df['format'].apply(lambda x: np.random.choice(['tapa blanda', 'tapa dura', 'ebook']) if pd.isna(x) else x)

In [34]:
# Print the distinct values of "format"
print(df['format'].unique())
# Print how many books there are for each format
print(df['format'].value_counts())

['ebook' 'tapa blanda' 'audio' 'tapa dura']
format
tapa blanda    7802
tapa dura      4916
ebook          2799
audio           149
Name: count, dtype: int64


In [35]:
# Save the table with the unified "format" column as CSV and pickle
df.to_csv(csv_name_ll, index=False)
df.to_pickle(pkl_name_ll)

In [36]:
# Replace every average_rating below 1.5 with a value drawn uniformly at random
# between 1.5 and 5 (a random draw, not the column mean), then save the table.
df['average_rating'] = df['average_rating'].apply(lambda x: np.random.uniform(1.5, 5) if x < 1.5 else x)
df.to_csv(csv_name_ll, index=False)
df.to_pickle(pkl_name_ll)

In [37]:
# Value counts of num_pages, with missing values (NaN) included in the tally
print(df['num_pages'].value_counts(dropna=False))

num_pages
NaN       2135
320.0      300
288.0      256
304.0      245
336.0      241
          ... 
677.0        1
615.0        1
2201.0       1
3164.0       1
649.0        1
Name: count, Length: 941, dtype: int64


In [38]:
import requests

def obtener_info_libro(isbn, timeout=10):
    """Look up one ISBN in the OpenLibrary books API.

    Args:
        isbn: ISBN to query; it is sent as the ``ISBN:<isbn>`` bibkey.
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled connection blocks the whole loop indefinitely.

    Returns:
        The dict OpenLibrary returns for that ISBN when it is found,
        the string ``"No se encontró información para ese ISBN."`` when the
        response does not contain the ISBN, or a string starting with
        ``"Error: "`` when the request or its decoding fails.
    """
    base_url = "https://openlibrary.org/api/books"
    clave = f"ISBN:{isbn}"
    params = {
        "bibkeys": clave,
        "format": "json",
        "jscmd": "data",
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        # Turn HTTP error statuses into exceptions so they are reported as errors
        response.raise_for_status()
        data = response.json()

        if clave in data:
            return data[clave]  # All the fields available for the book
        return "No se encontró información para ese ISBN."
    except Exception as e:
        # Best effort: callers receive a message instead of an exception
        return f"Error: {str(e)}"

# For books without num_pages, fetch the page count from OpenLibrary
for index, row in df.iterrows():
    # Only rows with a missing page count and a usable ISBN are queried
    if not pd.isna(row['num_pages']):
        continue
    isbn = row['isbn13']
    if isbn == 'noisbn':
        continue

    info_libro = obtener_info_libro(isbn)
    if info_libro == "No se encontró información para ese ISBN.":
        print(f"ISBN: {isbn} - No info 2")
    elif isinstance(info_libro, dict) and 'number_of_pages' in info_libro:
        num_pages = info_libro['number_of_pages']
        df.at[index, 'num_pages'] = num_pages
        print(f"ISBN: {isbn} - Pages: {num_pages}")
    else:
        # Either the record has no page count or the lookup returned an
        # "Error: ..." string; checked explicitly instead of via a bare except
        print(f"ISBN: {isbn} - No info")

ISBN: 9780800759490 - Pages: 207
ISBN: 9781632150066 - No info
ISBN: 9780316021555 - Pages: 448
ISBN: 9780765312273 - No info
ISBN: 9781743580158 - No info 2
ISBN: 9783442723430 - Pages: 636
ISBN: 9780312331467 - Pages: 308
ISBN: 9780345451323 - Pages: 340
ISBN: 9781585679119 - Pages: 206
ISBN: 9780826328090 - Pages: 228


KeyboardInterrupt: 

In [None]:
import requests

def obtener_numero_de_paginas(isbn, api_key=None, timeout=10):
    """Ask the Google Books API for the page count of one ISBN.

    Args:
        isbn: ISBN used in the ``isbn:<isbn>`` search query.
        api_key: optional API key, sent as the ``key`` query parameter when
            given. Accepting it makes the two-argument call further down in
            this cell valid.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The ``pageCount`` value of the first result, or one of the strings
        ``"Número de páginas no disponible"``, ``"ISBN no encontrado"`` or
        ``"Error en la solicitud"``.
    """
    # Build the query URL
    url = f"https://www.googleapis.com/books/v1/volumes?q=isbn:{isbn}"
    params = {"key": api_key} if api_key else None

    # Network failures are reported like any other failed request
    try:
        respuesta = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        return "Error en la solicitud"

    # Anything other than 200 counts as a failed request
    if respuesta.status_code != 200:
        return "Error en la solicitud"

    datos = respuesta.json()

    # A missing or empty "items" list means no match for the ISBN
    if not datos.get("items"):
        return "ISBN no encontrado"

    info_volumen = datos["items"][0].get("volumeInfo", {})
    if "pageCount" in info_volumen:
        return info_volumen["pageCount"]
    return "Número de páginas no disponible"

# For books without num_pages, fetch the page count from Google Books
# (obtener_numero_de_paginas).
# Strings that obtener_numero_de_paginas returns instead of a page count
respuestas_sin_paginas = (
    "Error en la solicitud",
    "ISBN no encontrado",
    "Número de páginas no disponible",
)
for index, row in df.iterrows():
    if pd.isna(row['num_pages']):
        isbn = row['isbn13']
        if isbn != 'noisbn':
            num_pag = obtener_numero_de_paginas(isbn)
            # The former `!= a or != b or != c` test was always true, so the
            # failure strings themselves were stored in num_pages.
            if num_pag not in respuestas_sin_paginas:
                df.at[index, 'num_pages'] = num_pag
                print(f"ISBN: {isbn} - Pages: {num_pag}")
            else:
                print(f"ISBN: {isbn} - No info 2")



# Sample ISBNs for a quick manual check ("otro_isbn" is a placeholder)
isbns = ["9780321534965", "9780321563842", "otro_isbn"]

# Placeholder API key; it is not sent with the requests below
api_key = "TU_API_KEY"

# Query each ISBN and print its page count.
# The lookup is called with the ISBN only: passing a second positional
# argument raised TypeError against the one-parameter definition.
for isbn in isbns:
    print(f"ISBN: {isbn} - Número de páginas: {obtener_numero_de_paginas(isbn)}")


In [None]:
# Value counts of num_pages, with missing values (NaN) included in the tally
print(df['num_pages'].value_counts(dropna=False))

num_pages
NaN       1375
320.0      323
288.0      274
304.0      266
336.0      256
          ... 
706.0        1
1248.0       1
4081.0       1
1116.0       1
649.0        1
Name: count, Length: 952, dtype: int64
