In [None]:
import datetime as dt
import glob
import json
import os
import shutil
from datetime import datetime
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.colab import files

In [None]:
class Book:
    """Plain record describing one book scraped from a Lubimyczytac profile.

    Values are stored exactly as passed in; no validation or conversion is done.

    Attributes:
        name: Book title.
        author: Author text.
        rate: Rating number for the book (0 by default).
        opinion: Text of the written opinion, '' when there is none.
        read_date: Date text as scraped, '' when there is none.
        shelves: List of shelf names the book is on.
        average_rate: Second rating number for the book (0 by default).
        href: Link to the book page.
        isbn: ISBN string, '' when unknown.
    """

    def __init__(self, name='', author='', rate=0, opinion='', read_date='', shelves=None, average_rate=0, href='', isbn=''):
        self.name = name
        self.author = author
        self.rate = rate
        self.opinion = opinion
        self.read_date = read_date
        # A literal [] default would be created once and shared by every Book
        # built without shelves; give each such instance its own empty list.
        self.shelves = [] if shelves is None else shelves
        self.average_rate = average_rate
        self.href = href
        self.isbn = isbn

def get_pages_count(timeout=30):
    """Return the number of pages of the Lubimyczytac profile book listing.

    Fetches the profile books page and takes the last whitespace-separated
    token of the pagination info text as the page count.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        int: The page count read from the pagination info.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the pagination info is missing or does not end with an
            integer.
    """
    url = 'https://lubimyczytac.pl/profil/ksiazki'
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    info_node = soup.select_one('.paginationList__info span')
    if info_node is None:
        raise ValueError(f"Pagination info not found on {url}")

    tokens = info_node.get_text().split()
    if not tokens:
        raise ValueError(f"Pagination info on {url} is empty")
    return int(tokens[-1])

def _meta_content(soup, prop, default=''):
    """Return the ``content`` of the ``<meta property=prop>`` tag, or ``default`` if absent."""
    tag = soup.find('meta', property=prop)
    if tag is None:
        return default
    return tag.get('content', default)


def _parse_rating_value(raw):
    """Turn rating text into a number: ``int`` when whole, else ``float``, else 0."""
    text = raw.strip().replace(',', '.')  # accept a comma as decimal separator
    try:
        return int(text)
    except ValueError:
        pass
    try:
        return float(text)
    except ValueError:
        return 0


def get_books_details(hrefs, base_url='https://lubimyczytac.pl', timeout=30):
    """Fetch each book page and collect ISBN, author and rating from its meta tags.

    Args:
        hrefs: Iterable of book page links. Each link is resolved against
            ``base_url`` before the request; absolute URLs are left unchanged
            by ``urljoin``, site-relative ones become fetchable.
        base_url: Site root used to resolve relative links.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        dict: Maps every input href (unchanged, so callers can look entries up
        by the key they passed in) to a dict with the keys ``'isbn'``,
        ``'author'``, ``'rating'`` and ``'href'``. A missing ``books:isbn`` or
        ``books:author`` meta tag yields ``''``; a missing or unparsable
        ``books:rating:value`` yields ``0``.

    Raises:
        requests.HTTPError: If a book page answers with an error status.
    """
    books_details = {}
    for href in hrefs:
        response = requests.get(urljoin(base_url, href), timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # One book without some metadata must not abort the whole run, so each
        # field falls back to a neutral default instead of subscripting None.
        books_details[href] = {
            'isbn': _meta_content(soup, 'books:isbn'),
            'author': _meta_content(soup, 'books:author'),
            'rating': _parse_rating_value(_meta_content(soup, 'books:rating:value', '0')),
            'href': href,
        }
    return books_details

def _rating_at(rates, index):
    """Return ``rates[index]`` as a number, or 0 when it is absent or unparsable."""
    try:
        text = rates[index].replace(',', '.')  # accept a comma as decimal separator
    except IndexError:
        return 0
    try:
        return int(text)
    except ValueError:
        pass
    try:
        return float(text)
    except ValueError:
        return 0


def import_books():
    """Scrape the profile's book listing into ``Book`` records.

    Asks ``get_pages_count`` for the number of listing pages, prompts on stdin
    for the profile's object ID, reads every listing page, and finally fills in
    each book's ISBN from ``get_books_details``.

    Returns:
        list: One ``Book`` per distinct book link, in the order first seen
        (an entry with a repeated link replaces the earlier one).

    Raises:
        requests.HTTPError: If a listing page answers with an error status.
    """
    pages_count = get_pages_count()
    obj_id = input("Enter your object ID: ")

    books = {}
    for page_num in range(1, pages_count + 1):
        url = f'https://lubimyczytac.pl/profil/ksiazki?page={page_num}&listId=booksFilteredList&showFirstLetter=0&paginatorType=Standard&porzadek=malejaco&own=1&objectId={obj_id}&own=1&paginatorType=Standard'
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.json()['data']['content'], 'html.parser')

        for book_node in soup.select('.authorAllBooks__single'):
            title_node = book_node.select_one('.authorAllBooks__singleTextTitle')
            title = title_node.text.strip()
            href = title_node['href']
            author = book_node.select_one('.authorAllBooks__singleTextAuthor').get_text(strip=True)
            shelves = [shelf.get_text(strip=True) for shelf in book_node.select('.authorAllBooks__singleTextShelfRight a')]
            rates = [rate.get_text(strip=True) for rate in book_node.select('.listLibrary__ratingStarsNumber')]

            # The opinion is optional.
            opinion_node = book_node.select_one('.comment-cloud__body .p-collapsed')
            opinion = opinion_node.text.strip() if opinion_node is not None else ''

            # The read date is optional: the container may be missing, or it may
            # hold fewer than two nested divs. The second case raises
            # IndexError, which a handler for AttributeError alone lets through.
            image_box = book_node.select_one(".authorAllBooks__singleImg div")
            inner_divs = image_box.find_all('div') if image_box is not None else []
            read_date = inner_divs[1].text.strip() if len(inner_divs) > 1 else ''

            # Entries may show fewer than two rating numbers; default to 0
            # instead of crashing on a missing or empty value.
            books[href] = Book(title, author, _rating_at(rates, 0), opinion, read_date, shelves, _rating_at(rates, 1), href)

    books_details = get_books_details(books.keys())
    for href, details in books_details.items():
        books[href].isbn = details['isbn']

    return list(books.values())

if __name__ == "__main__":
    # Scrape the library, then export it as a JSON array with one object per book.
    books_data = import_books()
    # json cannot serialize Book instances directly (TypeError), so dump each
    # book's attribute dict. The records are built before the file is opened,
    # so a failure here cannot leave a truncated lc_books.json behind.
    book_records = [vars(book) for book in books_data]
    with open('lc_books.json', 'w', encoding='utf-8') as json_file:
        json.dump(book_records, json_file, ensure_ascii=False, indent=4)
    print("Data has been exported to 'lc_books.json'.")


In [None]:
# File name under which the exported library is stored in the working directory.
uploaded_filename = 'lc_books.json'

# If a file with that name is already present, report its ctime and remove it.
# NOTE: on Unix the ctime is the last metadata change, not the creation time.
if os.path.exists(uploaded_filename):
    creation_time = os.stat(uploaded_filename).st_ctime
    creation_date = format(dt.datetime.fromtimestamp(creation_time), '%Y-%m-%d %H:%M:%S')

    os.remove(uploaded_filename)
    print(f"Deleted the existing file {uploaded_filename} created on {creation_date}")

# Ask for a fresh version of the file through the Colab upload widget.
uploaded = files.upload()

# Copy the file from the working directory to the Google Drive data folder;
# `lc_books` is the Drive path the later cells read from.
lc_books_path = uploaded_filename
lc_books = f'/content/drive/MyDrive/Dane z aplikacji/Lubimyczytac/{uploaded_filename}'
shutil.copy(src=lc_books_path, dst=lc_books)

print(f"The file {uploaded_filename} has been moved to appropriate folder in Google Drive.")


In [None]:
# Load the exported library and show its column labels as a quick schema check.
df = pd.read_json(path_or_buf=lc_books)
df.columns

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Import data from a JSON file
df = pd.read_json(lc_books)

# Drop books on the "Chcę przeczytać" (want to read) shelf. Each 'shelves' cell
# is expected to hold a list of shelf names (that is what the scraper stores),
# and a list never equals a string, so comparing the column with
# '[Chcę przeczytać]' filters nothing. Test membership per row instead; a plain
# string cell is handled by the same `in` check, anything else counts as
# "not on the shelf".
want_to_read = df['shelves'].apply(
    lambda shelves: isinstance(shelves, (list, tuple, str)) and 'Chcę przeczytać' in shelves
).astype(bool)
# .copy() detaches the filtered frame so the column assignments below do not
# raise SettingWithCopyWarning.
df = df[~want_to_read].copy()

# Convert the 'read_date' column to a date format
df['read_date'] = pd.to_datetime(df['read_date'])

# General statistics
total_books_read = len(df)
#overall_average_rating = df['rate'].mean()

# Statistics for each year
df['year'] = df['read_date'].dt.year
books_read_per_year = df['year'].value_counts().sort_index()
yearly_average_rating = df.groupby('year')['rate'].mean()
# Missing dates turn 'year' into floats (2020.0). Both aggregations already
# exclude the missing years, so the remaining labels can be shown as integers.
books_read_per_year.index = books_read_per_year.index.astype(int)
yearly_average_rating.index = yearly_average_rating.index.astype(int)

# Top 10 most frequently appearing authors
top_10_authors = df['author'].value_counts().head(10)

# Plot the number of books read per year
books_read_per_year.plot(kind='bar', figsize=(10, 6))
plt.title('Number of Books Read Each Year')
plt.xlabel('Year')
plt.ylabel('Number of Books')
plt.show()

# Plot the yearly average rating
yearly_average_rating.plot(kind='line', figsize=(10, 6))
plt.title('Yearly Average Rating of Read Books')
plt.xlabel('Year')
plt.ylabel('Average Rating')
plt.show()

# Display the results
print(f'Total books read: {total_books_read}')
#print(f'Overall average rating: {overall_average_rating:.2f}')
print('\nNumber of books read per year:')
print(books_read_per_year)
print('\nYearly average rating:')
print(yearly_average_rating)
print('\nTop 10 authors:')
print(top_10_authors)
