In [70]:
import re
import json
from pathlib import Path
import psycopg2
import pandas as pd

In [71]:
# Read and transform the JSON-like data

text = Path("task1_d.json").read_text(encoding='utf-8')
data_transformed = re.sub(r":(\w+)=>", r'"\1":', text)  
books = json.loads(data_transformed)

In [79]:
print(books[:10])

[{'id': 10292064894005717421, 'title': 'Look Homeward, Angel', 'author': 'Prof. Teressa Kautzer', 'genre': 'Humor', 'publisher': 'Brill Publishers', 'year': 2010, 'price': '$87.25'}, {'id': 13029911509625386835, 'title': 'The Yellow Meads of Asphodel', 'author': 'Domingo Weimann', 'genre': 'Reference book', 'publisher': 'Sams Publishing', 'year': 2018, 'price': '$31.99'}, {'id': 12880574241579659568, 'title': 'A Catskill Eagle', 'author': 'Dayle Orn', 'genre': 'Comic/Graphic Novel', 'publisher': 'Apress', 'year': 2011, 'price': '€5.99'}, {'id': 13301315742612799364, 'title': 'Der Richter und sein Henker', 'author': 'Elias von Kolb', 'genre': 'Tall tale', 'publisher': 'Centaurus Verlag', 'year': 1995, 'price': '$75.00'}, {'id': 16372759776603821045, 'title': 'After Many a Summer Dies the Swan', 'author': 'Carter Legros', 'genre': 'Metafiction', 'publisher': 'University of Minnesota Press', 'year': 2004, 'price': '$52.0'}, {'id': 16225690258143553542, 'title': 'The Man Within', 'author':

In [80]:
# Connect to the PostgreSQL database

conn = psycopg2.connect(
    dbname="booksdb",
    user="postgres",
    password="12345",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

In [81]:
# Create the books table

cur.execute("""
CREATE TABLE books (
    id NUMERIC PRIMARY KEY,
    title TEXT,
    author TEXT,
    genre TEXT,
    publisher TEXT,
    year INT,
    price TEXT
);
""")

In [82]:
# Insert data into the books table

for book in books:
    cur.execute("""
        INSERT INTO books (id, title, author, genre, publisher, year, price)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
    """, (
        book.get("id"),
        book.get("title"),
        book.get("author"),
        book.get("genre"),
        book.get("publisher"),
        book.get("year"),
        book.get("price")
    ))

conn.commit()

In [83]:
# Query the database and load results into a DataFrame to display the summary table

sql = """
    SELECT
        year AS publication_year,
        COUNT(*) AS book_count,
        ROUND(
            AVG(
                CASE
                    WHEN price LIKE '%$%' THEN CAST(REPLACE(price, '$', '') AS NUMERIC)
                    ELSE CAST(REPLACE(price, '€', '') AS NUMERIC) * 1.2
                END
            ), 2
        ) AS average_price_usd
    FROM books
    GROUP BY year
    ORDER BY year;
"""

df = pd.read_sql_query(sql, conn)

print(df)

    publication_year  book_count  average_price_usd
0               1871          43              48.08
1               1883          56              52.51
2               1886          54              54.73
3               1904          37              54.74
4               1905          59              50.62
5               1938          42              46.76
6               1955          49              54.83
7               1958          32              44.17
8               1986         104              45.63
9               1987         120              50.44
10              1988         153              49.91
11              1989         103              50.39
12              1990         122              52.12
13              1991          94              50.01
14              1992         101              50.46
15              1993         114              51.80
16              1994         131              50.55
17              1995         112              44.93
18          

  df = pd.read_sql_query(sql, conn)


In [84]:
cur.close()
conn.close()