In [1]:
import sqlite3
import pandas as pd

# Path to the SQLite database file, relative to this notebook
database_path = "../Datasets/database/longlist.db"

# Open a connection to that file
conn = sqlite3.connect(database_path)

# Cursor bound to the connection
cursor = conn.cursor()

In [2]:
# List every table by reading the names stored in sqlite_master
tables_query = "SELECT name FROM sqlite_master WHERE type='table';"
tables = pd.read_sql(sql=tables_query, con=conn)
print("Database Tables:\n", tables)

Database Tables:
           name
0      authors
1     authored
2        books
3   publishers
4      ratings
5  translators
6   translated


In [3]:
# Get column info for one table. The table name is set once and reused in
# both the PRAGMA statement and the printed label, so the two cannot drift
# apart (the label previously named a different table than the one queried).
table = "authors"
columns_query = f"PRAGMA table_info({table});"
columns = pd.read_sql(columns_query, conn)
print(f"\nTable Schema for '{table}':\n", columns)


Table Schema for 'episodes':
    cid     name     type  notnull dflt_value  pk
0    0       id  INTEGER        0       None   1
1    1     name     TEXT        0       None   0
2    2  country     TEXT        0       None   0
3    3    birth  INTEGER        0       None   0


In [7]:
# Close the connection opened in the first cell; the sections below open their own
conn.close()

### Loading from SQL file

In [16]:
# Fresh connection and cursor for the file-based query below
conn = sqlite3.connect(database_path)
cursor = conn.cursor()

# Directory prefix (with trailing slash) that .sql file names are appended to
query_path = "Queries/longlist/"

In [None]:
# Load a SQL statement from a file and run it on the open connection.
# Uses `conn` and `query_path` as defined earlier in the notebook.

# File name and full path are kept in separate names from the SQL text, so
# `query` only ever holds the statement that is executed.
query_file = "load_test.sql"
qpath = query_path + query_file

try:
    # Read the SQL text; the encoding is explicit so the file decodes the
    # same way regardless of the platform's default encoding.
    with open(qpath, "r", encoding="utf-8") as file:
        query = file.read()

    # Execute the query and show the result
    df = pd.read_sql(query, conn)
    print("---", df)
finally:
    # Close the connection even if the file is missing or the query fails
    conn.close()

---     profession                  name
0   translator    Adrian Nathan West
1   translator     Alison L. Strayer
2   translator          Angela Rodel
3   translator  Aniruddhan Vasudevan
4   translator      Anna Moschovakis
..         ...                   ...
69  translator      Sora Kim-Russell
70  translator        Stephen Snyder
71  translator       Susan Bernofsky
72  translator          Tiffany Tsao
73  translator     Ngũgĩ wa Thiong'o

[74 rows x 2 columns]


### General

In [None]:
# Connect to the database
conn = sqlite3.connect(database_path)

# All queries run inside try/finally so the connection is closed even when
# one of them raises. SQL text inside the triple-quoted strings is left
# exactly as written.
try:
    cursor = conn.cursor()

    # Query 1: Select all publishers
    df_publishers = pd.read_sql("SELECT * FROM publishers", conn)
    print("\nPublishers:\n", df_publishers)

    # Query 2: Find ID of 'MacLehose Press'
    df_publisher_id = pd.read_sql("SELECT id FROM publishers WHERE publisher = 'MacLehose Press'", conn)
    print("\nMacLehose Press ID:\n", df_publisher_id)

    # Query 3: Find books published by MacLehose Press, using the id returned
    # by Query 2 rather than a hard-coded number. .tolist() yields plain
    # Python values that sqlite3 can bind; if the publisher is not found the
    # parameter is None (NULL), which matches no rows.
    publisher_ids = df_publisher_id["id"].tolist()
    maclehose_id = publisher_ids[0] if publisher_ids else None
    df_books = pd.read_sql(
        "SELECT title FROM books WHERE publisher_id = ?", conn, params=(maclehose_id,)
    )
    print("\nBooks by MacLehose Press:\n", df_books)

    # Query 4: Find books by nested query
    df_books_nested = pd.read_sql("""
    SELECT title FROM books WHERE publisher_id = (
        SELECT id FROM publishers WHERE publisher = 'MacLehose Press'
    )""", conn)
    print("\nBooks by MacLehose Press (Nested Query):\n", df_books_nested)

    # Query 5: Find ratings for 'In Memory of Memory'
    df_ratings = pd.read_sql("""
    SELECT rating FROM ratings WHERE book_id = (
        SELECT id FROM books WHERE title = 'In Memory of Memory'
    )""", conn)
    print("\nRatings for 'In Memory of Memory':\n", df_ratings)

    # Query 6: Find the average rating for 'In Memory of Memory'
    df_avg_rating = pd.read_sql("""
    SELECT AVG(rating) FROM ratings WHERE book_id = (
        SELECT id FROM books WHERE title = 'In Memory of Memory'
    )""", conn)
    print("\nAverage Rating for 'In Memory of Memory':\n", df_avg_rating)

    # Query 7: Find author of 'Flights'
    df_author = pd.read_sql("""
    SELECT name FROM authors WHERE id = (
        SELECT author_id FROM authored WHERE book_id = (
            SELECT id FROM books WHERE title = 'Flights'
        )
    )""", conn)
    print("\nAuthor of 'Flights':\n", df_author)

    # Query 8: Find books by 'Fernanda Melchor'
    df_books_melchor = pd.read_sql("""
    SELECT title FROM books WHERE id IN (
        SELECT book_id FROM authored WHERE author_id = (
            SELECT id FROM authors WHERE name = 'Fernanda Melchor'
        )
    )""", conn)
    print("\nBooks by Fernanda Melchor:\n", df_books_melchor)

    # Query 9: Find books with 'love' in the title
    df_love_books = pd.read_sql("SELECT title FROM books WHERE title LIKE '%love%'", conn)
    print("\nBooks with 'love' in title:\n", df_love_books)

    # Query 10: Find books starting with 'The'
    df_the_books = pd.read_sql("SELECT title FROM books WHERE title LIKE 'The %'", conn)
    print("\nBooks starting with 'The':\n", df_the_books)

    # Query 11: Find books matching uncertain title spelling
    # ('_' in a LIKE pattern stands for exactly one character)
    df_uncertain_book = pd.read_sql("SELECT title FROM books WHERE title LIKE 'P_re'", conn)
    print("\nBooks matching uncertain spelling:\n", df_uncertain_book)

    # Query 12: Find average rating of all books
    df_avg_all = pd.read_sql("SELECT ROUND(AVG(rating), 2) AS 'Average Rating' FROM ratings", conn)
    print("\nAverage rating of all books:\n", df_avg_all)

    # Query 13: Find maximum and minimum rating
    df_max_rating = pd.read_sql("SELECT MAX(rating) FROM ratings", conn)
    df_min_rating = pd.read_sql("SELECT MIN(rating) FROM ratings", conn)
    print("\nMaximum rating:\n", df_max_rating)
    print("\nMinimum rating:\n", df_min_rating)

    # Query 14: Sum the page counts of all books
    df_total_pages = pd.read_sql("SELECT SUM(pages) FROM books", conn)
    print("\nTotal number of pages:\n", df_total_pages)

    # Query 15: Count total books and translators
    df_total_books = pd.read_sql("SELECT COUNT(*) FROM books", conn)
    df_total_translators = pd.read_sql("SELECT COUNT(name) FROM translators", conn)
    print("\nTotal number of books:\n", df_total_books)
    print("\nTotal number of translators:\n", df_total_translators)

    # Query 16: Count unique publishers
    df_unique_publishers = pd.read_sql("SELECT COUNT(DISTINCT publisher) FROM publishers", conn)
    print("\nTotal unique publishers:\n", df_unique_publishers)
finally:
    # Close connection
    conn.close()


### Sets

In [None]:
# Connect to the database
conn = sqlite3.connect(database_path)

# All queries run inside try/finally so the connection is closed even when
# one of them raises. The SQL strings are left exactly as written, which is
# why their closing quotes sit at column 0 inside the indented block.
try:
    cursor = conn.cursor()

    # Query 1: UNION - Combines authors and translators into one result set
    df_union = pd.read_sql("""
    SELECT 'author' AS profession, name FROM authors
    UNION
    SELECT 'translator' AS profession, name FROM translators;
""", conn)
    print("\nAuthors and Translators:\n", df_union)

    # Query 2: INTERSECT - Finds authors who are also translators
    df_intersect = pd.read_sql("""
    SELECT name FROM authors
    INTERSECT
    SELECT name FROM translators;
""", conn)
    print("\nAuthors who are also Translators:\n", df_intersect)

    # Query 3: Finds books translated by Sophie Hughes
    df_sophie_books = pd.read_sql("""
    SELECT book_id FROM translated WHERE translator_id = (
        SELECT id FROM translators WHERE name = 'Sophie Hughes'
    );
""", conn)
    print("\nBooks translated by Sophie Hughes:\n", df_sophie_books)

    # Query 4: Finds books translated by Margaret Jull Costa
    df_margaret_books = pd.read_sql("""
    SELECT book_id FROM translated WHERE translator_id = (
        SELECT id FROM translators WHERE name = 'Margaret Jull Costa'
    );
""", conn)
    print("\nBooks translated by Margaret Jull Costa:\n", df_margaret_books)

    # Query 5: INTERSECT - Finds books translated by both Sophie Hughes and Margaret Jull Costa
    df_common_books = pd.read_sql("""
    SELECT book_id FROM translated WHERE translator_id = (
        SELECT id FROM translators WHERE name = 'Sophie Hughes'
    )
    INTERSECT
    SELECT book_id FROM translated WHERE translator_id = (
        SELECT id FROM translators WHERE name = 'Margaret Jull Costa'
    );
""", conn)
    print("\nBooks translated by both Sophie Hughes and Margaret Jull Costa:\n", df_common_books)

    # Query 6: Retrieves the titles of books translated by both translators
    df_common_titles = pd.read_sql("""
    SELECT title FROM books WHERE id IN (
        SELECT book_id FROM translated WHERE translator_id = (
            SELECT id FROM translators WHERE name = 'Sophie Hughes'
        )
        INTERSECT
        SELECT book_id FROM translated WHERE translator_id = (
            SELECT id FROM translators WHERE name = 'Margaret Jull Costa'
        )
    );
""", conn)
    print("\nTitles of books translated by both Sophie Hughes and Margaret Jull Costa:\n", df_common_titles)

    # Query 7: EXCEPT - Finds translators who are not authors
    df_translators_not_authors = pd.read_sql("""
    SELECT name FROM translators
    EXCEPT
    SELECT name FROM authors;
""", conn)
    print("\nTranslators who are not Authors:\n", df_translators_not_authors)
finally:
    # Close the connection
    conn.close()


### Groups

In [None]:
# Connect to the database
conn = sqlite3.connect(database_path)

# All queries run inside try/finally so the connection is closed even when
# one of them raises. The SQL strings are left exactly as written, which is
# why their closing quotes sit at column 0 inside the indented block.
try:
    cursor = conn.cursor()

    # Query 1: Finds average rating for each book
    df_avg_ratings = pd.read_sql("""
    SELECT book_id, ROUND(AVG(rating), 2) AS "average rating" FROM ratings
    GROUP BY book_id;
""", conn)
    print("\nAverage rating for each book:\n", df_avg_ratings)

    # Query 2: Joins titles with average ratings
    df_avg_ratings_with_titles = pd.read_sql("""
    SELECT title, ROUND(AVG(rating), 2) AS "average rating" FROM ratings
    JOIN books ON books.id = ratings.book_id
    GROUP BY book_id;
""", conn)
    print("\nAverage rating per book title:\n", df_avg_ratings_with_titles)

    # Query 3: Chooses books with an average rating above 4.0
    # (the HAVING clause uses a strict '>', so exactly 4.0 is excluded)
    df_high_rated_books = pd.read_sql("""
    SELECT title, ROUND(AVG(rating), 2) AS "average rating" FROM ratings
    JOIN books ON books.id = ratings.book_id
    GROUP BY book_id
    HAVING "average rating" > 4.0;
""", conn)
    print("\nBooks with an average rating above 4.0:\n", df_high_rated_books)

    # Query 4: Chooses books with an average rating above 4.0, ordered by
    # rating from highest to lowest
    df_high_rated_books_ordered = pd.read_sql("""
    SELECT book_id, ROUND(AVG(rating), 2) AS "average rating" FROM ratings
    GROUP BY book_id
    HAVING "average rating" > 4.0
    ORDER BY "average rating" DESC;
""", conn)
    print("\nBooks with an average rating above 4.0, ordered by rating:\n", df_high_rated_books_ordered)
finally:
    # Close the connection
    conn.close()

### Nested

In [None]:
# Connect to the database
conn = sqlite3.connect(database_path)

# All queries run inside try/finally so the connection is closed even when
# one of them raises. The SQL strings are left exactly as written, which is
# why their closing quotes sit at column 0 inside the indented block.
try:
    cursor = conn.cursor()

    # Query 1: Finds all books published by MacLehose Press
    df_maclehose_books = pd.read_sql("""
    SELECT title FROM books WHERE publisher_id = (
        SELECT id FROM publishers WHERE publisher = 'MacLehose Press'
    );
""", conn)
    print("\nBooks published by MacLehose Press:\n", df_maclehose_books)

    # Query 2: Finds all ratings for "In Memory of Memory"
    df_memory_ratings = pd.read_sql("""
    SELECT rating FROM ratings WHERE book_id = (
        SELECT id FROM books WHERE title = 'In Memory of Memory'
    );
""", conn)
    print("\nRatings for 'In Memory of Memory':\n", df_memory_ratings)

    # Query 3: Finds average rating for "In Memory of Memory"
    df_memory_avg_rating = pd.read_sql("""
    SELECT AVG(rating) FROM ratings WHERE book_id = (
        SELECT id FROM books WHERE title = 'In Memory of Memory'
    );
""", conn)
    print("\nAverage rating for 'In Memory of Memory':\n", df_memory_avg_rating)

    # Query 4: Finds author who wrote "The Birthday Party"
    df_birthday_author = pd.read_sql("""
    SELECT name FROM authors WHERE id = (
        SELECT author_id FROM authored WHERE book_id = (
            SELECT id FROM books WHERE title = 'The Birthday Party'
        )
    );
""", conn)
    print("\nAuthor of 'The Birthday Party':\n", df_birthday_author)

    # Query 5: Finds all books by Fernanda Melchor
    df_melchor_books = pd.read_sql("""
    SELECT title FROM books WHERE id IN (
        SELECT book_id FROM authored WHERE author_id = (
            SELECT id FROM authors WHERE name = 'Fernanda Melchor'
        )
    );
""", conn)
    print("\nBooks by Fernanda Melchor:\n", df_melchor_books)

    # Query 6: Finds books by multiple authors (Fernanda Melchor, Annie Ernaux)
    df_multiple_authors_books = pd.read_sql("""
    SELECT title FROM books WHERE id IN (
        SELECT book_id FROM authored WHERE author_id IN (
            SELECT id FROM authors WHERE name IN ('Fernanda Melchor', 'Annie Ernaux')
        )
    );
""", conn)
    print("\nBooks by Fernanda Melchor or Annie Ernaux:\n", df_multiple_authors_books)
finally:
    # Close the connection
    conn.close()
