# Advanced Database Project
### Group 22
* Tommaso Tragno - fc64699
* Duarte Gonçalves - fc64465
* Agnieszka Radomska - fc64357
* Enzo Chatalov - fc54414

In [1]:
import pandas as pd
import json
import pymongo as pm
import mysql.connector
import time
from sqlalchemy import create_engine,text
from sqlalchemy.exc import PendingRollbackError

## Configuration file

Create a `config.json` file in the same directory as this notebook, with the following structure:

```
{
    "mongo": {
        "username": "your_mongo_username",
        "password": "your_mongo_password",
        "host": "your_mongo_host",
        "port": "your_mongo_port"
    },
    "mysql": {
        "username": "your_mysql_username",
        "password": "your_mysql_password",
        "host": "your_mysql_host",
        "port": "your_mysql_port"
    }
}
```

In [2]:
# Read the JSON file holding the MongoDB and MySQL connection settings
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

# Unpack the connection settings of each database (username, password, host, port)
mongo_username, mongo_password, mongo_host, mongo_port = (
    config["mongo"][key] for key in ("username", "password", "host", "port")
)
mysql_username, mysql_password, mysql_host, mysql_port = (
    config["mysql"][key] for key in ("username", "password", "host", "port")
)

In [10]:
# Directory from which Books.csv, Ratings.csv and Users.csv are loaded
path = './kagglehub/datasets/arashnic/book-recommendation-dataset/versions/3'

## Data validation
1. Load the `.csv` files from the specified path.
2. Drop the rows that do not contain a primary key.
3. Fill the `NA` cells with a predefined value.
4. Drop any duplicated rows.
5. Convert the string data into the proper data type.

In [11]:
# Load the three CSV files of the dataset into pandas data frames
df_books = pd.read_csv(f'{path}/Books.csv')
df_ratings = pd.read_csv(f'{path}/Ratings.csv')
df_users = pd.read_csv(f'{path}/Users.csv')

# Report, per data frame, whether at least one NA cell exists before cleaning
print('Check NA values presence before data validation:')
print(f'Books data frame: {df_books.isna().any().any()}')
print(f'Ratings data frame: {df_ratings.isna().any().any()}')
print(f'Users data frame: {df_users.isna().any().any()}')

print('\nCheck missing values for primary key columns:')
# Users data validation: drop rows without a User-ID, then fill the other gaps
orig = df_users.shape[0]
df_users = df_users.dropna(subset=['User-ID'])
count = orig - df_users.shape[0]
print(f'Dropped {count} rows form Users')
# The '0' placeholder is a string here; it is converted to int in the
# "data type conversion" step below
df_users = df_users.fillna({'Location': 'not available', 'Age': '0'})

# Books data validation: drop rows without an ISBN, then fill the other gaps
orig = df_books.shape[0]
df_books = df_books.dropna(subset=['ISBN'])
count = orig - df_books.shape[0]
print(f'Dropped {count} rows from Books')
df_books = df_books.fillna({
    'Book-Title': 'not available', 
    'Book-Author': 'not available', 
    'Year-Of-Publication': '0',
    'Publisher': 'not available', 
    'Image-URL-S': 'not available', 
    'Image-URL-M': 'not available', 
    'Image-URL-L': 'not available'
})

# Ratings data validation: a rating needs both parts of its key (User-ID, ISBN)
orig = df_ratings.shape[0]
df_ratings = df_ratings.dropna(subset=['User-ID', 'ISBN'])
count = orig - df_ratings.shape[0]
print(f'Dropped {count} rows from Ratings')
df_ratings = df_ratings.fillna({'Book-Rating': '0'})

# FIX3 goes in here
# Remove rows that are exact duplicates (all columns equal) and report how many
print('\nCheck duplicated rows:')
orig = df_users.shape[0]
df_users = df_users.drop_duplicates()
count = orig - df_users.shape[0]
print(f'Dropped {count} duplicated rows form Users')
orig = df_books.shape[0]
df_books = df_books.drop_duplicates()
count = orig - df_books.shape[0]
print(f'Dropped {count} duplicated rows form Books')
orig = df_ratings.shape[0]
df_ratings = df_ratings.drop_duplicates()
count = orig - df_ratings.shape[0]
print(f'Dropped {count} duplicated rows form Ratings')

# data type conversion: values that cannot be parsed as numbers become NaN
# (errors='coerce') and are then replaced by 0 before the cast to int
df_users['User-ID'] = pd.to_numeric(df_users['User-ID'], errors='coerce').fillna(0).astype(int)
df_users['Age'] = pd.to_numeric(df_users['Age'], errors='coerce').fillna(0).astype(int)

df_ratings['User-ID'] = pd.to_numeric(df_ratings['User-ID'], errors='coerce').fillna(0).astype(int)
df_ratings['Book-Rating'] = pd.to_numeric(df_ratings['Book-Rating'], errors='coerce').fillna(0).astype(int)

df_books['Year-Of-Publication'] = pd.to_numeric(df_books['Year-Of-Publication'], errors='coerce').fillna(0).astype(int)


# Same NA report as at the top of the cell, now on the cleaned data frames
print('\nCheck NA values presence after data validation:')
print(f'Books data frame: {df_books.isna().any().any()}')
print(f'Ratings data frame: {df_ratings.isna().any().any()}')
print(f'Users data frame: {df_users.isna().any().any()}')

Check NA values presence before data validation:
Books data frame: True
Ratings data frame: False
Users data frame: True

Check missing values for primary key columns:
Dropped 0 rows form Users
Dropped 0 rows from Books
Dropped 0 rows from Ratings

Check duplicated rows:
Dropped 0 duplicated rows form Users
Dropped 0 duplicated rows form Books
Dropped 0 duplicated rows form Ratings

Check NA values presence after data validation:
Books data frame: False
Ratings data frame: False
Users data frame: False


# MongoDB
## Connect to and populate the NoSQL database

In [3]:
# Open the MongoDB connection using the host, port and credentials from config.json
mongo_uri = f'mongodb://{mongo_host}:{mongo_port}'
client = pm.MongoClient(mongo_uri, username=mongo_username, password=mongo_password)

Run this if the database is already populated

In [None]:
# Handles to the existing "project" database and its three collections
db = client["project"]
books, ratings, users = db["books"], db["ratings"], db["users"]

Create a new collection for different structure tests

In [13]:
# Handles to the collections used for the alternative (nested) document structure
books_new, users_new = db["books_new"], db["users_new"]

Or this if it is not

In [None]:
# Start from a clean slate: remove any previous copy of the database
client.drop_database("project")

db = client["project"]
books, ratings, users = db["books"], db["ratings"], db["users"]

# Time only the three bulk inserts (books, then ratings, then users)
insert_started = time.time()
for collection, frame in ((books, df_books), (ratings, df_ratings), (users, df_users)):
    collection.insert_many(frame.to_dict(orient="records"), ordered=False)
mdbInsertion = time.time() - insert_started

# Give every book document a Global_Rating field, initialised to 0.00
books.update_many({}, {"$set": {"Global_Rating": 0.00}})

print("Data inserted into MongoDB collections successfully.")

Data inserted into MongoDB collections successfully.


## New collection structure

In [14]:
# Build one document per book, keyed by ISBN, with its ratings embedded.
# itertuples() is used instead of iterrows(): it does not build a Series per
# row and keeps each column's own dtype.
book_columns = [
    'ISBN', 'Book-Title', 'Book-Author', 'Publisher', 'Year-Of-Publication',
    'Image-URL-S', 'Image-URL-M', 'Image-URL-L',
]
books_dict = {}
for isbn, title, author, publisher, year, image_s, image_m, image_l in (
    df_books[book_columns].itertuples(index=False, name=None)
):
    books_dict[isbn] = {
        "isbn": isbn,
        "title": title,
        "author": author,
        "publisher": publisher,
        "year": int(year),
        "global_rating": 0,  # Default, replaced below when the book has ratings
        "images": {
            "small": image_s,
            "medium": image_m,
            "large": image_l,
        },
        "ratings": [],  # Filled from the ratings data frame below
    }

# Attach every rating to its book; ratings whose ISBN is not in Books are skipped
rating_columns = ['User-ID', 'ISBN', 'Book-Rating']
for user_id, isbn, book_rating in df_ratings[rating_columns].itertuples(index=False, name=None):
    book = books_dict.get(isbn)
    if book is not None:
        book["ratings"].append({
            "user_id": user_id,
            "rating": int(book_rating),
        })

# Global rating = mean of the embedded ratings, rounded to 2 decimals
for book in books_dict.values():
    if book["ratings"]:
        total_rating = sum(rating["rating"] for rating in book["ratings"])
        book["global_rating"] = round(total_rating / len(book["ratings"]), 2)

# Only the two insert_many calls are timed, not the document construction above
start_time = time.time()
books_new.insert_many(list(books_dict.values()))
users_new.insert_many(df_users.to_dict(orient="records"), ordered=False)
mdbInsertion2 = time.time() - start_time

print(f'Data successfully loaded into the MongoDB books collection in {mdbInsertion2} seconds!')

Data successfully loaded into the MongoDB books collection in 5.960985898971558 seconds!


## Queries
### Simple 
#### 1- All books published in the year 2000

In [8]:
# Simple query 1 (MongoDB): all books published in the given year
year = 2000
start_time = time.time()
# find() only creates a lazy cursor; list() forces the query to run and the
# documents to be fetched inside the timed section (like fetchall() in MySQL)
books_in_year = list(books.find({"Year-Of-Publication": year}))
mdbSimple1 = time.time() - start_time
# The result is already in memory, so no extra count_documents() round-trip is needed
print(f"Total Number of Books Published in the year {year}: {len(books_in_year)}")
for book in books_in_year:
    bookTitle = book.get("Book-Title")
    bookISBN = book.get("ISBN")
    print(f"ISBN: {bookISBN}, Book Title: {bookTitle}")

Total Number of Books Published in the year 2000: 17234
ISBN: 0425176428, Book Title: What If?: The World's Foremost Military Historians Imagine What Might Have Been
ISBN: 080652121X, Book Title: Hitler's Secret Bankers: The Myth of Swiss Neutrality During the Holocaust
ISBN: 0061076031, Book Title: Mary-Kate &amp; Ashley Switching Goals (Mary-Kate and Ashley Starring in)
ISBN: 0345417623, Book Title: Timeline
ISBN: 3442446937, Book Title: Tage der Unschuld.
ISBN: 0375406328, Book Title: Lying Awake
ISBN: 0553582909, Book Title: Icebound
ISBN: 0842342702, Book Title: Left Behind: A Novel of the Earth's Last Days (Left Behind #1)
ISBN: 0312970242, Book Title: The Angel Is Near
ISBN: 0375410538, Book Title: Anil's Ghost
ISBN: 0340767936, Book Title: Turning Thirty
ISBN: 0446677450, Book Title: Rich Dad, Poor Dad: What the Rich Teach Their Kids About Money--That the Poor and Middle Class Do Not!
ISBN: 0446608653, Book Title: The Alibi
ISBN: 0812575954, Book Title: The Deal
ISBN: 067104285

#### 2- All users that are older than 30 years old

In [8]:
# Simple query 2 (MongoDB): all users strictly older than the given age
age = 30
start_time = time.time()
# find() only creates a lazy cursor; list() forces the query to run and the
# documents to be fetched inside the timed section (like fetchall() in MySQL)
users_older_than_30 = list(users.find({"Age": {"$gt": age}}))
mdbSimple2 = time.time() - start_time
# Using len() also avoids nesting double quotes inside the f-string, which is
# a SyntaxError on Python versions before 3.12
print(f"Total Number of Users older than {age}: {len(users_older_than_30)}")
for user in users_older_than_30:
    userID = user.get("User-ID")
    print(f"User ID: {userID}")

Total Number of Users older than 30: 91816
User ID: 6
User ID: 21
User ID: 25
User ID: 27
User ID: 33
User ID: 38
User ID: 40
User ID: 44
User ID: 46
User ID: 51
User ID: 54
User ID: 63
User ID: 64
User ID: 67
User ID: 70
User ID: 72
User ID: 75
User ID: 85
User ID: 89
User ID: 90
User ID: 93
User ID: 94
User ID: 99
User ID: 100
User ID: 103
User ID: 104
User ID: 105
User ID: 112
User ID: 114
User ID: 117
User ID: 119
User ID: 124
User ID: 125
User ID: 129
User ID: 132
User ID: 133
User ID: 139
User ID: 144
User ID: 148
User ID: 157
User ID: 158
User ID: 164
User ID: 165
User ID: 168
User ID: 172
User ID: 174
User ID: 176
User ID: 177
User ID: 182
User ID: 189
User ID: 190
User ID: 191
User ID: 196
User ID: 199
User ID: 206
User ID: 210
User ID: 216
User ID: 218
User ID: 221
User ID: 228
User ID: 242
User ID: 245
User ID: 251
User ID: 252
User ID: 255
User ID: 259
User ID: 260
User ID: 261
User ID: 281
User ID: 284
User ID: 299
User ID: 301
User ID: 302
User ID: 308
User ID: 311
User I

### Complex
#### 1- Update all ratings from UserID "276890" to 8

In [None]:
def complexQuery1_MongoDB():
    """Set every rating of user 276890 to 8 in the ratings collection.

    The user's rating documents are printed before and after the update.

    Returns:
        The duration of the update_many call in seconds (printing excluded).
    """
    selector = {"User-ID": 276890}

    def show_ratings(header):
        # Print the header followed by every rating document of the user
        print(header)
        for document in ratings.find(selector):
            print(document)

    show_ratings("Ratings before the update:")

    began = time.time()
    ratings.update_many(selector, {"$set": {"Book-Rating": 8}})
    elapsed = time.time() - began

    show_ratings("Ratings after the update:")
    return elapsed

mdbComplex1_beforeIdx = complexQuery1_MongoDB()

Ratings before the update:
{'_id': ObjectId('6748f3a86d98cf89c711a539'), 'User-ID': 276890, 'ISBN': '0140270272', 'Book-Rating': 0}
Ratings after the update:
{'_id': ObjectId('6748f3a86d98cf89c711a539'), 'User-ID': 276890, 'ISBN': '0140270272', 'Book-Rating': 8}


#### 2 - Add a new column in the Books table with the mean ratings of every book

##### Original Code - without optimizations

In [None]:
"""pipeline = [
    {
        "$group": {
            "_id": "$ISBN",  # Group by ISBN
            "average_rating": {"$avg": "$Book-Rating"}  # Calculate the average rating
        }
    },
    {
        "$project": {
            "_id": 1,
            "average_rating": {"$round": ["$average_rating", 2]}  # Round to 2 decimal places
        }
    }
]

start = time.time()
average_ratings = list(ratings.aggregate(pipeline))

for record in average_ratings:
    isbn = record["_id"]
    avg_rating = record["average_rating"]
    
    # Update the book in the books collection
    books.update_one({"ISBN": isbn}, {"$set": {"Global_Rating": avg_rating}})

mdbComplex2 = time.time() - start    
print("Global ratings added to the books collection successfully.")"""


##### 1st Optimization Option - Bulk Write

In [None]:
def complexQuery2_MongoDB():
    """Store the mean rating of every book in its Global_Rating field.

    The averages are computed by an aggregation on the ratings collection
    (grouped by ISBN, rounded to 2 decimals) and written to the books
    collection with a single bulk write.

    Returns:
        The duration in seconds of the aggregation plus the bulk write, so
        the figure covers the same work as complexQuery2_MySQL (which times
        both its SELECT AVG and its UPDATEs).
    """
    pipeline = [
        {
            "$group": {
                "_id": "$ISBN",  # Group by ISBN
                "average_rating": {"$avg": "$Book-Rating"},  # Mean rating per book
            }
        },
        {
            "$project": {
                "_id": 1,
                "average_rating": {"$round": ["$average_rating", 2]},  # 2 decimal places
            }
        },
    ]

    start = time.time()
    bulk_updates = [
        pm.UpdateOne(
            {"ISBN": record["_id"]},
            {"$set": {"Global_Rating": record["average_rating"]}},
        )
        for record in ratings.aggregate(pipeline)
    ]

    # bulk_write raises on an empty request list, so skip it when there are no ratings
    if bulk_updates:
        books.bulk_write(bulk_updates)
    endTime = time.time() - start
    print("Global ratings added to the books collection successfully.")

    return endTime

mdbComplex2_beforeIndex1 = complexQuery2_MongoDB()

##### 1st optimization on the new structure

In [15]:
# Recompute global_rating for every document of books_new from its embedded ratings.
# Local names (book_docs, book_ratings) are used on purpose: the module-level
# `books` and `ratings` must keep pointing at the MongoDB collections, because
# later cells call methods such as ratings.aggregate() and books.bulk_write().

# Step 1: Fetch all books with their ratings
print("Fetching books with ratings...")
start = time.time()
book_docs = list(books_new.find({}, {"_id": 0, "isbn": 1, "ratings": 1}))
print(f"Fetched {len(book_docs)} books in {time.time() - start} seconds.")

# Step 2: Compute global ratings
print("Computing global ratings...")
bulk_updates = []
for book_doc in book_docs:
    book_ratings = book_doc.get("ratings", [])

    # Mean of the embedded ratings, or 0 when the book has none
    if book_ratings:
        average_rating = round(
            sum(entry["rating"] for entry in book_ratings) / len(book_ratings), 2
        )
    else:
        average_rating = 0

    bulk_updates.append(
        pm.UpdateOne(
            {"isbn": book_doc["isbn"]},  # Match book by ISBN
            {"$set": {"global_rating": average_rating}},
        )
    )

# Step 3: Execute bulk updates
# NOTE(review): no index on "isbn" is created for books_new anywhere in this
# notebook, which is presumably why this step is so slow — confirm and consider
# books_new.create_index("isbn") before running it.
print("Executing bulk updates...")
start = time.time()
if bulk_updates:
    books_new.bulk_write(bulk_updates)
print(f"Bulk updates completed in {time.time() - start} seconds.")

Fetching books with ratings...
Fetched 271360 books in 0.857306957244873 seconds.
Computing global ratings...
Executing bulk updates...
Bulk updates completed in 11725.978121995926 seconds.


##### 2nd Optimization Option - Bulk Write with condition

In [None]:
# 2nd option: same aggregation as the 1st, but only books whose stored
# Global_Rating differs from the freshly computed average are updated.
pipeline = [
    {
        "$group": {
            "_id": "$ISBN",  # Group by ISBN
            "average_rating": {"$avg": "$Book-Rating"}  # Calculate the average rating
        }
    },
    {
        "$project": {
            "_id": 1,
            "average_rating": {"$round": ["$average_rating", 2]}  # Round to 2 decimal places
        }
    }
]

start = time.time()
average_ratings = list(ratings.aggregate(pipeline))

# Read the stored ratings with ONE query instead of one find_one() per ISBN.
# setdefault keeps the first document seen for an ISBN, as find_one() would.
current_ratings = {}
for doc in books.find({}, {"ISBN": 1, "Global_Rating": 1, "_id": 0}):
    current_ratings.setdefault(doc.get("ISBN"), doc.get("Global_Rating"))

# Prepare bulk update operations, only for existing books whose rating changed
bulk_updates = []
for record in average_ratings:
    isbn = record["_id"]
    avg_rating = record["average_rating"]
    if isbn in current_ratings and current_ratings[isbn] != avg_rating:
        bulk_updates.append(
            pm.UpdateOne({"ISBN": isbn}, {"$set": {"Global_Rating": avg_rating}})
        )

# bulk_write raises on an empty request list, which is exactly what happens
# when every stored rating is already up to date
if bulk_updates:
    books.bulk_write(bulk_updates)
mdbComplex2_beforeIndex2 = time.time() - start
print("Global ratings added to the books collection successfully.")

##### 3rd Optimization Option - Compute Everything in Python

In [9]:
# 3rd option: compute the averages in pandas and write them back in bulk.
# The frames get their own names: reusing df_ratings / df_books would replace
# the cleaned data frames with two-column projections and break the MySQL
# insertion cell, which still needs all of their columns.

# Load the (ISBN, Book-Rating) pairs of every rating
ratings_frame = pd.DataFrame(list(ratings.find({}, {"ISBN": 1, "Book-Rating": 1, "_id": 0})))

# Load the ISBN of every book
books_frame = pd.DataFrame(list(books.find({}, {"ISBN": 1, "_id": 0})))

# Compute the average rating for each ISBN, rounded to 2 decimals
average_ratings = ratings_frame.groupby("ISBN")["Book-Rating"].mean().round(2).reset_index()

# Left merge keeps every book; books without ratings get a rating of 0
books_with_ratings = books_frame.merge(average_ratings, on="ISBN", how="left").fillna({"Book-Rating": 0})

# Prepare bulk update operations (column-wise iteration instead of iterrows)
bulk_updates = [
    pm.UpdateOne(
        {"ISBN": isbn},
        {"$set": {"Global_Rating": float(mean_rating)}}
    )
    for isbn, mean_rating in zip(books_with_ratings["ISBN"], books_with_ratings["Book-Rating"])
]

# Perform the bulk write operation; only this step is timed
start = time.time()
if bulk_updates:
    books.bulk_write(bulk_updates)
mdbComplex2_beforeIndex3 = time.time() - start

print(f"Global ratings added to the books collection successfully in {mdbComplex2_beforeIndex3:.2f} seconds.")

Global ratings added to the books collection successfully in 13824.80 seconds.


#### Optimization Comparison

In [None]:
# One-row table with the measured time of each optimization option of the
# 2nd complex query; saved to CSV and printed
option_times = {
    "1st Option": mdbComplex2_beforeIndex1,
    "2nd Option": mdbComplex2_beforeIndex2,
    "3rd Option": mdbComplex2_beforeIndex3,
}
comparison_table = pd.DataFrame(option_times, index=["2nd Complex Query"])
comparison_table.to_csv("optimizationTImes_table.csv")
print(comparison_table)

### Indexing

In [None]:
# Create single-field ascending indexes on the lookup keys
ratings.create_index([("ISBN", pm.ASCENDING)])
books.create_index([("ISBN", pm.ASCENDING)])
users.create_index([("User-ID", pm.ASCENDING)])
# NOTE(review): complexQuery1_MongoDB filters ratings by "User-ID", which is not
# covered by the index created on the ratings collection — confirm whether an
# index on ratings."User-ID" was intended for the before/after comparison.

# index_information is a method: it must be called, otherwise the f-string
# prints the bound-method repr instead of the index details
print(f'Index on ratings created: {ratings.index_information()}')
print(f'Index on books created: {books.index_information()}')
print(f'Index on users created: {users.index_information()}')

In [None]:
# Re-run both complex queries now that the indexes exist and keep their timings
mdbComplex1_afterIdx = complexQuery1_MongoDB()
mdbComplex2_afterIdx = complexQuery2_MongoDB()

# MySQL
## Connect to the MySQL database, create the schema and populate the tables

In [None]:
# Open the MySQL connection with the settings read from config.json
mysql_settings = {
    "host": mysql_host,
    "port": mysql_port,
    "user": mysql_username,
    "password": mysql_password,
}
mydb = mysql.connector.connect(**mysql_settings)

# Single cursor shared by all the MySQL cells below
cursor = mydb.cursor()

In [None]:
# Remove any previous copy of the "project" database (no error if it is absent)
cursor.execute("DROP DATABASE IF EXISTS project")

In [None]:
# Create the database and make it the current one
cursor.execute("CREATE DATABASE IF NOT EXISTS project")
cursor.execute("USE project")

cursor.execute("""
    CREATE TABLE IF NOT EXISTS users (
        user_id INT PRIMARY KEY,
        location VARCHAR(255),
        age INT
    )
""")

cursor.execute("""
    CREATE TABLE IF NOT EXISTS books (
        ISBN VARCHAR(20) PRIMARY KEY,
        Book_Title VARCHAR(255),
        Book_Author VARCHAR(255),
        Year_Of_Publication INT,
        Publisher VARCHAR(255),
        Image_URL_S VARCHAR(255),
        Image_URL_M VARCHAR(255),
        Image_URL_L VARCHAR(255),
        Global_Rating FLOAT
    )
""")

# ratings references both other tables, so books and users are inserted first
cursor.execute("""
    CREATE TABLE IF NOT EXISTS ratings (
        User_ID INT,
        ISBN VARCHAR(20),
        Book_Rating INT,
        FOREIGN KEY (User_ID) REFERENCES users(user_id),
        FOREIGN KEY (ISBN) REFERENCES books(ISBN),
        PRIMARY KEY (User_ID, ISBN)
    )
""")


def _insert_rows(insert_sql, frame, columns):
    """Insert the given columns of every row of *frame*, one statement per row.

    A row rejected by MySQL is printed together with the error and skipped, so
    one problematic row does not abort the load. Only mysql.connector errors
    are caught: anything else (a missing column, Ctrl-C, ...) propagates
    instead of being silently swallowed.
    """
    for values in frame[columns].itertuples(index=False, name=None):
        try:
            cursor.execute(insert_sql, values)
        except mysql.connector.Error as err:
            print(f"Could not insert {values}: {err}")


start_time = time.time()

_insert_rows(
    "INSERT IGNORE INTO books (ISBN, Book_Title, Book_Author, Year_Of_Publication, Publisher, Image_URL_S, Image_URL_M, Image_URL_L) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
    df_books,
    ['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher', 'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
)

# FIX1 was applied here
_insert_rows(
    "INSERT IGNORE INTO users (user_id, location, age) VALUES (%s, %s, %s)",
    df_users,
    ['User-ID', 'Location', 'Age'],
)

_insert_rows(
    "INSERT IGNORE INTO ratings (User_ID, ISBN, Book_Rating) VALUES (%s, %s, %s)",
    df_ratings,
    ['User-ID', 'ISBN', 'Book-Rating'],
)

# A single commit for all three tables; the timing includes it
mydb.commit()
sqlInsertion = time.time() - start_time

print("Data inserted successfully.")



## Queries
### Simple
#### 1- All books published in the year 2000

In [None]:
# Simple query 1 (MySQL): all books published in the year 2000.
# Both the execution and the fetch of the rows are timed.
start_time = time.time()
cursor.execute("SELECT ISBN, Book_Title FROM books WHERE Year_Of_Publication = 2000")
sql_books_in_year = cursor.fetchall()
sqlSimple1 = time.time() - start_time
# The message previously said "users older than 30 years" (copy-paste from query 2)
print(f'There are {len(sql_books_in_year)} books published in the year 2000:')
for bookISBN, bookTitle in sql_books_in_year:
    print(f"ISBN: {bookISBN}, Book Title: {bookTitle}")

#### 2- All users that are older than 30 years old

In [None]:
# Simple query 2 (MySQL): ids of all users older than 30.
# Both the execution and the fetch of the rows are timed.
query_started = time.time()
cursor.execute("SELECT user_id FROM users WHERE age > 30")
sql_users_above_30 = cursor.fetchall()
sqlSimple2 = time.time() - query_started

print(f'There are {len(sql_users_above_30)} users older than 30 years:')
for (user_id,) in sql_users_above_30:
    print(f"UserID: {user_id}")

### Complex
#### 1- Update all ratings from UserID "276890" to 8

In [None]:
def complexQuery1_MySQL():
    """Set every rating of user 276890 to 8 in the ratings table.

    The user's rating rows are printed before and after the update.

    Returns:
        The duration in seconds of the UPDATE statement plus its commit
        (printing excluded).
    """
    target_user = 276890
    rating_value = 8

    def dump_ratings(title):
        # Print the title followed by every rating row of the user
        print(title)
        cursor.execute("SELECT * FROM ratings WHERE User_ID = %s", (target_user,))
        for row in cursor.fetchall():
            print(row)

    dump_ratings("Ratings before the update:")

    began = time.time()
    cursor.execute(
        "UPDATE ratings SET Book_Rating = %s WHERE User_ID = %s",
        (rating_value, target_user),
    )
    mydb.commit()
    elapsed = time.time() - began

    dump_ratings("Ratings after the update:")
    return elapsed

sqlComplex1_beforeIdx = complexQuery1_MySQL()

#### 2 - Add a new column in the Books table with the mean ratings of every book

In [None]:
def complexQuery2_MySQL():
    """Store the mean rating of every rated book in books.Global_Rating.

    The averages (rounded to 2 decimals) are read with one grouped SELECT and
    written back with one UPDATE per ISBN, followed by a single commit.

    Returns:
        The duration in seconds of the SELECT, all UPDATEs and the commit.
    """
    began = time.time()

    # Mean rating per ISBN
    cursor.execute("""
        SELECT 
            ISBN, 
            ROUND(AVG(`Book_Rating`), 2) AS average_rating
        FROM ratings
        GROUP BY ISBN
    """)
    per_book_averages = cursor.fetchall()

    update_query = """
        UPDATE books 
        SET Global_Rating = %s 
        WHERE ISBN = %s
    """
    # Parameter order follows the placeholders: rating first, then ISBN
    for book_isbn, mean_rating in per_book_averages:
        cursor.execute(update_query, (mean_rating, book_isbn))

    mydb.commit()
    return time.time() - began

sqlComplex2_beforeIdx = complexQuery2_MySQL()

## Indexing

In [None]:
# Create one secondary index per table.
# NOTE(review): books.ISBN and users.user_id are declared PRIMARY KEY in the
# CREATE TABLE statements, so they are presumably indexed already — confirm
# that these extra indexes change anything.
index_statements = (
    "CREATE INDEX idx_book ON books(ISBN)",
    "CREATE INDEX idx_ratings ON ratings(ISBN)",
    "CREATE INDEX idx_user ON users(user_ID)",
)
for statement in index_statements:
    cursor.execute(statement)
print('Index added ')

In [None]:
# Re-run both complex queries now that the indexes exist and keep their timings
sqlComplex1_afterIdx = complexQuery1_MySQL()
sqlComplex2_afterIdx = complexQuery2_MySQL()

# Time Comparison

In [None]:
# Collect every measured time: one column per query, one row per database
query_times = {
    "Insertion": {"MongoDB": mdbInsertion, "MySQL": sqlInsertion},
    "Simple Query 1": {"MongoDB": mdbSimple1, "MySQL": sqlSimple1},
    "Simple Query 2": {"MongoDB": mdbSimple2, "MySQL": sqlSimple2},
    "Complex Query 1 (Before Index)": {"MongoDB": mdbComplex1_beforeIdx, "MySQL": sqlComplex1_beforeIdx},
    "Complex Query 1 (After Index)": {"MongoDB": mdbComplex1_afterIdx, "MySQL": sqlComplex1_afterIdx},
    # The pre-index MongoDB time of complexQuery2_MongoDB() is stored in
    # mdbComplex2_beforeIndex1; "mdbComplex2_beforeIdx" is never defined and
    # raised a NameError here
    "Complex Query 2 (Before Index)": {"MongoDB": mdbComplex2_beforeIndex1, "MySQL": sqlComplex2_beforeIdx},
    "Complex Query 2 (After Index)": {"MongoDB": mdbComplex2_afterIdx, "MySQL": sqlComplex2_afterIdx}
}


comparison_table = pd.DataFrame(query_times)
comparison_table.to_csv("comparison_tableV2.csv")
print(comparison_table)