In [2]:
import json
import time

import kagglehub
import mysql.connector
import pandas as pd
from pymongo import MongoClient
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from sqlalchemy.exc import PendingRollbackError

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the connection settings for MongoDB and MySQL from config.json
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

# Unpack username, password, host and port from the "mongo" section
mongo_username, mongo_password, mongo_host, mongo_port = [
    config["mongo"][field] for field in ("username", "password", "host", "port")
]

# Unpack the same four fields from the "mysql" section
mysql_username, mysql_password, mysql_host, mysql_port = [
    config["mysql"][field] for field in ("username", "password", "host", "port")
]

In [6]:
# Fetch the latest version of the book recommendation dataset from Kaggle.
# `path` is the directory returned by kagglehub for the downloaded files.
# (An earlier variant of this notebook used the football results dataset
#  "martj42/international-football-results-from-1872-to-2017" instead.)
path = kagglehub.dataset_download(
    "arashnic/book-recommendation-dataset"
)

# Show where the files ended up
print(path)

/home/tommaso/.cache/kagglehub/datasets/arashnic/book-recommendation-dataset/versions/3


In [12]:
# Read the three CSV files of the downloaded dataset into DataFrames
df_books = pd.read_csv(
    f'{path}/Books.csv',
    sep=',',
    # Keep the publication year as text rather than letting pandas infer a type
    # (presumably the column holds non-numeric entries -- TODO confirm)
    dtype={'Year-Of-Publication': str},
)
df_ratings = pd.read_csv(f'{path}/Ratings.csv')
df_users = pd.read_csv(f'{path}/Users.csv')

In [None]:
# Open an authenticated connection to the MongoDB server named in the config
mongo_uri = f'mongodb://{mongo_host}:{mongo_port}'
client = MongoClient(mongo_uri, username=mongo_username, password=mongo_password)

# Start from a clean slate: remove the whole "project" database, then make
# sure none of the three collections is left behind before re-creating them
client.drop_database("project")
db = client["project"]
for collection_name in ("books", "ratings", "users"):
    db[collection_name].drop()

# Handles to the three target collections
books, ratings, users = db["books"], db["ratings"], db["users"]

# Each DataFrame row becomes one document; ordered=False lets the server
# continue with the remaining documents if an individual insert fails
books.insert_many(
    df_books.to_dict(orient="records"),
    ordered=False,
)
ratings.insert_many(
    df_ratings.to_dict(orient="records"),
    ordered=False,
)
users.insert_many(
    df_users.to_dict(orient="records"),
    ordered=False,
)

In [13]:
# Connect to MySQL locally, recreate the target database and load the three
# DataFrames into it (one table per DataFrame).
database_name = 'project'
table_names = ['books', 'ratings', 'users']

# Build the connection URL from its components instead of formatting a string:
# URL.create() handles passwords containing characters such as '@', ':' or '/'
# that would otherwise be mis-parsed as URL delimiters.
server_url = URL.create(
    'mysql+mysqlconnector',
    username=mysql_username,
    password=mysql_password,
    host=mysql_host,
    port=mysql_port,
)

# Server-level connection (no database selected) used only to recreate the
# database; dispose of it even if one of the DDL statements fails.
engine = create_engine(server_url)
try:
    with engine.connect() as connection:
        connection.execute(text(f"DROP DATABASE IF EXISTS {database_name}"))
        connection.execute(text(f"CREATE DATABASE {database_name}"))
finally:
    engine.dispose()

# Re-connect, this time bound to the freshly created database
db_url = server_url.set(database=database_name)
engine = create_engine(db_url)

# Pair each table name with the DataFrame that populates it
frames_by_table = dict(zip(table_names, (df_books, df_ratings, df_users)))

try:
    for table_name, frame in frames_by_table.items():
        # method='multi' sends multi-row INSERT statements, chunksize rows at a time
        frame.to_sql(table_name, con=engine, if_exists='replace', index=False,
                     method='multi', chunksize=1000)
    print("Data inserted successfully!")

finally:
    # Release pooled connections; the engine object itself stays usable and
    # opens new connections on demand in later cells
    engine.dispose()

In [None]:
# Sanity-check the MySQL load by previewing a few rows of every loaded table.
# Only the tables listed in `table_names` (books, ratings, users) exist in the
# `project` database, so the queries are restricted to those; LIMIT keeps the
# cell output short instead of printing whole tables.
preview_rows = 5

with engine.connect() as connection:
    for table_name in table_names:
        print(f"--- {table_name} ---")
        result = connection.execute(
            text(f"SELECT * FROM {table_name} LIMIT {preview_rows}")
        )
        for row in result:
            print(row)