In [1]:
import sys
import os
import logging
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text
from sqlalchemy.exc import SQLAlchemyError




sys.path.append(os.path.abspath('../'))
from src.params import Params
from src.client import DatabaseClient


In [2]:
def configure_notebook_logging(
    level=logging.INFO,
    fmt='%(asctime)s - %(levelname)s - %(message)s',
    handler_name='notebook_stream',
):
    """
    Configure the root logger with a single named stream handler.

    Re-running a notebook cell that calls ``addHandler`` unconditionally
    stacks a new handler on the root logger each time, so every message
    is printed once per run of the cell. This function looks up the handler
    by name and reuses it, which makes the cell safe to run repeatedly.

    Parameters:
    - level: Logging level applied to both the root logger and the handler.
    - fmt: Format string used for the handler's formatter.
    - handler_name: Name used to recognise the handler on later calls.

    Returns:
    - Tuple ``(root_logger, stream_handler)``.
    """
    root = logging.getLogger()
    root.setLevel(level)

    # Reuse the handler installed by a previous run of this cell, if any.
    stream_handler = next(
        (h for h in root.handlers if h.get_name() == handler_name),
        None,
    )
    if stream_handler is None:
        stream_handler = logging.StreamHandler()
        stream_handler.set_name(handler_name)
        root.addHandler(stream_handler)

    # Always (re)apply level and format so edits to this cell take effect.
    stream_handler.setLevel(level)
    stream_handler.setFormatter(logging.Formatter(fmt))
    return root, stream_handler


# Same module-level names as before: root logger, its stream handler, and the formatter.
logger, handler = configure_notebook_logging()
formatter = handler.formatter

In [3]:
# Relative location of the Grammy Awards CSV export
csv_file = '../data/external/the_grammy_awards.csv'

# Read the whole file into a DataFrame
df = pd.read_csv(filepath_or_buffer=csv_file)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,year,title,published_at,updated_at,category,nominee,artist,workers,img,winner
0,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,Bad Guy,Billie Eilish,"Finneas O'Connell, producer; Rob Kinelski & Fi...",https://www.grammy.com/sites/com/files/styles/...,True
1,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,"Hey, Ma",Bon Iver,"BJ Burton, Brad Cook, Chris Messina & Justin V...",https://www.grammy.com/sites/com/files/styles/...,True
2,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,7 rings,Ariana Grande,"Charles Anderson, Tommy Brown, Michael Foster ...",https://www.grammy.com/sites/com/files/styles/...,True
3,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,Hard Place,H.E.R.,"Rodney “Darkchild” Jerkins, producer; Joseph H...",https://www.grammy.com/sites/com/files/styles/...,True
4,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,Talk,Khalid,"Disclosure & Denis Kosiak, producers; Ingmar C...",https://www.grammy.com/sites/com/files/styles/...,True


In [None]:
# Params instance (presumably carries the database connection settings; confirm in src/params.py)
params = Params()

# Build the database client from those params; the recorded cell output shows
# a "Successfully connected to the database." log line when this runs
db_client = DatabaseClient(params)

2025-03-29 14:41:56,991 - INFO - Successfully connected to the database.


In [5]:
table_name = 'grammys_raw'

try:
    # if_exists='replace' drops and recreates the table, so re-running this
    # cell does not append duplicate rows; index=False keeps the DataFrame
    # index out of the table.
    df.to_sql(table_name, con=db_client.engine, if_exists='replace', index=False)
except Exception as e:
    logging.error(f"Failed to load CSV data into the table '{table_name}'.")
    logging.error(f"Error details: {e}")
    # Do not swallow the failure: the verification cells below would otherwise
    # run against a stale table left by a previous run and report success.
    raise
else:
    logging.info(f"CSV data has been successfully loaded into the table '{table_name}'.")


2025-03-29 14:41:57,516 - INFO - CSV data has been successfully loaded into the table 'grammys_raw'.


In [6]:
def execute_queries(queries, db_client):
    """
    Run each SQL statement in order on one connection and log its outcome.

    Parameters:
    - queries: Iterable of SQL query strings; each one is wrapped with
      ``text()`` before execution.
    - db_client: DatabaseClient object exposing an ``engine`` whose
      ``connect()`` returns a context-managed connection.

    Returns:
    - None. Results are reported through the ``logging`` module only.

    Notes:
    - Statements that produce no result set (e.g. DDL, or DML without
      RETURNING) are logged as such rather than fetched. Calling
      ``fetchall()`` on them raises and would abort the remaining queries.
    - A ``SQLAlchemyError`` is logged, not re-raised, and stops processing
      of the remaining queries.
    - NOTE(review): no explicit commit is issued here; whether data-modifying
      statements persist depends on the SQLAlchemy version and engine
      settings. Confirm before using this helper for writes.
    """
    try:
        with db_client.engine.connect() as connection:
            for query in queries:
                logging.info(f"Executing query: {query}")
                # Convert the query string to an executable SQLAlchemy text object
                result = connection.execute(text(query))

                # Only result sets can be fetched; skip fetching otherwise.
                if not result.returns_rows:
                    logging.info("Query returned no result set.")
                    continue

                rows = result.fetchall()

                logging.info("Query results:")
                for row in rows:
                    logging.info(row)
    except SQLAlchemyError as e:
        logging.error("Failed to execute queries.")
        logging.error(f"Error details: {e}")

In [7]:
# Sanity-check queries: row count of the loaded table, then its column count
queries = [
    "SELECT COUNT(*) FROM grammys_raw",
    "SELECT COUNT(*) FROM information_schema.columns WHERE table_name = 'grammys_raw'",
]

# Run them and log the results
execute_queries(queries=queries, db_client=db_client)

2025-03-29 14:41:57,552 - INFO - Executing query: SELECT COUNT(*) FROM grammys_raw
2025-03-29 14:41:57,568 - INFO - Query results:
2025-03-29 14:41:57,572 - INFO - (4810,)
2025-03-29 14:41:57,576 - INFO - Executing query: SELECT COUNT(*) FROM information_schema.columns WHERE table_name = 'grammys_raw'
2025-03-29 14:41:57,604 - INFO - Query results:
2025-03-29 14:41:57,605 - INFO - (10,)


In [10]:
# In-memory (rows, columns) of the DataFrame, shown for comparison with the
# row and column counts logged by the database queries
df.shape

(4810, 10)

In [9]:
try:
    # Release the client's database connection once loading and verification
    # are finished; the recorded output shows the client logs the closure itself.
    db_client.close()
except Exception as e:
    logging.error("Failed to close connection to the database.")
    logging.error(f"Error details: {e}")

2025-03-29 14:41:57,643 - INFO - Connection to database closed successfully.
