In [None]:
# !pip install psycopg2-binary

In [4]:
import psycopg2
from psycopg2 import sql, connect, extensions
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
import os
import pandas as pd
import shutil
import csv

In [5]:
DB_PARAMS = {
    "user": "postgres",
    "password": "123456",
    "host": "127.0.0.1",
    "port": "5432",
    "database": "postgres"  
}

DB_NAME = "lego_database"

In [6]:
def database_exists(conn, db_name):
    with conn.cursor() as cur:
        cur.execute("SELECT 1 FROM pg_database WHERE datname=%s", (db_name,))
        return cur.fetchone() is not None

In [7]:
def execute_sql_from_file(conn, file_path, **kwargs):
    with open(file_path, 'r') as file:
        sql = file.read().format(**kwargs)
    with conn.cursor() as cur:
        cur.execute(sql)

In [18]:
def create_database(connection_params, db_name):
    try:        
        conn = psycopg2.connect(**connection_params)
        conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        with conn.cursor() as cur:
            cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
            exists = cur.fetchone()
            if exists:
                print(f"Database '{db_name}' already exists.")
            else:
                cur.execute(f"CREATE DATABASE {db_name}")                
                print(f"Database '{db_name}' created successfully.")
            DB_PARAMS["database"] = db_name
        conn.close()

    except Exception as e:
        print(f"Error creating database: {e}")

In [9]:
def create_tables(db_params):
    sql_files = [
    'create_colors_table.sql',
    'create_part_categories_table.sql',
    'create_parts_table.sql',
    'create_themes_table.sql'
    'create_sets_table.sql',
    'create_inventories_table.sql',
    'create_inventory_parts_table.sql',
    'create_inventory_sets_table.sql'
]

    try:
        with psycopg2.connect(**db_params) as conn:
            conn.set_session(autocommit=True)
            
            for sql_file in sql_files:
                execute_sql_from_file(conn, f'sql/{sql_file}')
                print(f"Table from '{sql_file}' created.")
    except Exception as e:
        print(f"Error creating tables: {e}")

In [19]:
create_database(DB_PARAMS, DB_NAME)

Database 'lego_database' already exists.


In [11]:
create_tables(DB_PARAMS)

Table from 'create_colors_table.sql' created.
Table from 'create_part_categories_table.sql' created.
Table from 'create_parts_table.sql' created.
Error creating tables: [Errno 2] No such file or directory: 'sql/create_themes_table.sqlcreate_sets_table.sql'


In [63]:
# import csv
# import sqlite3

# # Conexión a la base de datos (ajustar según sea necesario)
# conn = sqlite3.connect('lego_database.db')
# cursor = conn.cursor()

# # Función modificada para insertar datos desde un archivo CSV a una tabla
# def insert_data_from_csv(file_path, table_name):
#     with open(file_path, newline='', encoding='utf-8') as csvfile:
#         reader = csv.reader(csvfile)
#         columns = next(reader)  # Obtiene el encabezado para las columnas
#         placeholders = ', '.join(['?'] * len(columns))  # Crea marcadores de posición
#         columns_formatted = ', '.join(columns)  # Formatea las columnas para la consulta SQL
#         sql = f'INSERT INTO {table_name} ({columns_formatted}) VALUES ({placeholders})'
#         for row in reader:
#             print(row)
#             print(sql)
#             cursor.execute(sql, row)
#     conn.commit()

In [97]:
def insert_data_from_csv_to_db(db_params, file_path, table_name):
    with open(file_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        columns = next(reader)  # Get column names from the first row
        records = [[None if item == '' else item for item in row] for row in reader]  # Convert empty strings to None
        
        conn = connect(**db_params)
        conn.set_isolation_level(extensions.ISOLATION_LEVEL_AUTOCOMMIT)
        cursor = conn.cursor()
        
        placeholders = ', '.join(['%s'] * len(columns))  # Create placeholders for each column
        all_placeholders = ', '.join([f"({placeholders})"] * len(records))  # Create a group of placeholders for all records
        query = sql.SQL("INSERT INTO {table} ({fields}) VALUES {values} ON CONFLICT DO NOTHING").format(
            table=sql.Identifier(table_name),
            fields=sql.SQL(', ').join(map(sql.Identifier, columns)),
            values=sql.SQL(all_placeholders)
        )

        flat_records = [item for sublist in records for item in sublist]  # Flatten the list of records

        try:
            cursor.execute(query, flat_records)
            print("All records inserted successfully.")
        except psycopg2.IntegrityError as e:
            if "violates foreign key constraint" in str(e):
                missing_part_num = str(e).split("Key (part_num)=(")[1].split(")")[0]
                print(f"Error: part_num {missing_part_num} is not present in the 'parts' table.")
            else:
                print(f"Error executing the query: {e}")
        except (Exception, psycopg2.DatabaseError) as error:
            print(f"Error executing the query: {error}")
        finally:
            cursor.close()
            conn.close()
            print("Connection closed.")

colors: No depende de otras tablas.  
part_categories: No depende de otras tablas.  
themes: Aunque tiene una relación jerárquica consigo misma, podemos insertar primero los temas sin padres (o insertar todos los temas y luego actualizar aquellos con parent_id).  
parts: Depende de part_categories.    
sets: Depende de themes.  
inventories: Depende de sets para el campo set_num.  
inventory_parts: Depende de inventories, parts, y colors.  
inventory_sets: Depende de inventories y sets

In [57]:
table = 'colors'
insert_data_from_csv_to_db(DB_PARAMS, f'./raw/{table}.csv', table)

Todos los registros insertados exitosamente.
Conexión cerrada.


In [58]:
table = 'part_categories'
insert_data_from_csv_to_db(DB_PARAMS, f'./raw/{table}.csv', table)

Todos los registros insertados exitosamente.
Conexión cerrada.


In [71]:
table = 'themes'
insert_data_from_csv_to_db(DB_PARAMS, f'./raw/{table}.csv', table)

Todos los registros insertados exitosamente.
Conexión cerrada.


In [72]:
table = 'parts'
insert_data_from_csv_to_db(DB_PARAMS, f'./raw/{table}.csv', table)

Todos los registros insertados exitosamente.
Conexión cerrada.


In [73]:
table = 'sets'
insert_data_from_csv_to_db(DB_PARAMS, f'./raw/{table}.csv', table)

Todos los registros insertados exitosamente.
Conexión cerrada.


In [74]:
table = 'inventories'
insert_data_from_csv_to_db(DB_PARAMS, f'./raw/{table}.csv', table)

Todos los registros insertados exitosamente.
Conexión cerrada.


In [104]:
query = "SELECT DISTINCT part_num FROM parts"
unique_part_nums =  execute_query(DB_PARAMS, query)
unique_part_nums = [part_num[0] for part_num in unique_part_nums]
unique_part_nums 

Conexión cerrada.


['`3069bpr0180',
 '0687b1',
 '0901',
 '0902',
 '0903',
 '0904',
 '1',
 '10',
 '10016414',
 '10019stk01',
 '10026stk01',
 '10029stk01',
 '10036stk01',
 '10039',
 '10048',
 '10049',
 '10049pr0001',
 '10050',
 '10051',
 '10051pr01',
 '10052',
 '10053',
 '10054',
 '10054pr0001',
 '10054pr0002',
 '10055',
 '10055pr0001',
 '10056',
 '10056pr0001',
 '10057',
 '10057pr0001',
 '10057pr0002',
 '10058',
 '10061',
 '10062',
 '10063',
 '10064',
 '10065',
 '10066',
 '10066pr0001',
 '10066pr0002',
 '10066pr0003',
 '10075cdb01',
 '10111',
 '10111apr0006',
 '10113',
 '10113pr0001',
 '10113pr0002',
 '10119',
 '10124',
 '10124pr0001',
 '10126',
 '10127',
 '10127stk01',
 '10127stk02',
 '10127stk03',
 '10128',
 '10128pr0001',
 '10128pr0002',
 '10129stk01',
 '10134stk01',
 '10144stk01',
 '10154',
 '10154pr0001',
 '10159stk01',
 '10164',
 '10164pr0001',
 '10165c01',
 '10166',
 '10166pr0001',
 '10166pr0003',
 '10168pb01',
 '10169',
 '10170',
 '10172',
 '10173',
 '10177',
 '10178',
 '10178pr0001a',
 '10178pr00

In [111]:
table = 'inventory_parts'
path = f'./raw/{table}.csv'

shutil.copyfile(path, f'./raw/{table}_OLDER.csv')

df_inventory_parts = pd.read_csv(path)
df_to_keep = df_inventory_parts[df_inventory_parts['part_num'].isin(unique_part_nums)]
df_to_keep.to_csv(path, index=False)

print("File updated and _OLDER copy created.")

File updated and _OLDER copy created.


In [108]:
insert_data_from_csv_to_db(DB_PARAMS, f'./raw/{table}.csv', table)

Todos los registros insertados exitosamente.
Conexión cerrada.


In [109]:
table = 'inventory_sets'
insert_data_from_csv_to_db(DB_PARAMS, f'./raw/{table}.csv', table)

Todos los registros insertados exitosamente.
Conexión cerrada.


In [40]:
def execute_query(db_params, query, params=None):
    conn = None
    try:
        conn = psycopg2.connect(**db_params)
        conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        cursor = conn.cursor()
        
        if params:
            cursor.execute(sql.SQL(query), params)
        else:
            cursor.execute(sql.SQL(query))
        
        if query.strip().lower().startswith(("select", "with")):
            records = cursor.fetchall()
            return records
        else:
            conn.commit()
            print("Consulta ejecutada exitosamente.")
            return None
        
    except (Exception, psycopg2.DatabaseError) as error:
        print(f"Error al ejecutar la consulta: {error}")
        return None
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()

## **1.** Colores más utilizados en los 90.  
Identifica cuáles son los 10 colores más frecuentemente usados en los sets de LEGO durante la década de los 90.

In [41]:
query = """
    SELECT c.name AS color_name, SUM(ip.quantity) AS total_quantity
    FROM sets s
    JOIN inventories i ON s.set_num = i.set_num
    JOIN inventory_parts ip ON i.id = ip.inventory_id
    JOIN parts p ON ip.part_num = p.part_num
    JOIN colors c ON ip.color_id = c.id
    WHERE s.year BETWEEN 1990 AND 1999
    GROUP BY c.name
    ORDER BY total_quantity DESC
    LIMIT 10;
"""

execute_query(DB_PARAMS, query)

[('Black', 61341),
 ('Light Gray', 36189),
 ('White', 32899),
 ('Red', 30469),
 ('Yellow', 25469),
 ('Blue', 19499),
 ('Green', 5437),
 ('Dark Gray', 5246),
 ('Brown', 3065),
 ('Trans-Neon Green', 1601)]

## **2.** Colores únicos.  
Determina la cantidad de colores que son únicos en toda la base de datos.

In [42]:
query = """
    SELECT COUNT(DISTINCT id) AS unique_colors
    FROM colors;
"""

execute_query(DB_PARAMS, query)

[(135,)]

## **3.** Tendencia de piezas por sets a lo largo de los años.
Analiza cómo ha evolucionado la cantidad de piezas incluidas en los sets de LEGO a través del tiempo.

In [44]:
query = """
    WITH yearly_avg_parts AS (
        SELECT s.year, AVG(s.num_parts) AS avg_num_parts
        FROM sets s
        GROUP BY s.year
        ORDER BY s.year
    )
    SELECT 
        year,
        avg_num_parts,
        LAG(avg_num_parts, 1) OVER (ORDER BY year) AS prev_year_avg_num_parts,
        avg_num_parts - LAG(avg_num_parts, 1) OVER (ORDER BY year) AS diff_from_prev_year
    FROM yearly_avg_parts;
"""

execute_query(DB_PARAMS, query)

[(1950, Decimal('10.1428571428571429'), None, None),
 (1953,
  Decimal('16.5000000000000000'),
  Decimal('10.1428571428571429'),
  Decimal('6.3571428571428571')),
 (1954,
  Decimal('12.3571428571428571'),
  Decimal('16.5000000000000000'),
  Decimal('-4.1428571428571429')),
 (1955,
  Decimal('36.8571428571428571'),
  Decimal('12.3571428571428571'),
  Decimal('24.5000000000000000')),
 (1956,
  Decimal('18.5000000000000000'),
  Decimal('36.8571428571428571'),
  Decimal('-18.3571428571428571')),
 (1957,
  Decimal('42.6190476190476190'),
  Decimal('18.5000000000000000'),
  Decimal('24.1190476190476190')),
 (1958,
  Decimal('44.4523809523809524'),
  Decimal('42.6190476190476190'),
  Decimal('1.8333333333333334')),
 (1959,
  Decimal('16.2500000000000000'),
  Decimal('44.4523809523809524'),
  Decimal('-28.2023809523809524')),
 (1960,
  Decimal('175.3333333333333333'),
  Decimal('16.2500000000000000'),
  Decimal('159.0833333333333333')),
 (1961,
  Decimal('70.5882352941176471'),
  Decimal('175.

# Consultas.

In [67]:
query = "delete from themes"
execute_query(DB_PARAMS, query)

Consulta ejecutada exitosamente.
Conexión cerrada.


In [15]:
query = "select * from colors"
execute_query(DB_PARAMS, query)

Error al ejecutar la consulta: relation "colors" does not exist
LINE 1: select * from colors
                      ^

Conexión cerrada.


In [50]:
query = "INSERT INTO colors (id, name, rgb, is_trans) VALUES ('-1', 'Unknown', '0033B2', 'f')"

execute_query(DB_PARAMS, query)

Consulta ejecutada exitosamente.
Conexión cerrada.


# Observaciones.
En themes tuve que cambiar a None donde habia cadenas vacias para columans tipo int. Sino, no funcionaba el insert.  

para inventory_parts se agrego ON CONFLICT (pk_column) DO UPDATE SET para que el registro se actualice cuando encuentre conflicto por pk duplicada. Tambien se implemento un proceso que traia los id de la tabla parts y comparaba entre sus regsitros cuales apuntaban a un id inexistente en parts. Se creo una copia de seguridad del archivo y estos registrso fueron eliminados  previo a la insersion.

Otras tablas no hubo problemas.