In [44]:
# librerias analytics
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as plt

# librerias db
import boto3
import psycopg2
import pymysql
import configparser

# librerias para generar datos
import random
from faker import Faker
from datetime import datetime

# referencias de archivos
import constants as cons
import ddl

In [2]:
# Load the project settings from the file named by cons.CONFIG_FILE.
config = configparser.ConfigParser()
# read() returns the list of files it managed to parse; keeping it as the last
# expression lets the notebook display that list as the cell output.
config.read(cons.CONFIG_FILE)

['config.cfg']

In [3]:
# Build the boto3 client for the RDS service. The AWS credentials are read from
# the cons.USER section of the config file instead of being written in the
# notebook.
rds_conn = boto3.client(
    'rds',
    aws_access_key_id=config.get(cons.USER, cons.ACCESS_KEY),
    aws_secret_access_key=config.get(cons.USER, cons.SECRET_KEY),
    region_name='us-east-2'  # the region is fixed here, not taken from the config
)

In [4]:
# List every RDS instance visible to these credentials, remember its
# identifier and keep a handle on the instance description per engine.
rds_instances_ids = []
aws_response = rds_conn.describe_db_instances()

for db_instance in aws_response['DBInstances']:
    db_instance_identifier = db_instance['DBInstanceIdentifier']
    db_engine = db_instance['Engine']
    # The list above was declared but never filled; record each identifier.
    rds_instances_ids.append(db_instance_identifier)
    if db_engine == 'mysql':
        aws_mysql = db_instance
    else:
        # NOTE(review): every engine other than 'mysql' ends up here, so
        # aws_postgres may hold a non-PostgreSQL instance - confirm that only
        # MySQL and PostgreSQL instances exist in this account.
        aws_postgres = db_instance
    print(f"La instancia '{db_instance_identifier}' utiliza el motor de base de datos '{db_engine}'")

La instancia 'pj-final-db-v' utiliza el motor de base de datos 'mysql'


## Instancia MySQL

In [4]:
# Ask RDS to create the database instance described in the config file.
# An already existing instance is reported and treated as a normal outcome;
# any other failure is printed so the notebook keeps running.
try:
    # Everything is resolved inside the try block so that a missing config
    # option is reported by the generic handler below.
    instance_settings = dict(
        DBInstanceIdentifier=config.get(cons.DB, cons.INSTANCE_ID),
        DBName=config.get(cons.DB, cons.DB_NAME),
        MasterUsername=config.get(cons.DB, cons.DB_USERNAME),
        MasterUserPassword=config.get(cons.DB, cons.DB_PASSWORD),
        Port=config.getint(cons.DB, cons.PORT),
        DBInstanceClass=cons.INSTANCE_CLASS,
        Engine=cons.ENGINE,
        PubliclyAccessible=True,
        AllocatedStorage=cons.ALLOC_STORAGE,
        VpcSecurityGroupIds=[config.get(cons.VPC, cons.SEC_GROUP)],
    )
    response = rds_conn.create_db_instance(**instance_settings)
except rds_conn.exceptions.DBInstanceAlreadyExistsFault:
    print("La instancia ya existe")
except Exception as ex:
    print("Error!! ", ex)

La instancia ya existe


# Hostname de instancia

In [5]:
# Resolve the public hostname of the instance.
# Right after create_db_instance the instance is still being provisioned and
# its description carries no 'Endpoint' yet, so the lookup below would fail,
# the error would be swallowed and RDS_HOSTNAME would stay undefined for the
# connection cell. Waiting for the 'available' state first makes a fresh
# Restart & Run All work; for an instance that is already available the waiter
# returns after its first poll.
try:
    instance_id = config.get(cons.DB, cons.INSTANCE_ID)
    rds_conn.get_waiter('db_instance_available').wait(
        DBInstanceIdentifier=instance_id
    )
    instance = rds_conn.describe_db_instances(
        DBInstanceIdentifier=instance_id
    )
    RDS_HOSTNAME = instance.get('DBInstances')[0].get('Endpoint').get('Address')
    print(RDS_HOSTNAME)
except Exception as ex:
    print("Error!! ", ex)

pj-final-db-v.cq2nw4bt4dzn.us-east-2.rds.amazonaws.com


# Conexión y creación de tablas

In [6]:
import ddl

In [7]:
# Connection settings, read from the 'DB' section of the config file.
# NOTE(review): `host` holds the instance identifier and is not passed to
# connect() below (RDS_HOSTNAME is used instead) - confirm whether a later cell
# still needs it.
host = config.get('DB', 'DB_INSTANCE_ID')
user = config.get('DB', 'DB_USERNAME') 
password = config.get('DB', 'DB_PASSWORD')
database = config.get('DB', 'DB_NAME')
port = int(config.get('DB', 'DB_PORT'))

# Open the MySQL connection shared by the rest of the notebook. RDS_HOSTNAME
# is only defined when the hostname lookup cell succeeded.
conn = pymysql.connect(host=RDS_HOSTNAME,
                       user=user,
                       password=password,
                       database=database,
                       port=port)

In [8]:
def create_table(table_name):
    """Create one table from the DDL statement stored in the ``ddl`` module.

    The statement is looked up as ``ddl.DDL_CREATE_<TABLE_NAME>`` (upper-cased
    table name) and executed on the module-level connection ``conn``.

    Args:
        table_name: name of the table; it selects the DDL constant and is
            echoed in the printed message.

    Returns:
        None. Success or failure is reported with print(); any exception,
        including a missing DDL constant, is caught and printed.
    """
    try:
        ddl_statement = getattr(ddl, f'DDL_CREATE_{table_name.upper()}')
        # The cursor is used as a context manager so it is closed even when
        # execute() fails; previously it was never closed.
        with conn.cursor() as cursor:
            cursor.execute(ddl_statement)
        conn.commit()
        print(f"Table {table_name} created successfully")
    except Exception as ex:
        print(f"Table {table_name} not created", ex)

In [9]:
# Tables are created one by one in the order listed here.
# NOTE(review): presumably referenced tables come before the tables that point
# at them - verify against the statements in the ddl module.
tables = ['categoria', 'articulo', 'persona', 'rol', 'usuario', 'ingreso', 'venta', 'detalle_venta', 'detalle_ingreso']
for table in tables:
    create_table(table)

Table categoria created successfully
Table articulo created successfully
Table persona created successfully
Table rol created successfully
Table usuario created successfully
Table ingreso created successfully
Table venta created successfully
Table detalle_venta created successfully
Table detalle_ingreso created successfully


# Generate Mock Data

## Funciones para generar datos

In [11]:
# Single Faker instance shared by every generator function below. No seed is
# set in the visible cells, so each run produces different mock data.
fake = Faker()

In [24]:
# helper that produces several random words for the description columns
def generate_text(words_number = 20):
    """Return ``words_number`` random Faker words as one lower-case string.

    Args:
        words_number: how many words to request from Faker (default 20).

    Returns:
        str: the words joined by single spaces and lower-cased.
    """
    return ' '.join(
        fake.words(nb = words_number, ext_word_list = None, unique = False)
    ).lower()

In [25]:
# categories
def generate_categorias():
    """Build the rows for the ``categoria`` table.

    Returns:
        list[tuple]: one ``(nombre, descripcion, estado)`` tuple per category
        name, where ``descripcion`` is ten random words and ``estado`` is a
        random 0 or 1.
    """
    # Fixed catalogue of 22 category names (generated with mockaroo).
    category_names = (
        "Automotive", "Baby", "Beauty", "Books", "Clothing",
        "Computers", "Electronics", "Games", "Garden", "Grocery",
        "Health", "Home", "Industrial", "Jewelry", "Kids",
        "Movies", "Music", "Outdoors", "Shoes", "Sports",
        "Tools", "Toys",
    )

    # Tuple elements are evaluated left to right: description first, then the
    # random status flag.
    return [
        (category, generate_text(10), random.randint(0, 1))
        for category in category_names
    ]

In [28]:
# articles
def generate_articulos():
    """Build 200 rows for the ``articulo`` table.

    Returns:
        list[tuple]: tuples of ``(idcategoria, codigo, nombre, precio_venta,
        stock, descripcion, imagen, estado)``.
    """

    def build_row():
        # Fields are produced in column order, one random draw per field.
        return (
            random.randint(1, 22),                 # one id per category name; assumes ids start at 1
            fake.ean13(prefixes=('45', '55')),     # EAN-13 code with a fixed prefix
            fake.word().capitalize(),
            round(random.uniform(1.0, 500.0), 2),  # sale price
            random.randint(0, 1000),               # stock
            generate_text(10),
            fake.file_name(category = 'image'),
            random.randint(0, 1),                  # status flag
        )

    return [build_row() for _ in range(200)]

In [26]:
# roles
def generate_roles():
    """Build the rows for the ``rol`` table.

    Returns:
        list[tuple]: one ``(nombre, descripcion, estado)`` tuple for each of
        the three roles, with a five-word random description and a random
        0/1 status.
    """
    role_names = ('Administrator', 'Manager', 'Cashier')
    return [
        (role_name, generate_text(5), random.randint(0, 1))
        for role_name in role_names
    ]

In [27]:
# users, 50 in total
def generate_usuarios():
    """Build 50 rows for the ``usuario`` table.

    Role assignment depends on the row index: indexes divisible by 25 get
    role 1, other indexes divisible by 10 get role 2, everything else role 3.

    Returns:
        list[tuple]: tuples of ``(idrol, nombre, tipo_documento,
        num_documento, direccion, telefono, email, clave, estado)``.
    """
    rows = []

    for index in range(50):
        full_name = fake.name()
        doc_type = random.choice(['DPI', 'Passport'])
        address = fake.street_address()
        phone = fake.phone_number()
        mail = fake.email()
        secret = fake.password(length=12, special_chars=True, digits=True, upper_case=True, lower_case=True)
        status = random.randint(0, 1)

        # The % 25 test comes first, so index 0 is role 1 rather than role 2.
        role_id = 1 if index % 25 == 0 else (2 if index % 10 == 0 else 3)

        # Passports get a Faker passport number, DPI a 13-digit random number.
        doc_number = (
            fake.passport_number()
            if doc_type == 'Passport'
            else fake.random_number(digits=13)
        )

        rows.append((role_id, full_name, doc_type, doc_number, address, phone, mail, secret, status))

    return rows

In [16]:
# people: 100 suppliers followed by 500 clients
def generate_personas():
    """Build the rows for the ``persona`` table.

    Suppliers are generated first and clients afterwards, so the suppliers
    occupy the first 100 positions of the returned list.

    Returns:
        list[tuple]: 600 tuples of ``(tipo_persona, nombre, tipo_documento,
        num_documento, direccion, telefono, email)``.
    """
    rows = []

    # Both kinds of person share the same recipe; only the label and the
    # number of rows differ.
    for person_kind, how_many in (('Supplier', 100), ('Client', 500)):
        for _ in range(how_many):
            full_name = fake.name()
            doc_type = random.choice(['DPI', 'Passport'])
            address = fake.street_address()
            phone = fake.phone_number()
            mail = fake.email()

            # Passports get a Faker passport number, DPI a 13-digit number.
            doc_number = (
                fake.passport_number()
                if doc_type == 'Passport'
                else fake.random_number(digits=13)
            )

            rows.append((person_kind, full_name, doc_type, doc_number, address, phone, mail))

    return rows

In [17]:
# stock entries (purchases from suppliers)
def generate_ingreso():
    """Build 150 rows for the ``ingreso`` table.

    Returns:
        list[tuple]: tuples of ``(idproveedor, idusuario, tipo_comprobante,
        serie_comprobante, num_comprobante, fecha, impuesto, total, estado)``.
    """
    # The date window is the same for every row, so it is computed once.
    first_day = datetime(2020, 1, 1).date()
    last_day = datetime(2023, 12, 31).date()

    rows = []
    for _ in range(150):
        supplier_id = random.randint(1, 100)
        user_id = random.randint(1, 50)
        voucher_type = random.choice(['Invoice', 'Receipt'])
        # One random upper-case letter as series, reused as the prefix of the
        # six-digit voucher number.
        series = fake.bothify(text='?').upper()
        voucher_number = series + fake.bothify(text='######')
        entry_date = fake.date_between(start_date = first_day, end_date = last_day)
        tax = round(random.uniform(0, 15), 2)
        amount = round(random.uniform(100, 10000), 2)
        status = random.choice(['Processed', 'Cancelled', 'Pending'])

        rows.append((supplier_id, user_id, voucher_type, series, voucher_number, entry_date, tax, amount, status))

    return rows

In [18]:
# stock entry details
def generate_detalle_ingreso(n_rows=150, max_idingreso=150, max_idarticulo=200):
    """Build the rows for the ``detalle_ingreso`` table.

    The article id used to be drawn from 1..300 although generate_articulos
    only produces 200 articles, so about a third of the rows pointed at
    articles that do not exist. The upper bounds are now parameters whose
    defaults match the row counts of the other generators.

    Args:
        n_rows: number of detail rows to generate.
        max_idingreso: highest ``ingreso`` id that may be referenced.
        max_idarticulo: highest ``articulo`` id that may be referenced.

    Returns:
        list[tuple]: tuples of ``(idingreso, idarticulo, cantidad, precio)``.
    """
    detalle_data = []

    for _ in range(n_rows):
        idingreso = random.randint(1, max_idingreso)
        idarticulo = random.randint(1, max_idarticulo)
        cantidad = random.randint(1, 2000)
        precio = round(random.uniform(1.0, 1000.0), 2)

        detalle_data.append((idingreso, idarticulo, cantidad, precio))

    return detalle_data

In [19]:
# sales
def generate_venta(n_rows=1000, first_idcliente=101, last_idcliente=600, max_idusuario=50):
    """Build the rows for the ``venta`` table.

    generate_personas emits 100 suppliers followed by 500 clients, so with
    ids assigned in insertion order starting at 1 the clients are 101..600.
    The client id used to be drawn from 110..600, which silently left the
    first nine clients without any sale; the range is now a parameter that
    defaults to the full client range.

    Args:
        n_rows: number of sales to generate.
        first_idcliente: lowest ``persona`` id that is a client.
        last_idcliente: highest ``persona`` id that is a client.
        max_idusuario: highest ``usuario`` id that may be referenced.

    Returns:
        list[tuple]: tuples of ``(idcliente, idusuario, tipo_comprobante,
        serie_comprobante, num_comprobante, fecha, impuesto, total, estado)``.
    """
    # The date window is the same for every row, so it is computed once.
    start_date = datetime(2020, 1, 1).date()
    end_date = datetime(2023, 12, 31).date()

    venta_data = []

    for _ in range(n_rows):
        idcliente = random.randint(first_idcliente, last_idcliente)
        idusuario = random.randint(1, max_idusuario)
        tipo_comprobante = random.choice(['Invoice', 'Receipt'])
        # One random upper-case letter as series, reused as the prefix of the
        # six-digit voucher number.
        serie_comprobante = fake.bothify(text='?').upper()
        num_comprobante = serie_comprobante + fake.bothify(text='######')
        fecha = fake.date_between(start_date = start_date, end_date = end_date)
        impuesto = round(random.uniform(0, 15), 2)
        total = round(random.uniform(100, 10000), 2)
        estado = random.choice(['Processed', 'Cancelled', 'Pending'])

        venta_data.append((idcliente, idusuario, tipo_comprobante, serie_comprobante, num_comprobante, fecha, impuesto, total, estado))

    return venta_data

In [20]:
# sale details

def generate_detalle_venta(n_rows=10000, max_idventa=1000, max_idarticulo=200):
    """Build the rows for the ``detalle_venta`` table.

    The article id used to be drawn from 1..300 although generate_articulos
    only produces 200 articles, so about a third of the rows pointed at
    articles that do not exist. The upper bounds are now parameters whose
    defaults match the row counts of the other generators.

    Args:
        n_rows: number of detail rows to generate.
        max_idventa: highest ``venta`` id that may be referenced.
        max_idarticulo: highest ``articulo`` id that may be referenced.

    Returns:
        list[tuple]: tuples of ``(idventa, idarticulo, cantidad, precio,
        descuento)``; the discount is between 5% and 25% of the price.
    """
    detalle_venta_data = []

    for _ in range(n_rows):
        idventa = random.randint(1, max_idventa)
        idarticulo = random.randint(1, max_idarticulo)
        cantidad = random.randint(1, 10)
        precio = round(random.uniform(1.0, 750.0), 2)
        descuento = round(precio * random.uniform(0.05, 0.25), 2)

        detalle_venta_data.append((idventa, idarticulo, cantidad, precio, descuento))

    return detalle_venta_data

## Generar los datos

In [31]:
# Generate every mock dataset in memory; nothing is written to the database in
# this cell. Each list holds plain tuples matching the column order of the
# corresponding INSERT statement.

category_data = generate_categorias()
product_data = generate_articulos()
role_data = generate_roles()
user_data = generate_usuarios()
personas_data = generate_personas()
ingreso_data = generate_ingreso()
detalle_data = generate_detalle_ingreso()
venta_data = generate_venta()
detalle_venta_data = generate_detalle_venta()

## Función de Bulk Insert

In [32]:
def bulk_insert(conn, insert_query, data_list):
    """Insert many rows with one ``executemany`` call and commit them.

    Args:
        conn: open database connection (a pymysql connection in this notebook).
        insert_query: INSERT statement with one ``%s`` placeholder per column.
        data_list: sequence of row tuples matching the placeholders.

    Returns:
        None. The outcome is reported with print(); pymysql errors are caught
        and printed rather than raised.
    """
    cursor = conn.cursor()
    try:
        # bulk insert
        cursor.executemany(insert_query, data_list)
        conn.commit()
        print("Bulk data inserted successfully")
    except pymysql.Error as error:
        print("Error inserting bulk data", error)
        # Undo whatever part of the batch reached the server; without this the
        # next successful commit on the same connection would persist it.
        try:
            conn.rollback()
        except pymysql.Error as rollback_error:
            print("Error rolling back bulk data", rollback_error)
    finally:
        cursor.close()

## Queries de insert

In [33]:
# INSERT STATEMENTS FOR MYSQL
# Each statement has one %s placeholder per column; the values are supplied
# separately (see bulk_insert), so no data is formatted into the SQL text.
# The column order of every statement must match the tuple order produced by
# the corresponding generate_* function.

# (nombre, descripcion, estado) - rows from generate_categorias
insert_MYSQL_categoria = """
    INSERT INTO categoria (nombre, descripcion, estado)
    VALUES (%s, %s, %s)
"""

# rows from generate_articulos
insert_MYSQL_articulo = """
    INSERT INTO articulo (idcategoria, codigo, nombre, precio_venta, stock, descripcion, imagen, estado)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
"""

# rows from generate_personas
insert_MYSQL_persona = """
    INSERT INTO persona (tipo_persona, nombre, tipo_documento, num_documento, direccion, telefono, email)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
"""

# rows from generate_roles
insert_MYSQL_rol = """
    INSERT INTO rol (nombre, descripcion, estado)
    VALUES (%s, %s, %s)
"""

# rows from generate_usuarios
insert_MYSQL_usuario = """
    INSERT INTO usuario (idrol, nombre, tipo_documento, num_documento, direccion, telefono, email, clave, estado)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

# rows from generate_ingreso
insert_MYSQL_ingreso = """
    INSERT INTO ingreso (idproveedor, idusuario, tipo_comprobante, serie_comprobante, num_comprobante, fecha, impuesto, total, estado)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

# rows from generate_venta
insert_MYSQL_venta = """
    INSERT INTO venta (idcliente, idusuario, tipo_comprobante, serie_comprobante, num_comprobante, fecha, impuesto, total, estado)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

# rows from generate_detalle_venta
insert_MYSQL_detalle_venta = """
    INSERT INTO detalle_venta (idventa, idarticulo, cantidad, precio, descuento)
    VALUES (%s, %s, %s, %s, %s)
"""

# rows from generate_detalle_ingreso
insert_MYSQL_detalle_ingreso = """
    INSERT INTO detalle_ingreso (idingreso, idarticulo, cantidad, precio)
    VALUES (%s, %s, %s, %s)
"""

## Insertar datos a la DB

In [34]:
# Load every dataset, one bulk_insert call (and one commit) per table.
# Tables that are referenced by id (categoria, articulo, rol, usuario,
# persona) are loaded before the tables whose rows point at them.
# NOTE(review): re-running this cell inserts the same lists again - confirm
# whether the tables should be emptied first.
bulk_insert(conn, insert_MYSQL_categoria, category_data)
bulk_insert(conn, insert_MYSQL_articulo, product_data)
bulk_insert(conn, insert_MYSQL_rol, role_data)
bulk_insert(conn, insert_MYSQL_usuario, user_data)
bulk_insert(conn, insert_MYSQL_persona, personas_data)
bulk_insert(conn, insert_MYSQL_ingreso, ingreso_data)
bulk_insert(conn, insert_MYSQL_detalle_ingreso, detalle_data)
bulk_insert(conn, insert_MYSQL_venta, venta_data)
bulk_insert(conn, insert_MYSQL_detalle_venta, detalle_venta_data)

Bulk data inserted successfully
Bulk data inserted successfully
Bulk data inserted successfully
Bulk data inserted successfully
Bulk data inserted successfully
Bulk data inserted successfully
Bulk data inserted successfully
Bulk data inserted successfully
Bulk data inserted successfully


## SELECT PARA VERIFICAR

In [None]:
# NOTE: this cell held an unfinished `def select_table(table_name):` header.
# A function header without a body is a SyntaxError, and the name is defined
# further down as select_table(conn, ddl_statement), so the stub was dropped.

In [35]:
# One unfiltered SELECT per table, used to check the inserted data.
# Each statement returns every row of its table.
SELECT_CATEGORIA = "SELECT * FROM categoria;"
SELECT_ARTICULO = "SELECT * FROM articulo;"
SELECT_PERSONA = "SELECT * FROM persona;"
SELECT_ROL = "SELECT * FROM rol;"
SELECT_USUARIO = "SELECT * FROM usuario;"
SELECT_INGRESO = "SELECT * FROM ingreso;"
SELECT_VENTA = "SELECT * FROM venta;"
SELECT_DETALLE_VENTA = "SELECT * FROM detalle_venta;"
SELECT_DETALLE_INGRESO = "SELECT * FROM detalle_ingreso;"

In [42]:
def select_table(conn, ddl_statement, max_rows=None):
    """Run a query and print its result rows.

    Args:
        conn: open database connection (a pymysql connection in this notebook).
        ddl_statement: SQL text to execute; despite the name, the callers pass
            SELECT statements.
        max_rows: optional cap on the number of rows printed. The default
            ``None`` prints every row, as before.

    Returns:
        None. Rows, the "no data" message or the pymysql error are printed.
    """
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(ddl_statement)
        results = cursor.fetchall()
        if results:
            print(f"\n{ddl_statement}\n")
            shown = results if max_rows is None else results[:max_rows]
            for row in shown:
                print(row)
            if len(shown) < len(results):
                print(f"... ({len(results) - len(shown)} more rows)")
        else:
            print("No hay datos.")
    except pymysql.Error as error:
        print(f"Error: {error}")
    finally:
        # The cursor used to be left open after every call.
        if cursor is not None:
            cursor.close()

In [43]:
# Print the contents of every table to verify the load.
# Each call prints all rows of its table, so the output of this cell grows
# with the size of the generated data.
select_table(conn, SELECT_CATEGORIA)
select_table(conn, SELECT_ARTICULO)
select_table(conn, SELECT_PERSONA)
select_table(conn, SELECT_ROL)
select_table(conn, SELECT_USUARIO)
select_table(conn, SELECT_INGRESO)
select_table(conn, SELECT_VENTA)
select_table(conn, SELECT_DETALLE_VENTA)
select_table(conn, SELECT_DETALLE_INGRESO)


SELECT * FROM categoria;

(1, 'Automotive', 'bed media employee that student probably tell board another listen', b'\x01')
(2, 'Baby', 'i personal federal investment dinner fine message section responsibility plant', b'\x00')
(3, 'Beauty', 'modern simple drug draw peace today same join these customer', b'\x00')
(4, 'Books', 'sound per money son officer respond its certainly political lawyer', b'\x01')
(5, 'Clothing', 'edge sense need hard one charge break service fill evening', b'\x00')
(6, 'Computers', 'natural investment everybody guy course wall commercial seven before until', b'\x00')
(7, 'Electronics', 'feel company bed next quickly staff despite recent expert season', b'\x00')
(8, 'Games', 'control reduce anything many lot current perform central create scene', b'\x01')
(9, 'Garden', 'just after teach friend week itself or to mean wear', b'\x01')
(10, 'Grocery', 'red truth hand author important have ok old mission issue', b'\x00')
(11, 'Health', 'opportunity exist nice watch fee