<a href="https://colab.research.google.com/github/rubuntu/uaa-417-sistemas-de-gestion-de-bases-de-datos-avanzados/blob/main/12_Classes_and_ORM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%matplotlib inline
import matplotlib
import seaborn as sns
matplotlib.rcParams['savefig.dpi'] = 144

In [None]:
%%capture
!apt-get install postgresql postgresql-contrib
!pip install ipython-sql psycopg2-binary

In [None]:
!service postgresql start
!sudo -u postgres createdb company
!sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres';"

 * Starting PostgreSQL 14 database server
   ...done.
ALTER ROLE


# Ejemplo de Class

Aquí crearemos algunas clases para representar algunos datos, es decir, personas de una empresa. La primera clase que debemos definir es `Person`, la clase general que representa a una persona.

In [None]:
class Person:
    """A person described by a name, an age, a degree and a salary."""

    def __init__(self, name, age, degree, salary):
        # The arguments are stored unchanged; describe() only needs them
        # to be printable.
        self.name, self.age = name, age
        self.degree, self.salary = degree, salary

    def describe(self):
        """Return a one-sentence, human-readable summary of this person."""
        return (
            f"{self.name} is {self.age} years old, holds a {self.degree}, "
            f"with a salary of {self.salary}"
        )

Ahora bien, un título es algo que probablemente también queramos abstraer, así que vamos a crear una clase de título.

También podemos hacer alguna validación, para lo cual podemos usar un *mixin*: una clase auxiliar (`Validate`) de la que heredan las clases que necesitan validar sus valores.

In [None]:
class Validate:
    """Mixin providing a membership check against a collection of allowed values."""

    def _validate(self, ele, allowed):
        """Raise ``ValueError`` unless ``ele`` is contained in ``allowed``."""
        if ele in allowed:
            return
        message = '{} is not allowed for class {}'.format(ele, type(self))
        raise ValueError(message)

In [None]:
class Degree(Validate):
    """An academic degree: a validated level plus an area of study."""

    # Class-level list of the levels accepted by __init__.
    ALLOWED_LEVELS = ['bachelors', 'masters', 'phd']

    def __init__(self, level, area):
        # Reject unknown levels before any attribute is assigned.
        self._validate(level, self.ALLOWED_LEVELS)
        self.level, self.area = level, area

    def __repr__(self):
        """Render as ``"<level> in <area>"``."""
        return f"{self.level} in {self.area}"

In [None]:
class Salary(Validate):
    """A pay amount together with the pay period it is paid in."""

    # Maps each accepted pay period to the factor __repr__ multiplies the
    # amount by to obtain the per-year figure.
    ALLOWED_PERIODS = {
        'weekly': 52.0,
        'biweekly': 26.0,
        'monthly': 12,
        'quarterly': 4.0,
        'annually': 1.0,
    }

    def __init__(self, amt, pay_period):
        # Only keys of ALLOWED_PERIODS are valid pay periods.
        self._validate(pay_period, self.ALLOWED_PERIODS)
        self.amt, self.period = amt, pay_period

    def __repr__(self):
        """Render the amount scaled to a year, with two decimals."""
        factor = self.ALLOWED_PERIODS[self.period]
        yearly = self.amt * factor
        return f"${yearly:.2f} per year"

In [None]:
# Build a sample person from a Degree and a Salary, then show its description.
p = Person('Eric', 25, Degree('masters', 'physics'), Salary(200, 'biweekly'))
p.describe()

'Eric is 25 years old, holds a masters in physics, with a salary of $5200.00 per year'

In [None]:
class Employee(Person):
    """A Person that additionally carries an id and a production figure."""

    def __init__(self, idn, production, name, age, degree, salary):
        # Person stores name, age, degree and salary.
        super().__init__(name, age, degree, salary)
        self.id, self.production = idn, production

    def __repr__(self):
        """Render as ``"ID: <id>, name: <name>"``."""
        return f"ID: {self.id}, name: {self.name}"

class Manager(Employee):
    """An Employee with a ``capacity`` limiting how many people it can manage."""

    def __init__(self, capacity, idn, production, name, age, degree, salary):
        # Employee stores the id and production on top of the Person fields.
        super().__init__(idn, production, name, age, degree, salary)
        self.capacity = capacity

    def can_manage(self, n):
        """Return whether ``n`` is at most this manager's capacity."""
        within_capacity = n <= self.capacity
        return within_capacity

In [None]:
class Group:
    """A collection of members indexed by each member's ``id`` attribute."""

    def __init__(self):
        # Maps member.id -> member.
        self.members = dict()

    def add_member(self, member):
        """Store ``member`` under its id, replacing any entry with the same id."""
        key = member.id
        self.members[key] = member

    def remove_member(self, member):
        """Drop the entry stored under ``member.id``.

        Raises:
            KeyError: if no entry exists for that id.
        """
        self.members.pop(member.id)

    def get_member_ids(self):
        """Return the keys view of the members mapping."""
        ids = self.members.keys()
        return ids

In [None]:
class Team(Group):
    """A Group of employees with an id, a name, a description and at most one manager."""

    def __init__(self, idn, name, desc):
        # Group.__init__ creates the empty members mapping.
        super().__init__()
        self.id = idn
        self.name = name
        self.desc = desc
        # Holds the id of the managing member, or None while there is none.
        self.manager = None

    def add_manager(self, manager):
        """Register ``manager`` as the team's manager and add it as a member.

        Raises:
            ValueError: if the team already has a manager.
        """
        if self.manager is not None:
            raise ValueError("can only have a single manager")
        self.manager = manager.id
        self.add_member(manager)

    def remove_manager(self):
        """Remove the current manager from the team.

        Raises:
            ValueError: if the team has no manager.
        """
        if self.manager is None:
            raise ValueError("cannot remove that which does not exist")
        del self.members[self.manager]
        self.manager = None

    def check_health(self):
        """Return a ``(healthy, reason)`` tuple; ``reason`` is "" when healthy."""
        headcount = len(self.members)
        if self.manager is None:
            # Without a manager, only an empty team counts as healthy.
            if headcount > 0:
                return False, "no manager for employees"
            return True, ""
        boss = self.members[self.manager]
        # The manager is itself a member, so it is excluded from the count.
        if boss.can_manage(headcount - 1):
            return True, ""
        return False, "too many employees for manager"

    def production(self):
        """Return the sum of the ``production`` attribute over all members."""
        return sum(person.production for person in self.members.values())

    def describe(self):
        """Return a one-line summary with member count and total production."""
        return f"team {self.name} has {len(self.members)} members and produces ${self.production()}"

    def __repr__(self):
        """Render as ``"ID: <id>, Name: <name>"``."""
        return f"ID: {self.id}, Name: {self.name}"

In [None]:
from collections import Counter
class Company(Group):
    """A named Group whose members are teams.

    Each member is expected to provide ``name``, ``production()``,
    ``check_health()``, ``describe()`` and ``get_member_ids()``.
    """

    def __init__(self, name):
        self.name = name
        super().__init__()

    def production(self):
        """Return the sum of ``production()`` over all teams."""
        return sum(team.production() for team in self.members.values())

    def _single_assign(self):
        """Check that each person is only on a single team.

        Returns:
            True when no member id appears more than once across all teams.
            This is also True when there are no ids at all.
        """
        ids = [j for team in self.members.values() for j in team.get_member_ids()]
        # all() is True for an empty Counter, so a company without teams (or
        # with only empty teams) no longer fails on most_common(1)[0].
        return all(count == 1 for count in Counter(ids).values())

    def check_health(self):
        """Return a list of problem descriptions; an empty list means healthy."""
        problems = []
        if not self._single_assign():
            problems.append("employee assigned to more than one team")
        for team in self.members.values():
            health, reason = team.check_health()
            if not health:
                problems.append("{} is not healthy because {}".format(team.name, reason))
        return problems

    def describe(self):
        """Print a summary of the company.

        A healthy company prints its total production followed by one line
        per team. An unhealthy company prints each detected problem instead
        of printing nothing.
        """
        problems = self.check_health()
        if not problems:  # an empty list is falsy
            print("{} is healthy and has production of ${}".format(self.name, self.production()))
            for team in self.members.values():
                print("  " + team.describe())
        else:
            print("{} is not healthy:".format(self.name))
            for problem in problems:
                print("  " + problem)

```mermaid
classDiagram
    class Company {
      -name
      +production()
      +check_health()
    }
    class Group {
      -members
      +add_member()
      +remove_member()
      +get_member_ids()
    }
    class Team {
      -name
      -id
      -desc
      -manager
      +add_manager()
      +remove_manager()
      +check_health()
      +production()
      +describe()
    }
    class Employee {
      -id
      -production
      +__repr__()
    }
    class Manager {
      -capacity
      +can_manage()
    }
    class Person {
      -name
      -age
      -degree
      -salary
      +describe()
    }
    class Validate {
      +_validate(ele, allowed)
    }
    class Degree {
      -level
      -area
      +__repr__()
    }
    class Salary {
      -amt
      -period
      +__repr__()
    }

    Group <|-- Company
    Group <|-- Team
    Company --> Team
    Team --> Manager
    Team --> Employee
    Person <|-- Employee
    Employee <|-- Manager
    Person --> Degree
    Person --> Salary
    Validate <|-- Degree
    Validate <|-- Salary
```

A menudo, estos datos se pueden mapear de forma bastante sencilla a una base de datos. Aquí describiremos una empresa.

In [None]:
# Salaries
low_salary = Salary(1800, 'biweekly')
mid_salary = Salary(5600, 'biweekly')
high_salary = Salary(100000, 'annually')

# Degrees
b_business = Degree('bachelors', 'business')
m_math = Degree('masters', 'mathematics')
b_finance = Degree('bachelors', 'finance')
b_physics = Degree('bachelors', 'physics')
p_math = Degree('phd', 'mathematics')

# Employees
# Employee takes (idn, production, name, age, degree, salary). Degree and
# salary are passed by keyword so they cannot end up in each other's slot.
fred = Employee(1, 120000, "Fred", 28, degree=b_business, salary=low_salary)
amy = Employee(2, 136000, "Amy", 29, degree=b_finance, salary=low_salary)
erica = Employee(5, 140000, "Erica", 47, degree=b_physics, salary=mid_salary)
jerry = Employee(7, 165000, "Jerry", 45, degree=b_business, salary=mid_salary)
sam = Employee(8, 127000, "Sam", 35, degree=b_finance, salary=low_salary)
danny = Employee(9, 128000, "Danny", 32, degree=m_math, salary=low_salary)
carrie = Employee(10, 120000, "Carrie", 39, degree=m_math, salary=low_salary)

# Managers
# Manager takes (capacity, idn, production, name, age, degree, salary).
sally = Manager(5, 4, 131000, "Sally", 50, degree=b_business, salary=mid_salary)
arnold = Manager(10, 3, 125000, "Arnold", 60, degree=m_math, salary=mid_salary)
molly = Manager(10, 6, 180000, "Molly", 62, degree=p_math, salary=high_salary)

# Teams
tech = Team(1, 'tech', 'handles technical duties')
sales = Team(2, 'sales', 'handles sales duties')
executive = Team(3, 'executive', 'makes large business decisions')

for emp in [fred, amy, carrie]:
    tech.add_member(emp)

tech.add_manager(sally)

for emp in [erica, jerry, sam, danny]:
    sales.add_member(emp)

sales.add_manager(arnold)

executive.add_manager(molly)

# Company
company = Company("the-company")
for team in [tech, sales, executive]:
    company.add_member(team)

In [None]:
company.describe()

the-company is healthy and has production of $1372000
  team tech has 4 members and produces $507000
  team sales has 5 members and produces $685000
  team executive has 1 members and produces $180000


In [None]:
company.members

{1: ID: 1, Name: tech, 2: ID: 2, Name: sales, 3: ID: 3, Name: executive}

In [None]:
company.members[1].members[10]

ID: 10, name: Carrie

## Mapeo de relación de objetos (ORM)

Existe un paralelismo entre las clases interrelacionadas que hemos definido y las tablas SQL. Así como teníamos una clase `Salary`, una clase `Degree`, una clase `Employee`, etc., podríamos imaginar una empresa con una tabla de salarios, una tabla de títulos y una tabla de empleados. Cada fila de una de estas tablas sería similar a un único objeto de ese tipo (es decir, una instancia de una clase).

Por lo tanto, podemos imaginar un _mapeo_ entre filas de una tabla SQL y objetos Python. Existen herramientas que concretan este mapeo, llamadas herramientas de mapeo de relación de objetos (ORM). Estas herramientas pueden conectarse a bases de datos y nos permiten tratar las relaciones en la base de datos como objetos Python. Esto puede ser muy útil para anotar relaciones SQL con constantes globales (como una fecha importante) y funciones de rutina para el análisis. También permite una integración perfecta de las relaciones de la base de datos en aplicaciones como objetos Python completos.

Utilizaremos una herramienta ORM llamada `SQLAlchemy` para construir una base de datos a partir de nuestro ejemplo anterior, completa no solo con los atributos de cada objeto sino también con las funciones de informes como `check_health()`.

In [None]:
import sqlalchemy as sql # this is not conventional

Seguiremos la analogía de clases y tablas; para crear una tabla en nuestra base de datos, declaramos una clase. Nuestras clases heredarán de una clase base especial que se conecta a `SQLAlchemy`. **Tenga cuidado**, hay mucho que hacer en estas definiciones de clase y se verán diferentes de otras definiciones de clase para admitir operaciones SQL. Si observamos a continuación, notaremos varios patrones:

1. Todas las clases heredan de Base, posiblemente a través de otras clases
1. Todas las clases están asociadas con una tabla SQL a través de una variable `__tablename__`
1. No existe un método `__init__` para inicializar atributos; en su lugar, los atributos se declaran como variables del tipo `sql.Column`
1. Los atributos están tipificados; a continuación, usamos los tipos `sql.Integer` y `sql.String`
1. Todas las clases contienen al menos una `primary_key`
1. Las relaciones entre clases/tablas se logran mediante el uso de `sql.orm.relationship` y `sql.ForeignKey`

Algunas relaciones se gestionan por herencia. Al igual que antes, dado que `Manager` hereda de `Employee`, no necesitamos agregar columnas para producción, nombre, salario, etc. A nivel de SQL, hay una tabla `managers` con columnas `idn` y `capacity` que se une a la tabla `employees` (por `idn`) cada vez que queremos saber el nombre de un gerente (o producción, o salario, etc.). Esto ayuda al backend de SQL a seguir buenos principios de diseño de bases de datos.

In [None]:
from sqlalchemy import Column, Integer, String, ForeignKey
from sqlalchemy.orm import relationship, declarative_base
from sqlalchemy.ext.hybrid import hybrid_property, hybrid_method

Base = declarative_base()

class Team(Base):
    """ORM class mapped to the ``teams`` table; a team holds a list of employees."""

    __tablename__ = 'teams'

    idn = Column(Integer, primary_key=True)
    name = Column(String)

    # Employees on this team; this is the other side of Employee.team.
    members = relationship(
        'Employee',
        back_populates='team',
        cascade='all, delete-orphan',
    )

    @hybrid_property
    def manager(self):
        """Return the first member that is a Manager, or None when there is none."""
        return next(
            (person for person in self.members if isinstance(person, Manager)),
            None,
        )

    @hybrid_property
    def healthy(self):
        """Return whether the team is empty or has a manager with enough capacity."""
        headcount = len(self.members)
        boss = self.manager
        if boss is None:
            # Without a manager, only an empty team counts as healthy.
            return headcount == 0
        # The manager is itself a member, so it is excluded from the count.
        return boss.can_manage(headcount - 1)

    @hybrid_property
    def production(self):
        """Return the sum of ``production`` over all members."""
        total = 0
        for person in self.members:
            total += person.production
        return total

    def __repr__(self):
        return 'Team(idn={}, name={})'.format(self.idn, self.name)

class Employee(Base):
    """ORM class mapped to the ``employees`` table.

    ``employee_type`` is the polymorphic discriminator column; objects of
    this class use the identity ``'employee'``.
    """

    __tablename__ = 'employees'

    idn = Column(Integer, primary_key=True)
    name = Column(String)
    age = Column(Integer)
    production = Column(Integer)
    team_idn = Column(ForeignKey('teams.idn'))
    salary_amt = Column(Integer)
    salary_period = Column(Integer)
    degree_level = Column(String)
    degree_area = Column(String)
    employee_type = Column(String)

    # The team this employee belongs to; the other side of Team.members.
    team = relationship('Team', back_populates='members')

    __mapper_args__ = dict(
        polymorphic_identity='employee',
        polymorphic_on=employee_type,
    )

    @hybrid_property
    def salary(self):
        """Return ``salary_amt`` multiplied by ``salary_period``."""
        amount, period = self.salary_amt, self.salary_period
        return amount * period

    def __repr__(self):
        # Attributes are listed in the order the columns are declared above.
        shown = (
            'idn', 'name', 'age', 'production', 'team_idn',
            'salary_amt', 'salary_period', 'degree_level', 'degree_area',
            'employee_type',
        )
        body = ', '.join(f'{field}={getattr(self, field)}' for field in shown)
        return f'Employee({body})'

class Manager(Employee):
    """Employee subtype mapped to the ``managers`` table.

    The ``managers`` row shares its primary key with the matching
    ``employees`` row through the foreign key on ``idn``. Objects of this
    class use the polymorphic identity ``'manager'``.
    """

    __tablename__ = 'managers'

    idn = Column(ForeignKey('employees.idn'), primary_key=True)
    capacity = Column(Integer)

    __mapper_args__ = {
        'polymorphic_identity': 'manager',
    }

    @hybrid_method
    def can_manage(self, n):
        """Return whether ``n`` is at most this manager's capacity."""
        return n <= self.capacity

    def __repr__(self):
        # Labelled "Manager" and including the name, so manager rows can be
        # told apart from plain employees and identified in query output.
        return (
            f'Manager(idn={self.idn}, name={self.name}, age={self.age}, '
            f'production={self.production}, team_idn={self.team_idn}, '
            f'employee_type={self.employee_type}, capacity={self.capacity})'
        )


```mermaid
erDiagram
    TEAM {
        int idn PK
        string name
    }
    
    EMPLOYEE {
        int idn PK
        string name
        int age
        int production
        int team_idn FK
        int salary_amt
        int salary_period
        string degree_level
        string degree_area
        string employee_type
    }

    MANAGER {
        int idn PK, FK
        int capacity
    }

    TEAM ||--o{ EMPLOYEE : "has members"
    EMPLOYEE }o--|| TEAM : "belongs to"
    MANAGER |o--|| EMPLOYEE : "is a"
```

Los decoradores `@hybrid_method` y `@hybrid_property` son donde `SQLAlchemy` comienza a ayudarnos. A menudo nos interesará alguna combinación de atributos en una tabla. Por ejemplo, podríamos querer saber el salario anual de un empleado. Podemos calcularlo como su salario por período de pago multiplicado por la cantidad de períodos de pago en el año. En lugar de almacenar el salario anual en la tabla (lo que sería redundante), podemos indicarle a `SQLAlchemy` que calcule esto sobre la marcha y lo trate como un atributo. De esta manera, podemos interactuar con los atributos de nuestros objetos de la misma manera, ya sea que estén almacenados en la base de datos subyacente o no. Esto puede ser particularmente útil cuando el atributo tiene una definición compleja.

Veamos `SQLAlchemy` en acción. Crearemos un `engine` que administrará las conexiones de la base de datos. Si se desea ver todo el SQL que se ejecuta mientras interactuamos con `SQLAlchemy`, se puede pasar la palabra clave `echo=True` a `create_engine`.

In [None]:
import psycopg2
from sqlalchemy import create_engine

# SQLite alternative:
# engine = sql.create_engine('sqlite:///company.db', echo=True)

# PostgreSQL connection settings
DATABASE_TYPE = 'postgresql'
DBAPI = 'psycopg2'
ENDPOINT = 'localhost'  # usually localhost
USER = 'postgres'
PASSWORD = 'postgres'
PORT = 5432  # default PostgreSQL port
DATABASE = 'company'

# Assemble the SQLAlchemy connection string and create the engine from it
engine = create_engine(
    "{}+{}://{}:{}@{}:{}/{}".format(
        DATABASE_TYPE, DBAPI, USER, PASSWORD, ENDPOINT, PORT, DATABASE
    )
)


Luego prepararemos la base de datos creando todas las tablas que heredan de `Base`.

In [None]:
Base.metadata.create_all(engine)

Crearemos un objeto `Session` que realiza el mapeo objeto-relación, es decir, el objeto `Session` es responsable de traducir nuestras acciones con los objetos que definimos anteriormente en SQL, que luego se pueden pasar a la base de datos a través del `engine`.

In [None]:
session = sql.orm.Session(engine)

Por último, creemos nuestros datos y agreguémoslos a la base de datos.

In [None]:
# Salaries: each dict holds the keyword arguments for the salary columns.
# `periods` supplies the value stored as salary_period for each pay frequency.
periods = {
    'weekly': 52.0,
    'biweekly': 26.0,
    'monthly': 12,
    'quarterly': 4.0,
    'annually': 1.0,
}

low_salary = {'salary_amt': 1800, 'salary_period': periods['biweekly']}
mid_salary = {'salary_amt': 5600, 'salary_period': periods['biweekly']}
high_salary = {'salary_amt': 100000, 'salary_period': periods['annually']}

# Degrees: each dict holds the keyword arguments for the degree columns.
b_business = {'degree_level': 'bachelors', 'degree_area': 'business'}
m_math = {'degree_level': 'masters', 'degree_area': 'mathematics'}
b_finance = {'degree_level': 'bachelors', 'degree_area': 'finance'}
b_physics = {'degree_level': 'bachelors', 'degree_area': 'physics'}
p_math = {'degree_level': 'phd', 'degree_area': 'mathematics'}

In [None]:
def cat_dict(*args):
    """Merge the given dictionaries into one new dictionary.

    Raises:
        ValueError: if the same key appears in more than one dictionary.
    """
    # Fail before merging anything when a key is repeated across inputs.
    seen = set()
    for mapping in args:
        for key in mapping.keys():
            if key in seen:
                raise ValueError("Keys are not unique!")
            seen.add(key)

    merged = {}
    for mapping in args:
        for key, value in mapping.items():
            merged[key] = value
    return merged

In [None]:
# Employees
fred = Employee(name='Fred', production=120000, age=28, **cat_dict(low_salary, b_business))
amy = Employee(name='Amy', production=136000, age=29, **cat_dict(low_salary, b_finance))
erica = Employee(name='Erica', production=140000, age=47, **cat_dict(mid_salary, b_physics))
jerry = Employee(name='Jerry', production=165000, age=45, **cat_dict(mid_salary, b_business))
sam = Employee(name='Sam', production=127000, age=35, **cat_dict(low_salary, b_finance))
danny = Employee(name='Danny', production=128000, age=32, **cat_dict(low_salary, m_math))
carrie = Employee(name='Carrie', production=120000, age=39, **cat_dict(low_salary, m_math))

# Managers
sally = Manager(capacity=5, production=131000, name="Sally", age=50, **cat_dict(mid_salary, b_business))
arnold = Manager(capacity=10, production=125000, name="Arnold", age=60, **cat_dict(mid_salary, m_math))
molly = Manager(capacity=10, production=180000, name="Molly", age=62, **cat_dict(high_salary, p_math))

# Teams
tech = Team(name='tech', members=[fred, amy, carrie, sally])
sales = Team(name='sales', members=[erica, jerry, sam, danny, arnold])
executive = Team(name='executive', members=[molly])

# add the teams to the database; this also adds all attached objects
session.add(tech)
session.add(sales)
session.add(executive)

# commit the changes
session.commit()

In [None]:
# we can still interact with the objects we've created as Python objects...
tech.members

[Employee(idn=1, name=Fred, age=28, production=120000, team_idn=1, salary_amt=1800, salary_period=26, degree_level=bachelors, degree_area=business, employee_type=employee),
 Employee(idn=2, name=Amy, age=29, production=136000, team_idn=1, salary_amt=1800, salary_period=26, degree_level=bachelors, degree_area=finance, employee_type=employee),
 Employee(idn=3, name=Carrie, age=39, production=120000, team_idn=1, salary_amt=1800, salary_period=26, degree_level=masters, degree_area=mathematics, employee_type=employee),
 Employee(idn=4, age=50, production=131000, team_idn=1, employee_type=manager, capacity=5)]

In [None]:
# or as SQL relations via the session: query the team named 'tech'
# and print each of its members
q = session.query(Team).filter(Team.name=='tech')

for team in q:
    for member in team.members:
        print(member)

Employee(idn=1, name=Fred, age=28, production=120000, team_idn=1, salary_amt=1800, salary_period=26, degree_level=bachelors, degree_area=business, employee_type=employee)
Employee(idn=2, name=Amy, age=29, production=136000, team_idn=1, salary_amt=1800, salary_period=26, degree_level=bachelors, degree_area=finance, employee_type=employee)
Employee(idn=3, name=Carrie, age=39, production=120000, team_idn=1, salary_amt=1800, salary_period=26, degree_level=masters, degree_area=mathematics, employee_type=employee)
Employee(idn=4, age=50, production=131000, team_idn=1, employee_type=manager, capacity=5)


In [None]:
# a hybrid_property behaves just like a regular attribute,
# even though it is computed on access rather than stored as a column
print(tech.production)
print(tech.healthy)

507000
True


Utilizando el método `query` de nuestra sesión, podemos realizar todas nuestras operaciones SQL habituales: filtrar, unir, agregar, etc.

In [None]:
# Employees whose name matches the SQL pattern 'A%' (names starting with "A").
# Manager is mapped as a subclass of Employee, so managers are returned too.
a_employees = session.query(Employee).filter(Employee.name.like('A%'))
for a in a_employees:
    print(a)

Employee(idn=2, name=Amy, age=29, production=136000, team_idn=1, salary_amt=1800, salary_period=26, degree_level=bachelors, degree_area=finance, employee_type=employee)
Employee(idn=9, age=60, production=125000, team_idn=2, employee_type=manager, capacity=10)


In [None]:
# Sum of Employee.salary per team name. The hybrid property salary
# (salary_amt * salary_period) is used directly inside the SQL aggregate.
for total in session.query(Team.name, sql.func.sum(Employee.salary)).join(Employee.team).group_by(Team.name):
    print(total)

('executive', 100000)
('tech', 286000)
('sales', 530400)
