# database
> Using Pydantic's BaseSettings object to manage SQLAlchemy Database engines.

In [None]:
#| default_exp database

In [None]:
#| exporti

from humble_database.utils import delegates

## SQL Alchemy version
import sqlalchemy
if int(sqlalchemy.__version__[0]) < 2:
    from sqlalchemy.engine.url import URL
    from sqlalchemy.engine import create_engine,Engine

else:
    from sqlalchemy import URL
    from sqlalchemy import create_engine, Engine
from snowflake.sqlalchemy import URL as SnowflakeURL
from sqlalchemy.orm import Session
from pydantic import AliasChoices, Field, SecretStr
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional,Union
from abc import ABC, abstractproperty,abstractmethod
from contextlib import contextmanager
import pandas as pd
from sqlalchemy import text

ImportError: dlopen(/Users/schlinkertc/opt/anaconda3/envs/humble-database/lib/python3.11/site-packages/cryptography/hazmat/bindings/_rust.abi3.so, 0x0002): tried: '/Users/schlinkertc/opt/anaconda3/envs/humble-database/lib/python3.11/site-packages/cryptography/hazmat/bindings/_rust.abi3.so' (mach-o file, but is an incompatible architecture (have 'arm64', need 'x86_64')), '/System/Volumes/Preboot/Cryptexes/OS/Users/schlinkertc/opt/anaconda3/envs/humble-database/lib/python3.11/site-packages/cryptography/hazmat/bindings/_rust.abi3.so' (no such file), '/Users/schlinkertc/opt/anaconda3/envs/humble-database/lib/python3.11/site-packages/cryptography/hazmat/bindings/_rust.abi3.so' (mach-o file, but is an incompatible architecture (have 'arm64', need 'x86_64'))

In [None]:
#| hide 

from nbdev.showdoc import show_doc
import os

## SQL Alchemy Connection
> To create our Database connections, we will use SQL Alchemy's `URL` object

The following is from the SQL Alchemy functional documentation:

In [None]:
# Render the documentation of SQLAlchemy's `URL.create` constructor in the notebook.
show_doc(URL.create)

## Database Settings
> A Pydantic model that controls the arguments passed to the SQL Alchemy URL constructor

In [None]:
#| exporti

class DatabaseSettingsBase(BaseSettings):
    """Credential fields shared by every database settings model.

    Both fields are optional and default to ``None``.
    """

    # Login name used when building the connection URL.
    username: Optional[str] = None
    # Stored as a `SecretStr` so the value is masked when the model is displayed.
    password: Optional[SecretStr] = None

In [None]:
#| exporti

class DatabaseSettings(DatabaseSettingsBase):
    """Settings whose fields mirror the keyword arguments of SQLAlchemy's `URL.create`.

    Only `drivername` is required; every other field has a default.
    """

    # Required: the SQLAlchemy driver name, e.g. 'sqlite' or 'postgresql'.
    drivername: str
    host: Optional[str] = None
    port: Optional[int] = None
    database: Optional[str] = None
    # Extra query-string parameters for the URL.
    query: dict[str, str] = {}
    

In [None]:
#| exporti 

class SnowflakeSettings(DatabaseSettingsBase):
    """Settings passed to the special Snowflake URL constructor.

    Values are read from keyword arguments, from environment variables and from
    a local `.env` file. Fields without an explicit alias use the `snowflake_`
    environment prefix.
    """

    model_config = SettingsConfigDict(env_prefix='snowflake_', env_file='.env')

    account: str = 'rentprogress'
    database: Optional[str] = 'TRANSFORM_PROD'
    # The attribute cannot simply be called `schema`, so it is stored as
    # `snowflake_schema` and exposed under the alias 'schema'.
    # `validation_alias` replaces the pydantic v1 `env=` keyword: it lists the
    # names (keyword argument or environment variable) the value may be supplied
    # under. Explicit aliases are used as-is, so the prefixed name
    # 'snowflake_schema' has to be listed explicitly.
    snowflake_schema: Optional[str] = Field(
        'EDW',
        alias='schema',
        validation_alias=AliasChoices('schema', 'snowflake_schema'),
        description="""passed to the SQLAlchemy URL constructor as 'schema', 
        but we need to alias it to avoid clashing with 'BaseSettings'""")
    warehouse: Optional[str] = 'ADHOC_WH'
    role: Optional[str] = 'DATA_DEVELOPER'
    numpy: Optional[bool] = True

    

In [None]:
# Settings for a local SQLite file: only the driver and the database path are given.
sqlite_options = {'drivername': 'sqlite', 'database': 'test.db'}
settings = DatabaseSettings(**sqlite_options)
settings

In [None]:
# The settings fields line up with the keyword arguments of `URL.create`,
# so the dumped model can be splatted straight into it.
url_kwargs = settings.model_dump()
url = URL.create(**url_kwargs)
engine = create_engine(url)
engine

#### Why use Pydantic for Database Settings? 
> Pydantic's `BaseSettings` object comes with support for environment variables and secret strings

In [None]:
# set some environment variables
os.environ['username']='humble_chuck'
os.environ['password']='super secret passkey'

try:
    # pydantic automatically reads them
    settings = DatabaseSettings(drivername='mysql')
    assert settings.username == 'humble_chuck'

    # pydantic also keeps your password safe from displays
    print(f"password prints as a secret: {settings.password}")

    # by default, __init__ values take precedence over environment variables
    alternate_settings = DatabaseSettings(drivername='mysql',username='some_other_person')
    assert alternate_settings.username == 'some_other_person'
finally:
    # always remove the demo variables, even when an assertion above fails,
    # so they cannot leak into settings objects created in later cells
    for k in ('username','password'):
        os.environ.pop(k, None)

In [None]:
#| export

class AbstractDatabaseClass(ABC):

    """
    Abstract Base Class used to define methods for all Database Connections.

    A separate Base Class is required because Snowflake Database Settings will behave differently, and thus they will need their own Database Settings class.

    Every method reads `self._engine`; subclasses are expected to set it to a
    SQLAlchemy engine before these methods are used.
    """

    def query_to_records(
        self,
        query_string:str,
    ):
        """Execute `query_string` and return all rows as a list of mappings.

        The string is wrapped in `sqlalchemy.text` before execution and the
        connection is closed before the list is returned.
        """
        with self._engine.connect() as conn:
            return list(conn.execute(text(query_string)).mappings())

    @delegates(pd.read_sql_query)
    def query_to_df(
        self,
        query_string,
        **kwargs
    ):
        """Run `query_string` on a fresh connection and return a pandas DataFrame.

        All extra keyword arguments are forwarded unchanged to
        `pandas.read_sql_query`; see its documentation for the accepted options.
        """
        # NOTE: the previous body started with an f-string, which Python treats
        # as a discarded expression rather than a docstring.
        with self._engine.connect() as conn:
            return pd.read_sql_query(query_string,conn,**kwargs)

    @contextmanager
    def engine_scope(self,**kwargs):
        """Yield a connection, committing on success and rolling back on error.

        `**kwargs` is accepted but currently unused. Any exception raised inside
        the managed block is re-raised after the rollback.
        """
        # The `with` statement closes the connection on exit, so no explicit
        # close() is needed.
        with self._engine.connect() as conn:
            try:
                yield conn
                conn.commit()
            except BaseException:
                # BaseException keeps the catch-all behaviour of a bare except,
                # so interrupts also trigger a rollback.
                conn.rollback()
                raise

    @contextmanager
    def session_scope(self,bind=None,**kwargs):
        """Provide a transactional scope around a series of operations.

        `bind` selects the engine or connection the session uses and defaults to
        this object's own engine. Remaining keyword arguments are passed to
        `Session`. The session is committed on success, rolled back on any
        exception (which is re-raised), and always closed.
        """
        # Honour an explicitly supplied bind; it used to be silently ignored.
        session = Session(bind=self._engine if bind is None else bind,**kwargs)
        try:
            yield session
            session.commit()
        except BaseException:
            session.rollback()
            raise
        finally:
            session.close()


In [None]:
#| export 

class Database(DatabaseSettings,AbstractDatabaseClass):
    """
    A ready-to-use database connection.

    Fields come from `DatabaseSettings` and query helpers come from
    `AbstractDatabaseClass`. Instantiate it directly for a quick connection, or
    subclass it to pin the settings of one specific database.

    Background from the [SQLAlchemy docs](https://docs.sqlalchemy.org/en/13/core/engines.html):

    - The Engine is the starting point of any SQLAlchemy application and references both a Dialect and a Pool.
    - Engine and Pool are lazily initialised: no DBAPI connection is made until `Engine.connect()` (or an operation depending on it) is called.
    - Once created, the Engine can be used directly or handed to a Session to work with the ORM.
    """

    model_config = SettingsConfigDict(
        arbitrary_types_allowed=True,
        # lets subclasses set `DatabaseSettings` attributes as plain defaults,
        # without repeating the type annotation
        ignored_types=(int,str,dict),
    )

    _engine:Engine = None
    _engine_url:URL = None

    def __init__(self, **kwargs):
        # Validate and load the settings fields first.
        super().__init__(**kwargs)

        # Unwrap the password when it is a secret wrapper; otherwise use it as-is.
        secret = self.password
        plain_password = (
            secret.get_secret_value()
            if hasattr(secret,'get_secret_value')
            else secret
        )

        engine_url = URL.create(
            drivername=self.drivername,
            username=self.username,
            password=plain_password,
            host=self.host,
            port=self.port,
            database=self.database,
            query=self.query,
        )
        self._engine_url = engine_url
        self._engine = create_engine(engine_url)
    

In [None]:
#| export 

class Snowflake(SnowflakeSettings,AbstractDatabaseClass):
    """
    Create a Snowflake connection with default functionality.

    Inherits attributes from `SnowflakeSettings` to manage credentials and
    connection parameters. Inherits methods from `AbstractDatabaseClass`.

    Use the class as-is to quickly create a connection, or create a subclass to control connections to a specific database.

    More on SQL Alchemy engines from [SQLAlchemy docs](https://docs.sqlalchemy.org/en/13/core/engines.html):

    - The Engine is the starting point for any SQLAlchemy application. It’s “home base” for the actual database and its DBAPI.
    - An Engine references both a Dialect and a Pool, which together interpret the DBAPI’s module functions as well as the behavior of the database
    - Note that the Engine and its underlying Pool do **not** establish the first actual DBAPI connection until the Engine.connect() method is called, or an operation which is dependent on this method such as Engine.execute() is invoked.
    - In this way, Engine and Pool can be said to have a lazy initialization behavior.
    - The Engine, once created, can either be used directly to interact with the database, or can be passed to a Session object to work with the ORM.
    """
    _engine:Engine = None
    # Holds whatever the Snowflake URL helper returns.
    _engine_url:URL = None

    def __init__(
        self,
        **kwargs
    ):
        # settings __init__
        super().__init__(**kwargs)

        # Unwrap the password when it is a secret wrapper; otherwise use it as-is.
        if hasattr(self.password,'get_secret_value'):
            password = self.password.get_secret_value()
        else:
            password = self.password

        url_parameters = {
            'user': self.username,
            'password': password,
            'account': self.account,
            'warehouse': self.warehouse,
            'database': self.database,
            'schema': self.snowflake_schema,
            # `role` is a configured setting but was previously never forwarded.
            'role': self.role,
            'numpy': self.numpy,
        }
        # Omit unset optional settings instead of passing None to the URL helper.
        url = SnowflakeURL(
            **{key: value for key, value in url_parameters.items() if value is not None}
        )
        self._engine_url=url
        self._engine=create_engine(url)


    model_config = SettingsConfigDict(
        #allows for attributes of `database settings` to be set as defaults in subclasses without type annotation
        ignored_types=(int,str,dict),
        arbitrary_types_allowed=True,

    )

## Examples:

To use the `Database` as-is, pass the desired variables to the constructor: 

In [None]:
# Connect to a throwaway on-disk SQLite database.
db = Database(drivername='sqlite', database='test.db')
print(db)

# Small table to round-trip through the database.
users = pd.DataFrame(
    {
        'id': [1, 2, 3],
        'user': ['larry', 'moe', 'curly'],
    }
)

# engine_scope commits when the block exits without an error.
with db.engine_scope() as conn:
    users.to_sql(name='users', con=conn, if_exists='replace', index=False)

queried = db.query_to_df("select * from users")

# What was written must be exactly what is read back.
assert queried.equals(users)

# Remove the SQLite file created for this example.
os.remove('test.db')

### Create a sub-class to handle connections to a specific database

In [None]:
class RNACentralDatabase(Database):
    """`Database` preset with fixed connection details for one PostgreSQL server.

    Credentials are not hard-coded here; they are read from environment
    variables carrying the `rna_db_` prefix.
    """

    model_config = SettingsConfigDict(env_prefix='rna_db_')

    # Left un-annotated on purpose: `Database` configures `ignored_types` so that
    # subclasses can set these attributes as plain defaults.
    host = 'hh-pgsql-public.ebi.ac.uk'
    port = 5432
    database = 'pfmegrnargs'
    drivername = 'postgresql'
    

# Supply the credentials through the prefixed environment variables.
os.environ.update(
    {
        'rna_db_username': 'reader',
        'rna_db_password': 'NWDMCE5xdipIjRrp',
    }
)

# No arguments needed: connection details come from the class, credentials from the environment.
rna_db = RNACentralDatabase()
rna_db

In [None]:
# Fetch the first five rows of `rnc_database` as a pandas DataFrame.
rna_db.query_to_df("""select * from rnc_database limit 5""")

In [None]:
# `query_to_records` returns a list of row mappings; show the first (and only) one.
rna_db.query_to_records("select id,timestamp from rnc_database limit 1")[0]

In [None]:
# Use the raw connection directly when neither helper fits.
sample_query = text("""select * from rnc_database limit 5""")

with rna_db.engine_scope() as conn:
    result = conn.execute(sample_query)
    first_row = result.fetchall()[0]
    print(first_row)

In [None]:
#| hide

!nbdev_export