In [None]:
# default_exp core

# Data Dev Tools

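> Core API: a `Database` wrapper around a SQLAlchemy engine, plus a `cached_query` helper that caches query results on disk as CSV.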

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export

import hashlib
import os.path
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

class Database:
    """
    Reference Object for Sql Alchemy Engine

    Builds a connection URL from one of the `DB_ENGINES` templates and stores
    the resulting SQLAlchemy engine on `self.engine`.
    URL formats: http://docs.sqlalchemy.org/en/latest/core/engines.html

    dbtype: one of ['sqlite','pymysql','snowflake'] (matched case-insensitively)
    Base: optional object stored on `self.base` when truthy
    Meta: optional object stored on `self.meta` when truthy
    kwargs:
        sqlite
            - path=<path-to-database file>
              (the template already ends in a '/', so the path is appended
              after 'sqlite:////', i.e. SQLAlchemy's absolute-path form)
        pymysql
            - username,password,hostname,database
        snowflake
            - username,password,account,database,schema,warehouse,role

    `username` and `password` are percent-encoded before being placed in the
    URL, so pass them raw (not pre-encoded).

    If `dbtype` is not a key of `DB_ENGINES`, a message is printed and
    `engine` stays `None`.  A missing template field raises `KeyError`
    from `str.format`.
    """
    DB_ENGINES = {
        'sqlite': 'sqlite:////{path}',
        'pymysql': 'mysql+pymysql://{username}:{password}@{hostname}/{database}?charset=UTF8MB4',
        'snowflake':"snowflake://{username}:{password}@{account}/{database}/{schema}?warehouse={warehouse}&role={role}",
    }

    # Fields that land in the user-info part of the URL; characters such as
    # '@', ':' or '/' in them would otherwise be parsed as URL delimiters.
    _CREDENTIAL_FIELDS = ('username', 'password')

    # Main DB Connection Ref Obj
    engine = None
    # Class-level defaults so reading these never raises AttributeError
    # when the caller did not supply them.
    base = None
    meta = None

    def __init__(self, dbtype,Base=None,Meta=None,**kwargs):
        dbtype = dbtype.lower()
        if dbtype in self.DB_ENGINES:
            # Percent-encode credentials; None is left alone so it formats
            # exactly as before.
            url_fields = {
                key: quote(str(value), safe='')
                if key in self._CREDENTIAL_FIELDS and value is not None
                else value
                for key, value in kwargs.items()
            }
            engine_url = self.DB_ENGINES[dbtype].format(**url_fields)
            self.engine = create_engine(engine_url)
            print(self.engine)

            if Base:
                self.base = Base
            if Meta:
                self.meta = Meta
        else:
            print("DBType is not found in DB_ENGINE")

In [None]:
#hide
from dotenv import load_dotenv
from os import environ
# Load variables from the local .env file; the cells below read the
# database credentials via environ.get.
load_dotenv('.env')

In [None]:
# Handle on the MLB MySQL database; host and credentials are read from the
# environment rather than written into the notebook.
mlb_db = Database(
    'pymysql',
    database='MLB',
    hostname=environ.get('db_host'),
    username=environ.get('db_user'),
    password=environ.get('db_password'),
)

Engine(mysql+pymysql://admin:***@mydatabase.cjk1vmqlqaty.us-east-2.rds.amazonaws.com/MLB?charset=UTF8MB4)


In [None]:
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of table names.
from sqlalchemy import inspect as sa_inspect

sa_inspect(mlb_db.engine).get_table_names()

['actions',
 'credits',
 'game',
 'game_players',
 'matchups',
 'movements',
 'pitches',
 'players',
 'plays',
 'team_records',
 'teams',
 'venue']

In [None]:
#export 

def cached_query(self, sql_query, parameters=None, cache_dir="_cache"):
    """
    Method to query data from `Database` Ref Obj and return pandas dataframe.

    Results are cached on disk as ``<cache_dir>/<sha1 of query text>.csv``;
    when that file already exists it is read back instead of hitting the
    database.

    Parameters
    ----------
    sql_query : str
        saved SQL query
    parameters : dict, optional
        populates named placeholders in query template via ``str.format``.
        The same dict is also forwarded to ``pandas.read_sql`` as ``params``.
        Values are interpolated verbatim into the SQL text, so do not pass
        untrusted input.
    cache_dir : str, optional
        directory holding the cached CSV files (created on first write).

    Returns
    -------
    df_raw : DataFrame
        Pandas DataFrame with raw data resulting from query

    Notes
    -----
    The cache key is derived from the formatted query text only.
    A cache hit is loaded with ``pandas.read_csv``, so column dtypes are
    re-inferred from the CSV and may differ from the live query result.
    """
    if parameters:
        sql_query = sql_query.format(**parameters)

    # The SHA-1 of the final query text names the cache file.
    query_hash = hashlib.sha1(sql_query.encode()).hexdigest()
    file_path = os.path.join(cache_dir, "{}.csv".format(query_hash))

    if os.path.exists(file_path):
        return pd.read_csv(file_path)

    # The context manager releases the connection on success and on any
    # exception (including KeyboardInterrupt), which then propagates to the
    # caller instead of leaving df_raw undefined.
    with self.engine.connect() as connection:
        df_raw = pd.read_sql(sql_query, con=connection, params=parameters)

    os.makedirs(cache_dir, exist_ok=True)
    df_raw.to_csv(file_path, index=False)
    return df_raw
# Attach the function to Database so it can be called as a method:
# db.cached_query(sql_query, parameters)
Database.cached_query = cached_query

In [None]:
# Render the signature and docstring of cached_query in the notebook.
show_doc(cached_query)

<h4 id="cached_query" class="doc_header"><code>cached_query</code><a href="__main__.py#L3" class="source_link" style="float:right">[source]</a></h4>

> <code>cached_query</code>(**`sql_query`**, **`parameters`**=*`None`*)

Method to query data from `Database` Ref Obj and return pandas dataframe
Parameters
----------
sql_query : str
    saved SQL query
parameters : dict, optional
    populates named placeholders in query template. 

Returns
-------
df_raw : DataFrame
    Pandas DataFrame with raw data resulting from query

In [None]:
# "{WHERE}" is a str.format slot that cached_query fills in from `parameters`.
example_query = "SELECT * FROM pitches {WHERE} limit 100;"

mlb_db.cached_query(
    example_query,
    parameters=dict(WHERE="WHERE details_call_code = 'F'"),
)

Unnamed: 0,gamePk,atBatIndex,playEndTime,index,details_call_code,details_call_description,details_description,details_code,details_ballColor,details_trailColor,...,hitData_totalDistance,hitData_trajectory,hitData_hardness,hitData_location,hitData_coordinates_coordX,hitData_coordinates_coordY,details_runnerGoing,reviewDetails_isOverturned,reviewDetails_reviewType,reviewDetails_challengeTeamId
0,563411,0,2018-10-29T00:17:56.000Z,0,F,Strike - Foul,Foul,F,"rgba(170, 21, 11, 1.0)","rgba(188, 0, 33, 1.0)",...,,,,,,,,,,
1,563411,1,2018-10-29T00:19:24.000Z,1,F,Strike - Foul,Foul,F,"rgba(170, 21, 11, 1.0)","rgba(0, 34, 255, 1.0)",...,,,,,,,,,,
2,563411,3,2018-10-29T00:21:42.000Z,1,F,Strike - Foul,Foul,F,"rgba(170, 21, 11, 1.0)","rgba(188, 0, 33, 1.0)",...,,,,,,,,,,
3,563411,6,2018-10-29T00:29:58.000Z,0,F,Strike - Foul,Foul,F,"rgba(170, 21, 11, 1.0)","rgba(188, 0, 33, 1.0)",...,,,,,,,,,,
4,563411,8,2018-10-29T00:35:55.000Z,5,F,Strike - Foul,Foul,F,"rgba(170, 21, 11, 1.0)","rgba(187, 0, 69, 1.0)",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,564723,74,2019-03-05T23:01:25.139Z,0,F,Foul,Foul,F,"rgba(170, 21, 11, 1.0)",,...,,,,,,,,,,
96,564724,3,2019-03-07T20:21:15.022Z,1,F,Foul,Foul,F,"rgba(170, 21, 11, 1.0)",,...,,,,,,,,,,
97,564725,34,2019-03-08T21:48:24.514Z,0,F,Foul,Foul,F,"rgba(170, 21, 11, 1.0)",,...,,,,,,,,,,
98,564725,34,2019-03-08T21:48:24.514Z,1,F,Foul,Foul,F,"rgba(170, 21, 11, 1.0)",,...,,,,,,,,,,


In [None]:
# Count pitches per call description; no parameters, so the query text is used as-is.
q = (
    "select distinct(details_call_description) result, count(*) as count "
    "from pitches group by details_call_description"
)
mlb_db.cached_query(q)

Unnamed: 0,result,count
0,Strike - Foul,44
1,Hit Into Play - Out(s),34
2,Strike - Called,42
3,Hit Into Play - No Out(s),6
4,Hit Into Play - Run(s),5
5,Ball - Called,74
6,Strike - Swinging,27
7,Ball - Ball In Dirt,2
8,Strike - Swinging Blocked,2
9,"In play, out(s)",96867


In [None]:
#hide
# Convert the project's notebooks into the Python package modules.
from nbdev.export import notebook2script

notebook2script()

Converted 00_core.ipynb.
Converted index.ipynb.
