In [1]:
import os
import uuid
from datetime import datetime, timedelta
from typing import List
from urllib.parse import quote

import awswrangler as wr
import pandas as pd
import psycopg2
import requests
from bs4 import BeautifulSoup
from sqlalchemy import exc, create_engine, text

# Cutoff timestamp used by the count queries below: exactly 24 hours before
# this cell runs. datetime.now() is naive (no tzinfo) local time.
yesterday = datetime.now() - timedelta(hours=24)

In [2]:
def sql_connection(rds_schema: str):
    """
    Build a SQLAlchemy engine for the PostgreSQL database, scoped to one schema.

    Connection settings are read from the environment variables RDS_USER,
    RDS_PW, IP and RDS_DB; the port is fixed at 5432. The schema is applied
    by passing ``-csearch_path=<rds_schema>`` as a connection option.

    Args:
        rds_schema (str): The schema in the DB to use as the search_path.
    Returns:
        The SQLAlchemy engine created for the specified schema.
    Raises:
        ValueError: If any of the required environment variables is unset.
        sqlalchemy.exc.SQLAlchemyError: If the engine cannot be constructed.
            The error is re-raised instead of being returned, so callers never
            receive an exception object where they expect an engine.
    """
    settings = {
        name: os.environ.get(name) for name in ("RDS_USER", "RDS_PW", "IP", "RDS_DB")
    }
    # Fail fast with a clear message; otherwise an unset variable would be
    # formatted into the URL as the literal text "None".
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise ValueError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot be mistaken for URL delimiters. safe="" also encodes '/'.
    rds_user = quote(settings["RDS_USER"], safe="")
    rds_pw = quote(settings["RDS_PW"], safe="")
    rds_ip = settings["IP"]
    rds_db = settings["RDS_DB"]

    try:
        connection = create_engine(
            f"postgresql+psycopg2://{rds_user}:{rds_pw}@{rds_ip}:5432/{rds_db}",
            connect_args={"options": f"-csearch_path={rds_schema}"},
            # defining schema to connect to
            echo=False,
        )
    except exc.SQLAlchemyError:
        # Propagate the failure; returning the exception object would only
        # defer the error to the first query that uses the "engine".
        raise

    # NOTE(review): create_engine is documented as lazy, so this message means
    # the engine was built, not that a connection was opened - confirm if a
    # real connectivity check is wanted here.
    print(f"SQL Connection to schema: {rds_schema} Successful")
    return connection

# Engine scoped to the nba_source schema; the queries that follow run against it.
conn = sql_connection(rds_schema="nba_source")

SQL Connection to schema: nba_source Successful


In [4]:
# Count rows scraped since `yesterday`. The cutoff is passed as a bound
# parameter, so the driver handles quoting and typing of the timestamp
# instead of it being formatted into the SQL string by hand.
recent_count_query = text(
    "select count(*) from aws_reddit_comment_data_source where scrape_ts >= :since;"
)
df_count2 = pd.read_sql_query(recent_count_query, conn, params={"since": yesterday})
# .loc avoids chained indexing; row 0 / column "count" holds the single count value.
recent_count = df_count2.loc[0, "count"]
print(f"total count of the table for after {yesterday} is {recent_count}")

total count of the table for after 2022-05-29 21:05:20.385567 is 7234


In [18]:
# Same count, run directly on a SQLAlchemy connection instead of via pandas.
# The statement has to be executed on `con` (the connection opened by the
# `with` block), not on the engine `conn`, and .scalar() fetches the single
# count value from the result. The cutoff is bound as a parameter rather than
# formatted into the SQL text.
with conn.connect() as con:
    recent_comment_count = con.execute(
        text(
            "select count(*) from aws_reddit_comment_data_source where scrape_ts >= :since;"
        ),
        {"since": yesterday},
    ).scalar()

# Last expression of the cell, so the notebook displays the count.
recent_comment_count


7234