In [3]:
import pyodbc
import os
import pandas as pd
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv("C:/Users/BoydClaire/.Renviron"))

True

In [43]:
def get_database_tables(database_name, connection_args, schema=None):

    connection_string = "".join(connection_args)

    # create connection object
    cnxn = pyodbc.connect(connection_string)

    cursor = cnxn.cursor()

    if schema:
        tables_raw = cursor.tables(schema=schema)
    else:
        tables_raw = cursor.tables()

    tables = pd.DataFrame(tables_raw)
    tables.columns = ['list']
    cleaned_tables = pd.DataFrame(tables['list'].apply(list).to_list(), columns = ['DATABASE','SCHEMA','NAME','TYPE','SELF_REFERENCING_COL_NAME'])
    cleaned_tables['db'] = database_name

    return cleaned_tables

In [44]:
# get sql tables

sql_database_list = ["production", "test", "sandbox"]
sql_tables = []

for database in sql_database_list:

    # build connection string
    connection_args = ["Driver={SQL Server};",
                        f"SERVER={os.environ[f'{database}_server']};",
                        f"DATABASE={os.environ[f'{database}_database']};",
                        f"UID={os.environ[f'{database}_username']};",
                        f"PWD={os.environ[f'{database}_password']};",
                        f"Trusted_Connection=no;",
                        "Port=1433"
    ]

    table = get_database_tables(database, connection_args)
    sql_tables.append(table)

In [58]:
# get oracle tables

oracle_database_list = ["fdw", "ias"]
oracle_tables = []

for database in oracle_database_list:

    # build connection string
    database_schema = os.getenv(f"{database}_schema")
    connection_args = ["Driver={Oracle in OraClient19Home1};",
                        f"DBQ={os.environ[f'{database}_path']};"
                        f"DATABASE={os.environ[f'{database}_schema']};"
                        f"UID={os.environ[f'{database}_username']};",
                        f"PWD={os.environ[f'{database}_password']};",
                        f"Trusted_Connection=no;",
                        "Port=1433"
    ]

    table = get_database_tables(database, connection_args)
    table = table.loc[table.loc[:,"SCHEMA"] == database_schema.upper(), :]

    oracle_tables.append(table)


In [62]:
sql_tables[1]

Unnamed: 0,DATABASE,SCHEMA,NAME,TYPE,SELF_REFERENCING_COL_NAME,db
0,V8_NEWYORK_NY_TEST_LIVE_NOV2024,COMMON,ACCOUNT_AUTO_GEN_SEQUENCE,TABLE,,test
1,V8_NEWYORK_NY_TEST_LIVE_NOV2024,COMMON,AKA_NAME,TABLE,,test
2,V8_NEWYORK_NY_TEST_LIVE_NOV2024,COMMON,AKA_NAME_OWNERS,TABLE,,test
3,V8_NEWYORK_NY_TEST_LIVE_NOV2024,COMMON,AOM_TO_OWNER_MAP,TABLE,,test
4,V8_NEWYORK_NY_TEST_LIVE_NOV2024,COMMON,APPLICATIONCONFIG,TABLE,,test
...,...,...,...,...,...,...
4545,V8_NEWYORK_NY_TEST_LIVE_NOV2024,sys,xml_schema_model_groups,VIEW,,test
4546,V8_NEWYORK_NY_TEST_LIVE_NOV2024,sys,xml_schema_namespaces,VIEW,,test
4547,V8_NEWYORK_NY_TEST_LIVE_NOV2024,sys,xml_schema_types,VIEW,,test
4548,V8_NEWYORK_NY_TEST_LIVE_NOV2024,sys,xml_schema_wildcard_namespaces,VIEW,,test


In [60]:
flat = [table for tables in [sql_tables,oracle_tables] for table in tables]
all_tables = pd.concat(flat, axis=0)

In [61]:
all_tables.groupby("db").count()

Unnamed: 0_level_0,DATABASE,SCHEMA,NAME,TYPE,SELF_REFERENCING_COL_NAME
db,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
fdw,0,228,228,228,0
production,4684,4684,4684,4684,0
pts,0,6462,6462,6462,0
sandbox,4265,4265,4265,4265,0
test,4550,4550,4550,4550,0


things to consider:
* repeats of the same data
* how often records get updated
* mirrored tables in different databases
* 


how to pull in cols for all databases based on the tables above
*

In [67]:
database= "test"

# build connection string
connection_args = ["Driver={SQL Server};",
                    f"SERVER={os.environ[f'{database}_server']};",
                    f"DATABASE={os.environ[f'{database}_database']};",
                    f"UID={os.environ[f'{database}_username']};",
                    f"PWD={os.environ[f'{database}_password']};",
                    f"Trusted_Connection=no;",
                    "Port=1433"
]

connection_string = "".join(connection_args)

# create connection object
cnxn = pyodbc.connect(connection_string)

cursor = cnxn.cursor()

def get_columns_from_table(cursor, tablename):
    
    cursor.execute(f"""
        SELECT COLUMN_NAME, DATA_TYPE
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_NAME = '{tablename}'
    """)

    # Fetch all results
    columns_info = pd.DataFrame(cursor.fetchall())

    return columns_info


get_columns_from_table(cursor, "AKA_NAME_OWNERS")

Unnamed: 0,0
0,"[ANO_MNC, numeric]"
1,"[ANO_AKA_ID, bigint]"
2,"[ANO_OWNER_ID, numeric]"
3,"[ANO_CREATE_DATE, datetime]"
4,"[ANO_LAST_UPDATE, datetime]"
