# DWH ACCURACY

In [None]:
import pandas as pd
import psycopg2 as pg
from sqlalchemy import create_engine
from sqlalchemy.exc import SQLAlchemyError
from datetime import datetime, timedelta
import sys
sys.path.append('../../')
from env import pg_conn, conn

PG and SNOWFLAKE QUERY FUNCTIONS

In [None]:
def pg_query(database, query):
    """Run ``query`` against a Postgres database and return the result as a DataFrame.

    The connection URL is built by appending ``str(database)`` to the
    ``pg_conn`` prefix imported from ``env``. On success the full result is
    printed without row/column truncation and then returned.

    Args:
        database: Database name appended to ``pg_conn``.
        query: SQL handed to ``pandas.read_sql``.

    Returns:
        The result as a ``pandas.DataFrame``, or ``None`` if a
        ``SQLAlchemyError`` is raised while reading; in that case the
        transaction is rolled back and the error is printed.
    """
    engine = create_engine(pg_conn + str(database))
    try:
        # The context manager closes the connection on every exit path.
        with engine.connect() as connection:
            transaction = connection.begin()
            try:
                df = pd.read_sql(query, connection)
                transaction.commit()
            except SQLAlchemyError as e:
                transaction.rollback()
                print(f"Error occurred: {e}")
                return None
        # df is fully materialised, so it can be shown after the connection is closed.
        with pd.option_context('display.max_rows', None, 'display.max_columns', None):
            print(df)
        return df
    finally:
        # A fresh engine is built per call; dispose it so its connection
        # pool does not keep database connections open after we return.
        engine.dispose()

def snow_query(query):
    """Execute ``query`` on the module-level ``conn`` and return the rows as a DataFrame.

    ``conn`` is imported from ``env`` (presumably the Snowflake connection,
    going by this function's name -- confirm against ``env``). Column names
    are taken from the first field of each entry in ``cursor.description``.

    Args:
        query: SQL statement passed to ``cursor.execute``.

    Returns:
        A ``pandas.DataFrame`` holding every fetched row.
    """
    # Open the cursor before the try block: if cursor() itself fails there is
    # nothing to close, and the real error must surface instead of an
    # UnboundLocalError raised from the finally clause.
    cur = conn.cursor()
    try:
        cur.execute(query)
        rows = cur.fetchall()
        columns = [col[0] for col in cur.description]
        return pd.DataFrame(rows, columns=columns)
    finally:
        cur.close()

### FACT LC SUCCESS EVENTS COMPARED TO UBIQUITY SCHEME ACCOUNT ENTRY

In [None]:
# Distinct (scheme_account_id, user_id) pairs with link_status = 1,
# read from the 'hermes' database through pg_query.
ubiquity_query = """select distinct scheme_account_id, user_id from ubiquity_schemeaccountentry where link_status = 1"""
ubiquity_table = pg_query(database='hermes', query=ubiquity_query)

In [None]:
# Distinct (loyalty_card_id, user_id) pairs from FACT_LOYALTY_CARD, limited to
# rows flagged is_most_recent with event type 'SUCCESS', fetched via snow_query.
fact_lc_query = """with lc as (select * from PROD.BINK_SECURE.FACT_LOYALTY_CARD)

select distinct loyalty_card_id, user_id from lc where is_most_recent = TRUE and event_Type = 'SUCCESS';"""
fact_lc = snow_query(query=fact_lc_query)

# Show the first rows as the cell output.
fact_lc.head()

In [None]:
# Build one composite key per (card id, user id) pair on each side. The "|"
# separator keeps distinct pairs apart: without it, ids 1/23 and 12/3 would
# both produce the key "123" and be counted as a match.
# NOTE(review): assumes the id values never contain "|" themselves.
fact_lc['unique_key'] = fact_lc['LOYALTY_CARD_ID'].astype(str) + '|' + fact_lc['USER_ID'].astype(str)
ubiquity_table['unique_key'] = ubiquity_table['scheme_account_id'].astype(str) + '|' + ubiquity_table['user_id'].astype(str)

# Share of fact_lc rows whose key also appears in ubiquity_table.
# An empty table gives NaN rather than raising ZeroDivisionError.
matched1 = fact_lc[fact_lc['unique_key'].isin(ubiquity_table['unique_key'])]
result1 = 1 - (len(fact_lc) - len(matched1))/len(fact_lc) if len(fact_lc) else float('nan')
print(result1)

# Share of ubiquity_table rows whose key also appears in fact_lc.
matched2 = ubiquity_table[ubiquity_table['unique_key'].isin(fact_lc['unique_key'])]
result2 = 1 - (len(ubiquity_table) - len(matched2))/len(ubiquity_table) if len(ubiquity_table) else float('nan')
print(result2)

# Report the mean of the two directional match rates.
print(f"Average match rate for fact_lc to ubiquity_schemeaccountentry is {(result1 + result2)/2}")


In [None]:

# TODO: delete this cell -- it belongs to a different task.
# Distinct card numbers with link_status = 1 for the scheme whose company
# is 'The Works', read from the 'hermes' database.
ubiquity_query = """select distinct card_number
    from ubiquity_schemeaccountentry
    left join scheme_schemeaccount on scheme_schemeaccount.id = ubiquity_schemeaccountentry.scheme_account_id
    left join scheme_scheme on scheme_scheme.id = scheme_schemeaccount.scheme_id
    where scheme_scheme.company = 'The Works' and link_status = 1;
    """
ubiquity_table = pg_query(database='hermes', query=ubiquity_query)

In [None]:
# Write the current ubiquity_table to a CSV file in the working directory,
# leaving out the DataFrame index column.
ubiquity_table.to_csv(
    path_or_buf='The_Works_SAIDS_20230326.csv',
    index=False,
)

In [None]:
# Count distinct payment_card_account_id values in
# ubiquity_paymentcardschemeentry where active_link is true ('hermes' database).
ubiquity_query = """select count(distinct payment_card_account_id)
    from ubiquity_paymentcardschemeentry where active_link = True;
    """
ubiquity_table = pg_query(database='hermes', query=ubiquity_query)

In [None]:
# List every row of the pg_catalog.pg_tables catalog view in the 'hermes' database.
ubiquity_table = pg_query(
    database='hermes',
    query="SELECT * FROM pg_catalog.pg_tables;",
)
