# Retention Score Test - Redshift
* StelllarAlgo Data Science
* Ryan Kazmerik & Nakisa Rad
* Mar 23, 2022

In [1]:
import awswrangler as wr
import boto3
from datetime import datetime
from termcolor import colored

! aws sso login --profile Stellaralgo-DataScienceAdmin

Attempting to automatically open the SSO authorization page in your default browser.
If the browser does not open or you wish to use a different device to authorize this request, open the following URL:

https://device.sso.us-east-1.amazonaws.com/

Then enter the code:

QTGM-XWHJ
Successully logged into Start URL: https://stellaralgo.awsapps.com/start


In [2]:
CLUSTER = "qa-app"
USER = "admin"

### Connect to Redshift:

In [3]:
try:
    session = boto3.setup_default_session(profile_name='Stellaralgo-DataScienceAdmin')
    client = boto3.client('redshift')

    conn = wr.data_api.redshift.connect(
        cluster_id = CLUSTER,
        database = "datascience",
        db_user = USER
    )
    
    print('CREDENTIALS RETRIEVED SUCCESSFULLY')
    
except Exception as err:
    print('FAILED TO RETRIEVE CREDENTIALS!', err)

CREDENTIALS RETRIEVED SUCCESSFULLY


In [4]:
sqlcode =  f"""
    SELECT DISTINCT
        clientcode,
        lkupclientid,
        dbname,
        leagueName
    FROM 
        datascience.ds.teamsconfig
    """
    
df = wr.data_api.redshift.read_sql_query(
            sql = sqlcode, 
            con = conn
        )

columns = df.columns.values

teams = []
for index, row in df.iterrows():
    teams.append(dict(zip(columns, row)))


print(f"Total teams: {len(teams)} ")
print(f"Example team {teams[0]} ")

Total teams: 39 
Example team {'clientcode': 'hops', 'lkupclientid': 9, 'dbname': 'stlrmilb', 'leaguename': 'MILB'} 


### Create a dictionary of all teams that have retention scoring:

In [5]:
for team in teams:
    
    conn = wr.data_api.redshift.connect(
        cluster_id = CLUSTER,
        database= team['dbname'],
        db_user = USER
    )

    sql = f"""
            SELECT TOP 1 insertdate, count(*) as count
            FROM {team['dbname']}.dw.customerretentionscores  
            WHERE [lkupclientid] = {team['lkupclientid']}
            GROUP BY insertdate 
            ORDER BY convert(datetime, insertdate) DESC
        """

    df_customerretention = wr.data_api.redshift.read_sql_query(
        sql = sql, 
        con = conn
    )
    
    for row in df_customerretention.iterrows():
        team['insertdate'] = row[1]['insertdate']
        team['count'] = row[1]['count']
        
print(f"Example team {teams[0]} ")

Example team {'clientcode': 'hops', 'lkupclientid': 9, 'dbname': 'stlrmilb', 'leaguename': 'MILB', 'insertdate': True, 'count': 931} 


### Write a test to see if insertdate is greater than todays date:

In [6]:
today = datetime.strptime(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), '%Y-%m-%d %H:%M:%S')

print("SCORE REPORT FOR DATASCIENCE customerScores TABLE:")
for team in teams:

    try:
        print(today)
        print(team['insertdate'])
        print()

    #try:
    #    insert_date = datetime.strptime(teams[0]['insertdate'], '%Y-%m-%d %H:%M:%S')

    #    if insert_date > today:
    #        print(colored(f" > {team['count']} SCORES UPDATED: {team['clientcode']} ({team['lkupclientid']})", 'green'))
    #    else:
    #        print(colored(f" ! {team['count']} SCORES NOTE UPDATED: {team['clientcode']} ({team['lkupclientid']})", 'yellow'))
    
    except Exception as err:

    #    print(colored(f" ! NO RETENTION SCORES EXIST: {team['clientcode']} ({team['lkupclientid']})", 'red'))
        print(f"ERROR: {err}")
        print()


SCORE REPORT FOR DATASCIENCE customerScores TABLE:
2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
ERROR: 'insertdate'

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
ERROR: 'insertdate'

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
ERROR: 'insertdate'

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
True

2022-04-05 16:04:20
2022-04-04 09:26:58

2022-04-05 16:04:20
2022-04-05 09:14:45

2022-04-05 16:04:20
2022-04-05 09:15:39

2022-04-05 16:04:20
2022-04-05 09:28:35

2022-04-05 16:04:20
2022-04-05 09:34:20

2022-04-05 16:04:20
2022-04-05 09:38:14

2022-04-05 16:04:20
2022-04-05 09:48:49

2022-04-05 16:04:20
2022-03-23 22:09:04

2022-04-05 16:04:20
ERROR: 'in