# Retention Score Test - Redshift
* StelllarAlgo Data Science
* Ryan Kazmerik & Nakisa Rad
* Mar 23, 2022

In [92]:
import awswrangler as wr
import boto3
import pandas as pd
from datetime import datetime
from termcolor import colored

! aws sso login --profile Stellaralgo-DataScienceAdmin

Attempting to automatically open the SSO authorization page in your default browser.
If the browser does not open or you wish to use a different device to authorize this request, open the following URL:

https://device.sso.us-east-1.amazonaws.com/

Then enter the code:

VXQV-DSPJ
Successully logged into Start URL: https://stellaralgo.awsapps.com/start


In [110]:
CLUSTER = "qa-app"
USER = "admin"

try:
    session = boto3.setup_default_session(profile_name='Stellaralgo-DataScienceAdmin')
    client = boto3.client('redshift')

    conn = wr.data_api.redshift.connect(
        cluster_id = CLUSTER,
        database = "datascience",
        db_user = USER
    )
    
    print(f"CREDENTIALS RETRIEVED SUCCESSFULLY: {CLUSTER} : {USER}")
    
except Exception as err:
    print(f"FAILED TO RETRIEVE CREDENTIALS! {err} ")

CREDENTIALS RETRIEVED SUCCESSFULLY: qa-app : admin


### Create a dictionary of all teams that have retention scoring:

In [111]:
sqlcode =  f"""
    SELECT DISTINCT
        clientcode,
        lkupclientid,
        dbname,
        leagueName
    FROM 
        datascience.ds.teamsconfig
    """
    
df = wr.data_api.redshift.read_sql_query(
            sql = sqlcode, 
            con = conn
        )

columns = df.columns.values

teams = []
for index, row in df.iterrows():
    teams.append(dict(zip(columns, row)))


print(f"Total teams: {len(teams)} ")
print(f"Example team {teams[0]} ")

Total teams: 37 
Example team {'clientcode': 'hops', 'lkupclientid': 9, 'dbname': 'stlrmilb', 'leaguename': 'MILB'} 


### Query the customerretentionscores table to see when the last time the retention scores were updated and add that property to each team, along with the count of records:

In [112]:
for team in teams:
    
    conn = wr.data_api.redshift.connect(
        cluster_id = CLUSTER,
        database= team['dbname'],
        db_user = USER
    )

    sql = f"""
            SELECT TOP 1 insertdate, count(*)
            FROM {team['dbname']}.dw.customerretentionscores  
            WHERE lkupclientid = {team['lkupclientid']}
            GROUP BY insertdate 
            ORDER BY insertdate DESC
        """

    df_customerretention = wr.data_api.redshift.read_sql_query(
        sql = sql, 
        con = conn
    )
    
    try:
        team['insertdate'] = df_customerretention.iloc[0]['insertdate']
        team['count'] = df_customerretention.iloc[0]['count']

    except Exception as err:
        team['insertdate'] = None

            
print(f"Example team {teams[0]} ")

Example team {'clientcode': 'hops', 'lkupclientid': 9, 'dbname': 'stlrmilb', 'leaguename': 'MILB', 'insertdate': True, 'count': 1773} 


### Write a test to see if insertdate is greater than todays date:

In [117]:
today = datetime.strptime(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), '%Y-%m-%d %H:%M:%S')

print(f"RETENTION SCORES REPORT FOR: {CLUSTER}")

fresh_teams = []
stale_teams = []
null_teams = []

for team in teams:

    print(team['insertdate'])
    
"""         insert_date = datetime.strptime(team['insertdate'], '%Y-%m-%d %H:%M:%S')
        days_old = (int((insert_date - today).days)+1)*-1

        if days_old == 0: fresh_teams.append(team)
        else: stale_teams.append(team)
    
print(colored(f"NO RETENTION SCORES EXIST:", 'red'))
df_null_teams = pd.DataFrame.from_dict(null_teams)
print(colored(df_null_teams, 'red'))

print(colored(f"RETENTION SCORES STALE:", 'yellow'))
df_stale_teams = pd.DataFrame.from_dict(stale_teams)
print(colored(df_stale_teams, 'yellow'))

print(colored(f"RETENTION SCORES FRESH:", 'green'))
df_fresh_teams = pd.DataFrame.from_dict(fresh_teams)
print(colored(df_fresh_teams, 'green'))
 """


RETENTION SCORES REPORT FOR: qa-app
True
True
None
True
True
True
True
True
None
True
True
True
True
True
None
True
True
True
True
True
2022-04-11 08:28:57
2022-04-11 08:15:21
2022-04-11 08:27:48
2022-04-11 08:37:16
2022-04-11 08:34:56
2022-04-11 08:43:53
2022-04-11 08:42:09
2022-03-23 22:09:04
2022-04-11 08:35:39
2022-04-11 08:20:50
2022-04-11 08:18:24
2022-04-11 08:36:14
2022-04-11 08:24:24
2022-04-11 08:24:24
None
2022-04-11 09:14:56
None


'         insert_date = datetime.strptime(team[\'insertdate\'], \'%Y-%m-%d %H:%M:%S\')\n        days_old = (int((insert_date - today).days)+1)*-1\n\n        if days_old == 0: fresh_teams.append(team)\n        else: stale_teams.append(team)\n    \nprint(colored(f"NO RETENTION SCORES EXIST:", \'red\'))\ndf_null_teams = pd.DataFrame.from_dict(null_teams)\nprint(colored(df_null_teams, \'red\'))\n\nprint(colored(f"RETENTION SCORES STALE:", \'yellow\'))\ndf_stale_teams = pd.DataFrame.from_dict(stale_teams)\nprint(colored(df_stale_teams, \'yellow\'))\n\nprint(colored(f"RETENTION SCORES FRESH:", \'green\'))\ndf_fresh_teams = pd.DataFrame.from_dict(fresh_teams)\nprint(colored(df_fresh_teams, \'green\'))\n '