# Retention Score Test - Redshift
* StelllarAlgo Data Science
* Ryan Kazmerik & Nakisa Rad
* Mar 23, 2022

In [1]:
import boto3
import getpass

from datetime import datetime
from termcolor import colored, cprint
import pandas as pd
import awswrangler as wr
import pyodbc

In [2]:
! aws sso login --profile Stellaralgo-DataScienceAdmin

Attempting to automatically open the SSO authorization page in your default browser.
If the browser does not open or you wish to use a different device to authorize this request, open the following URL:

https://device.sso.us-east-1.amazonaws.com/

Then enter the code:

WFKW-WVWC
Successully logged into Start URL: https://stellaralgo.awsapps.com/start#/


### First we have to tell AWS which profile we'd like to login to, this will send us for a browser authentication trip:

### Connect to Redshift:

In [3]:
try:
    session = boto3.setup_default_session(profile_name='Stellaralgo-DataScienceAdmin')
    client = boto3.client('redshift')

    conn = wr.data_api.redshift.connect(
        cluster_id="qa-app",
        database="datascience",
        db_user="admin"
    )
    
    print('CREDENTIALS RETRIEVED SUCCESSFULLY')
    
except Exception as err:
    print('FAILED TO RETRIEVE CREDENTIALS!', err)


CREDENTIALS RETRIEVED SUCCESSFULLY


### Create a dictionary of all teams that have retention scoring:

In [4]:
sqlcode =  f"""
    SELECT DISTINCT
        clientcode,
        lkupclientid,
        dbname,
        leagueName
    FROM 
        datascience.ds.teamsconfig
    """
df = wr.data_api.redshift.read_sql_query(
            sql = sqlcode, 
            con = conn
        )

columns = df.columns.values

teams = []
for index, row in df.iterrows():
    teams.append(dict(zip(columns, row)))


print(f"Total teams: {len(teams)} ")
print(f"Example team {teams[0]} ")

Total teams: 38 
Example team {'clientcode': 'hops', 'lkupclientid': 9, 'dbname': 'stlrmilb', 'leaguename': 'MILB'} 


In [7]:
for team in teams:
    
    
    conn = wr.data_api.redshift.connect(
    cluster_id="qa-app",
    database= team['dbname'],
    db_user="admin"
    )
        


    sql2 = f"""
            SELECT TOP 1 insertdate  insertdate, count(*) as count
            FROM {team['dbname']}.dw.customerretentionscores  
            WHERE [lkupclientid] = {team['lkupclientid']}
            AND insertdate IS NOT NULL
            GROUP BY insertdate 
            ORDER BY convert(datetime, insertdate) DESC
        """

    df_customerretention = wr.data_api.redshift.read_sql_query(
        sql = sql2, 
        con = conn
    )
    results = []
    for row in df_customerretention.iterrows():
        #print(row[0])
       # print(row[1]['insertdate'])
        #print(row[1]['count'])
       # break
        team['insertdate'] = row[1]['insertdate']
        team['count'] = row[1]['count']
        
print(f"Example team {teams} ")

Example team [{'clientcode': 'hops', 'lkupclientid': 9, 'dbname': 'stlrmilb', 'leaguename': 'MILB', 'insertdate': '2022-04-01 10:23:16', 'count': 931}, {'clientcode': 'bulls', 'lkupclientid': 11, 'dbname': 'stlrmilb', 'leaguename': 'MILB', 'insertdate': '2022-04-01 10:23:16', 'count': 3581}, {'clientcode': 'rivercats', 'lkupclientid': 12, 'dbname': 'stlrmilb', 'leaguename': 'MILB'}, {'clientcode': 'vegas51s', 'lkupclientid': 15, 'dbname': 'stlrmilb', 'leaguename': 'MILB', 'insertdate': '2022-04-01 10:23:16', 'count': 1755}, {'clientcode': 'rainiers', 'lkupclientid': 17, 'dbname': 'stlrmilb', 'leaguename': 'MILB', 'insertdate': '2022-04-01 10:23:16', 'count': 111}, {'clientcode': '66ers', 'lkupclientid': 19, 'dbname': 'stlrmilb', 'leaguename': 'MILB', 'insertdate': '2022-04-01 10:23:16', 'count': 247}, {'clientcode': 'loons', 'lkupclientid': 20, 'dbname': 'stlrmilb', 'leaguename': 'MILB', 'insertdate': '2022-04-01 10:23:16', 'count': 603}, {'clientcode': 'rattlers', 'lkupclientid': 24, 

### Write a test to see if insertdate is greater than todays date:

In [6]:
today = datetime.now().strftime("%m-%d-%Y 00:00:00")

print("SCORE REPORT FOR DATASCIENCE customerScores TABLE:")
for team in teams:
    
    if team['insertdate'] > today:
        print(colored(f" > {team['count']} Scores Updated: {team['clientcode']} ({team['lkupclientid']})", 'green'))
    else:
        print(colored(f" ! {team['count']} Scores NOT Updated: {team['clientcode']} ({team['lkupclientid']})", 'red'))

SCORE REPORT FOR DATASCIENCE customerScores TABLE:
[32m > 931 Scores Updated: hops (9)[0m
[32m > 3581 Scores Updated: bulls (11)[0m


KeyError: 'insertdate'