# Retention Check Scores Fallback
* StelllarAlgo Data Science
* Ryan Kazmerik & Nakisa Rad
* Mar 15, 2022

In [1]:
import boto3
import getpass

from datetime import datetime
from termcolor import colored, cprint
import pandas as pd
import awswrangler as wr
import pyodbc

### First we have to tell AWS which profile we'd like to login to, this will send us for a browser authentication trip:

In [2]:
! aws sso login --profile Stellaralgo-DataScienceAdmin

Attempting to automatically open the SSO authorization page in your default browser.
If the browser does not open or you wish to use a different device to authorize this request, open the following URL:

https://device.sso.us-east-1.amazonaws.com/

Then enter the code:

GQJT-ZLTQ
Successully logged into Start URL: https://stellaralgo.awsapps.com/start#/


### Now we can create a connection to our datascience table in RedShift:

In [3]:
session = boto3.setup_default_session(profile_name='Stellaralgo-DataScienceAdmin')
client = boto3.client('redshift')

conn1 = wr.data_api.redshift.connect(
    cluster_id="qa-app",
    database="datascience",
    db_user="admin"
)

print(f"CREDENTIALS RETRIEVED SUCCESSFULLY: {conn1}")

CREDENTIALS RETRIEVED SUCCESSFULLY: <awswrangler.data_api.redshift.RedshiftDataApi object at 0x7f88c3dd9640>


### Create a dictionary of all teams that have retention scoring:

In [6]:
sql1 =  f"""
    SELECT DISTINCT
        clientcode,
        lkupclientid,
        dbname,
        leagueName
    FROM
        datascience.ds.teamsconfig
    """

df_teams = wr.data_api.redshift.read_sql_query(
    sql = sql1, 
    con = conn1
)

columns = df_teams.columns.values

teams = []
for index, row in df_teams.iterrows():
    teams.append(dict(zip(columns, row)))

print(f"Total teams: {len(teams)} ")
print(f"Example team {teams[0]} ")

Total teams: 38 
Example team {'clientcode': 'hops', 'lkupclientid': 9, 'dbname': 'stlrmilb', 'leaguename': 'MILB'} 


### Add two more fields to each teams dictionary (insertdate & count):

In [9]:
for team in teams:
        
    conn2 = wr.data_api.redshift.connect(
        cluster_id="qa-app",
        database= team['dbname'],
        db_user="admin"
    )

    sql2 = f"""
            SELECT insertdate
            FROM {team['dbname']}.dw.customerretentionscores
            WHERE lkupclientid = {team['lkupclientid']}
            GROUP BY insertDate
            ORDER BY convert(datetime, insertdate) DESC
            LIMIT 1
        """

    df_customerretention = wr.data_api.redshift.read_sql_query(
        sql = sql2, 
        con = conn2
    )
    
    for row in df_customerretention.iterrows():
        team['insertdate'] = row[0]
                    
print(f"Example team {teams[25]} ")

Example team {'clientcode': 'canucks', 'lkupclientid': 7, 'dbname': 'stlrcanucks', 'leaguename': 'NHL', 'insertdate': 0} 


### Write a test to see if insertdate is greater than todays date:

In [10]:
today = datetime.now().strftime("%m-%d-%Y 00:00:00")

print("SCORE REPORT FOR CUSTOMERRETENTIONSCORES TABLE:")
for team in teams:
    
    print(team["insertdate"])
    
    if team['insertdate'] > today:
        print(colored(f" > {team['count']} Scores Updated: {team['clientcode']} ({team['lkupclientid']})", 'green'))
    else:
        print(colored(f" ! {team['count']} Scores NOT Updated: {team['clientcode']} ({team['lkupclientid']})", 'red'))

SCORE REPORT FOR CUSTOMERRETENTIONSCORES TABLE:
0


TypeError: '>' not supported between instances of 'int' and 'str'