# Test - Check Retention Score Dates
* StelllarAlgo Data Science
* Ryan Kazmerik & Nakisa Rad
* Apr 30, 2022

In [8]:
import awswrangler as wr
import boto3 
import pandas as pd
import pytz

from datetime import datetime, timedelta

pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [2]:
! aws sso login --profile Stellaralgo-DataScienceAdmin

Attempting to automatically open the SSO authorization page in your default browser.
If the browser does not open or you wish to use a different device to authorize this request, open the following URL:

https://device.sso.us-east-1.amazonaws.com/

Then enter the code:

PZRM-RLRX
Successully logged into Start URL: https://stellaralgo.awsapps.com/start


In [3]:
session = boto3.setup_default_session(profile_name='Stellaralgo-DataScienceAdmin')
client = boto3.client('redshift')

### Let's create a list of team db's that we'd like to check for fresh scores:

In [4]:
SCHEMA = "dw"
TABLE = "customerretentionscores"
USER = "admin"

TEAMS_QA = [
    {"cluster": "qa-app","dbname": "stlrahl"},
    {"cluster": "qa-app","dbname": "stlrcfl"},
    {"cluster": "qa-app","dbname": "stlrmilb"},
    {"cluster": "qa-app-elbu","dbname": "stlrmilbseadogs"},
    {"cluster": "qa-app-elbu","dbname": "stlrmilbwindsurge"},
    {"cluster": "qa-app","dbname": "stlrrays"},
    {"cluster": "qa-app","dbname": "stlrmls"},
    {"cluster": "qa-app","dbname": "stlrtrailblazers"},
    {"cluster": "qa-app","dbname": "stlrcanucks"},
    {"cluster": "qa-app","dbname": "stlrcoyotes"},
    {"cluster": "qa-app","dbname": "stlrflames"},
    {"cluster": "qa-app","dbname": "stlrnhlpanthers"},
    {"cluster": "qa-app","dbname": "stlrvgk"},
    {"cluster": "qa-app","dbname": "stlrnll"},
    {"cluster": "qa-app-elbu","dbname": "stlrechleverblades"},
    {"cluster": "qa-app","dbname": "stlrwhl"}
]

TEAMS_PROD = [
    {"cluster": "prod-app","dbname": "stlrahl"},
    {"cluster": "prod-app","dbname": "stlrcfl"},
    {"cluster": "prod-app","dbname": "stlrmilb"},
    {"cluster": "prod-app-elbu","dbname": "stlrmilbseadogs"},
    {"cluster": "prod-app-elbu","dbname": "stlrmilbwindsurge"},
    {"cluster": "prod-app","dbname": "stlrrays"},
    {"cluster": "prod-app","dbname": "stlrmls"},
    {"cluster": "prod-app","dbname": "stlrtrailblazers"},
    {"cluster": "prod-app","dbname": "stlrcanucks"},
    {"cluster": "prod-app","dbname": "stlrcoyotes"},
    {"cluster": "prod-app","dbname": "stlrflames"},
    {"cluster": "prod-app","dbname": "stlrnhlpanthers"},
    {"cluster": "prod-app","dbname": "stlrvgk"},
    {"cluster": "prod-app","dbname": "stlrnll"},
    {"cluster": "prod-app-elbu","dbname": "stlrechleverblades"},
    {"cluster": "prod-app","dbname": "stlrwhl"}
]

### Now we can loop through each datatbase, connect to it and check the retention scores:

In [5]:
print(f"CHECKING RETENTION SCORES .", end='')

TEAMS = TEAMS_QA + TEAMS_PROD

results = []
for team in TEAMS:

    conn = wr.data_api.redshift.connect(
        cluster_id = team['cluster'],
        database =  team['dbname'].lower(),
        db_user = USER
    )
    
    print(f".", end='')

    sql = f"""
        SELECT count(*), lkupclientid, MAX(scoredate) as max_score_date
        FROM {team['dbname'].lower()}.{SCHEMA}.{TABLE} 
        GROUP BY lkupclientid
    """

    df = wr.data_api.redshift.read_sql_query(
        sql = sql, 
        con = conn
    )

    df["cluster"] = team["cluster"]
    df["dbname"] = team['dbname']

    results.append(df)

df_results = pd.concat(results)

df_results["count"] = pd.to_numeric(df_results["count"])
df_results["max_score_date"] = pd.to_datetime(df_results["max_score_date"])

df_results.shape


CHECKING RETENTION SCORES .................................

(75, 5)

In [9]:
today = datetime.now(pytz.timezone("MST")).strftime("%Y-%m-%d")

days_to_sub = -1
target_day = (datetime.today() - timedelta(days_to_sub)).strftime("%Y-%m-%d")

df_stale = df_results[df_results["max_score_date"] < target_day]

print(f"STALE TEAMS: {len(df_stale)}")
print(f"TARGET DAY: {target_day}")
df_stale

STALE TEAMS: 75
TARGET DAY: 2022-05-18


Unnamed: 0,count,lkupclientid,max_score_date,cluster,dbname
0,176554,56,2022-05-17,qa-app,stlrahl
1,31607,16,2022-05-16,qa-app,stlrahl
2,95485,52,2022-05-17,qa-app,stlrahl
0,2694923,35,2022-05-17,qa-app,stlrcfl
0,1243156,11,2022-05-17,qa-app,stlrmilb
1,81882,48,2022-05-17,qa-app,stlrmilb
2,4624,51,2022-05-17,qa-app,stlrmilb
3,28564,20,2022-05-17,qa-app,stlrmilb
4,32805,34,2022-05-17,qa-app,stlrmilb
5,32789,49,2022-05-17,qa-app,stlrmilb


### Done