# Test - Check Retention Score Dates
* StelllarAlgo Data Science
* Ryan Kazmerik & Nakisa Rad
* Apr 30, 2022

In [1]:
import awswrangler as wr
import boto3 
import json
import pandas as pd
import pytz

from datetime import datetime, timedelta

pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [3]:
! aws sso login --profile Stellaralgo-DataScienceAdmin

Attempting to automatically open the SSO authorization page in your default browser.
If the browser does not open or you wish to use a different device to authorize this request, open the following URL:

https://device.sso.us-east-1.amazonaws.com/

Then enter the code:

WRML-TCLR
Successfully logged into Start URL: https://stellaralgo.awsapps.com/start#/


In [4]:
session = boto3.setup_default_session(profile_name='Stellaralgo-DataScienceAdmin')
client = boto3.client('redshift')

### Let's create a list of team db's that we'd like to check for fresh scores:

In [5]:
SCHEMA = "dw"
TABLE = "customerretentionscores"
TEAMS = json.load(open('../../config/teamsconfig.json'))
USER = "admin"

### Now we can loop through each datatbase, connect to it and check the retention scores:

In [6]:
print(f"CHECKING RETENTION SCORES")

results = []
for team in TEAMS:

    conn = wr.data_api.redshift.connect(
        cluster_id = team['cluster'],
        database =  team['dbname'].lower(),
        db_user = USER
    )

    sql = f"""
        SELECT count(*), lkupclientid, MAX(scoredate) as max_score_date
        FROM {team['dbname'].lower()}.{SCHEMA}.{TABLE} 
        WHERE lkupclientid = {team['lkupclientid']}
        GROUP BY lkupclientid
    """

    df = wr.data_api.redshift.read_sql_query(
        sql = sql, 
        con = conn
    )


    df["cluster"] = team["cluster"]
    df["dbname"] = team["dbname"]
    df["lkupclientid"] = team["lkupclientid"]
    df["name"] = team["name"]

    if len(df) > 0:
        results.append(df)
    else:
        print(f" > NO RETENTION SCORES FOUND FOR: {team['name']} - {team['clientcode']} ({team['lkupclientid']})")

df_results = pd.concat(results)

df_results["count"] = pd.to_numeric(df_results["count"])
df_results["max_score_date"] = pd.to_datetime(df_results["max_score_date"])

df_results.shape


CHECKING RETENTION SCORES


ValidationException: An error occurred (ValidationException) when calling the ExecuteStatement operation: Redshift endpoint doesn't exist in this region.

In [7]:
today = datetime.now(pytz.timezone("MST")).strftime("%Y-%m-%d")

days_to_sub = 0
target_day = (datetime.today() - timedelta(days_to_sub)).strftime("%Y-%m-%d")

df_stale = df_results[df_results["max_score_date"] < target_day]
df_stale = df_stale.sort_values(by='name')

print(f"STALE TEAMS: {len(df_stale)}")
print(f"TARGET DAY: {target_day}")
df_stale

STALE TEAMS: 38
TARGET DAY: 2022-07-06


Unnamed: 0,count,lkupclientid,max_score_date,cluster,dbname,name
0,465528,55,2022-07-05,prod-app,stlrcoyotes,Arizona Coyotes
0,918886,36,2022-07-05,prod-app,stlrflames,Calgary Flames
0,337460,37,2022-07-05,prod-app,stlrwhl,Calgary Hitmen
0,726344,8,2022-07-05,prod-app,stlrnll,Calgary Roughnecks
0,1095495,35,2022-07-05,prod-app,stlrcfl,Calgary Stampeders
0,135538,28,2022-07-05,prod-app,stlrmilb,Columbia Fireflies
0,1560059,11,2022-07-05,prod-app,stlrmilb,Durham Bulls
0,346346,30,2022-07-05,prod-app,stlrmilb,El Paso Chihauhaus
0,34912,99,2022-07-05,prod-app-elbu,stlrusllocomotive,El Paso Locomotive
0,30316,96,2022-07-05,prod-app-elbu,stlrechleverblades,Florida Everblades


### Done