# Test - Check Retention Score Dates
* StelllarAlgo Data Science
* Ryan Kazmerik & Nakisa Rad
* Apr 30, 2022

In [1]:
import awswrangler as wr
import boto3 
import json
import pandas as pd
import pytz

from datetime import datetime, timedelta

pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [2]:
! aws sso login --profile Stellaralgo-DataScienceAdmin

Attempting to automatically open the SSO authorization page in your default browser.
If the browser does not open or you wish to use a different device to authorize this request, open the following URL:

https://device.sso.us-east-1.amazonaws.com/

Then enter the code:

RZDH-MJRC
Successully logged into Start URL: https://stellaralgo.awsapps.com/start


In [3]:
session = boto3.setup_default_session(profile_name='Stellaralgo-DataScienceAdmin')
client = boto3.client('redshift')

### Let's create a list of team db's that we'd like to check for fresh scores:

In [4]:
SCHEMA = "dw"
TABLE = "customerretentionscores"
TEAMS = json.load(open('../../teamsconfig.json'))
USER = "admin"

### Now we can loop through each datatbase, connect to it and check the retention scores:

In [5]:
print(f"CHECKING RETENTION SCORES")

results = []
for team in TEAMS:

    conn = wr.data_api.redshift.connect(
        cluster_id = team['cluster'],
        database =  team['dbname'].lower(),
        db_user = USER
    )

    sql = f"""
        SELECT count(*), lkupclientid, MAX(scoredate) as max_score_date
        FROM {team['dbname'].lower()}.{SCHEMA}.{TABLE} 
        WHERE lkupclientid = {team['lkupclientid']}
        GROUP BY lkupclientid
    """

    df = wr.data_api.redshift.read_sql_query(
        sql = sql, 
        con = conn
    )


    df["cluster"] = team["cluster"]
    df["dbname"] = team["dbname"]
    df["lkupclientid"] = team["lkupclientid"]
    df["name"] = team["name"]

    if len(df) > 0:
        results.append(df)
    else:
        print(f" > NO RETENTION SCORES FOUND FOR: {team['name']} - {team['clientcode']} ({team['lkupclientid']})")

df_results = pd.concat(results)

df_results["count"] = pd.to_numeric(df_results["count"])
df_results["max_score_date"] = pd.to_datetime(df_results["max_score_date"])

df_results.shape


CHECKING RETENTION SCORES
 > NO RETENTION SCORES FOUND FOR: Portland Sea Dogs - milbseadogs (100)


(37, 6)

In [7]:
today = datetime.now(pytz.timezone("MST")).strftime("%Y-%m-%d")

days_to_sub = 1
target_day = (datetime.today() - timedelta(days_to_sub)).strftime("%Y-%m-%d")

df_stale = df_results[df_results["max_score_date"] < target_day]
df_stale = df_stale.sort_values(by='name')

print(f"STALE TEAMS: {len(df_stale)}")
print(f"TARGET DAY: {target_day}")
df_stale

STALE TEAMS: 9
TARGET DAY: 2022-05-23


Unnamed: 0,count,lkupclientid,max_score_date,cluster,dbname,name
0,173932,49,2022-05-22,prod-app,stlrmilb,Gwinnett Stripers
0,755299,15,2022-05-22,prod-app,stlrmilb,Las Vegas Aviators
0,689672,47,2022-05-22,prod-app,stlrmilb,Oklahoma City Dodgers
0,139841,43,2022-05-22,prod-app,stlrmilb,Omaha Storm Chasers
0,586293,40,2022-05-22,prod-app,stlrmilb,Reno Aces
0,261966,34,2022-05-22,prod-app,stlrmilb,Round Rock Express
0,349680,12,2022-05-22,prod-app,stlrmilb,Sacramento River Cats
0,136054,59,2022-05-22,prod-app,stlrmilb,Toledo Mud Hens
0,4458,98,2022-05-22,prod-app-elbu,stlrmilbwindsurge,Wichita WindSurge


### Done