# Test - Check Retention Score Dates
* StellarAlgo Data Science
* Ryan Kazmerik & Nakisa Rad
* Apr 30, 2022

In [23]:
import boto3 
import json
import pandas as pd
import psycopg2
import pytz

from datetime import datetime, timedelta
from shared_utilities import helpers

# Display settings: render up to 100 columns and 100 rows before pandas truncates a frame.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)

In [8]:
# Log the AWS CLI in through SSO for the Stellaralgo-DataScienceAdmin profile.
# This step is interactive: it opens the SSO authorization page in a browser
# (or prints a URL and device code to enter manually) and waits for approval.
! aws sso login --profile Stellaralgo-DataScienceAdmin

Attempting to automatically open the SSO authorization page in your default browser.
If the browser does not open or you wish to use a different device to authorize this request, open the following URL:

https://device.sso.us-east-1.amazonaws.com/

Then enter the code:

GXBT-PZFT
Successfully logged into Start URL: https://stellaralgo.awsapps.com/start#/


In [9]:
# Install the SSO profile as boto3's default session so that boto3 clients
# created without an explicit session use these credentials.
# setup_default_session() returns None, so it is called for its side effect and
# `session` is bound to the session object it installed, not to its return value.
boto3.setup_default_session(profile_name='Stellaralgo-DataScienceAdmin')
session = boto3.DEFAULT_SESSION

### Let's load the team config and query each team's database for its latest retention score date:

In [20]:
# Load the per-team settings; each entry is read below for its 'cluster', 'dbname',
# 'lkupclientid', 'name' and 'clientcode' keys. The context manager closes the
# config file as soon as it has been parsed.
with open('../config/teamsconfig.json') as config_file:
    teams = json.load(config_file)

results = []
for team in teams:

    # NOTE(review): assumes the helper returns a fresh connection on every call,
    # so closing it below cannot affect a later iteration — confirm in shared_utilities.
    cnxn = helpers.get_redshift_connection(team['cluster'], team['dbname'])
    try:
        cursor = cnxn.cursor()
        try:
            # One summary row per client: how many score rows exist and the newest score date.
            # dbname / lkupclientid come from the local config file, not from user input.
            sql = f"""
                SELECT count(*), lkupclientid, MAX(scoredate) as max_score_date
                FROM {team['dbname'].lower()}.dw.customerretentionscores
                WHERE lkupclientid = {team['lkupclientid']}
                GROUP BY lkupclientid
            """

            cursor.execute(sql)

            df = pd.DataFrame(cursor.fetchall(), columns=[desc[0] for desc in cursor.description])
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query fails, so a run over many
        # teams does not accumulate open Redshift connections.
        cnxn.close()

    # Tag the summary row with the team it belongs to.
    df["cluster"] = team["cluster"]
    df["dbname"] = team["dbname"]
    df["lkupclientid"] = team["lkupclientid"]
    df["name"] = team["name"]

    if len(df) > 0:
        results.append(df)
    else:
        print(f" > NO RETENTION SCORES FOUND FOR: {team['name']} - {team['clientcode']} ({team['lkupclientid']})")

if results:
    # ignore_index gives every team a unique row label instead of a repeated 0.
    df_results = pd.concat(results, ignore_index=True)
else:
    # pd.concat([]) raises ValueError; fall back to an empty frame with the same
    # columns so the conversions and later cells still run when no team has scores.
    df_results = pd.DataFrame(
        columns=["count", "lkupclientid", "max_score_date", "cluster", "dbname", "name"]
    )

df_results["count"] = pd.to_numeric(df_results["count"])
df_results["max_score_date"] = pd.to_datetime(df_results["max_score_date"])

df_results.shape

prod-app stlrahl
prod-app stlrahl
prod-app stlrcfl
prod-app-elbu stlrechleverblades
prod-app stlrahl
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app stlrmilb
prod-app-elbu stlrmilbseadogs
prod-app-elbu stlrmilbwindsurge
prod-app stlrrays
prod-app stlrmls
prod-app stlrmls
prod-app stlrmls
prod-app stlrtrailblazers
prod-app stlrcanucks
prod-app stlrcoyotes
prod-app stlrflames
prod-app stlrnhlpanthers
prod-app stlrvgk
prod-app stlrnll
prod-app-elbu stlrusllocomotive
prod-app stlrwhl


(38, 6)

### Now we can flag any teams whose most recent retention score is older than the target day:

In [22]:
# Take a single MST timestamp and derive both dates from it, so the staleness
# cutoff does not depend on the local timezone of the machine running the notebook.
now_mst = datetime.now(pytz.timezone("MST"))
today = now_mst.strftime("%Y-%m-%d")

# A team is stale when its newest score is older than `days_to_sub` day(s) before today.
days_to_sub = 1
target_day = (now_mst - timedelta(days=days_to_sub)).strftime("%Y-%m-%d")

# pandas parses the 'YYYY-MM-DD' string when comparing it to the datetime column.
df_stale = df_results[df_results["max_score_date"] < target_day]
df_stale = df_stale.sort_values(by='name')

print(f"STALE TEAMS: {len(df_stale)}")
print(f"TARGET DAY: {target_day}")
df_stale

STALE TEAMS: 0
TARGET DAY: 2022-08-01


Unnamed: 0,count,lkupclientid,max_score_date,cluster,dbname,name


### Done