In [2]:
import os
import sys

# Remember the directory the notebook kernel was started in.
current_dir = os.getcwd()

# A kernel launched from the 'notebooks' folder steps up one level so that
# the project root becomes the working directory.
if os.path.split(current_dir)[1] == 'notebooks':
    project_root = os.path.split(current_dir)[0]
    os.chdir(project_root)

# Report whichever directory is active after the optional move.
print(f"Current Working Directory has been set to: {os.getcwd()}")

# Make the active directory importable so the project's own modules resolve;
# skip the append when the entry is already present.
active_dir = os.getcwd()
if active_dir not in sys.path:
    sys.path.append(active_dir)

Current Working Directory has been set to: /Users/rchhetri/C360 Projects/C360_metric_importance


In [3]:
# Magic commands to automatically reload modules
%load_ext autoreload
%autoreload 2

# Import your custom modules just like any other library!
from config import SnowflakeConfig
from snowflake_connector import SnowflakeConnector

# Import other necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Initialize the Snowflake configuration and the connector that uses it.
# SnowflakeConfig and SnowflakeConnector are this project's own classes
# (imported from the `config` and `snowflake_connector` modules).
# NOTE(review): presumably SnowflakeConfig is where account/credential settings
# are read — their source is not visible in this notebook; confirm in config.py.
sf_config = SnowflakeConfig()
connector = SnowflakeConnector(config=sf_config)

# Cohort parameters for the extract. They are named constants so the cohort can
# be re-cut in one place instead of editing several spots in the SQL text.
HSCORES_TABLE = "dev.gtm.customer_360_v5_hscores"  # source table read by both CTEs
ACR_MIN = 100_000        # lower bound on LATEST_EFFECTIVE_ACR (SQL BETWEEN is inclusive)
ACR_MAX = 1_000_000      # upper bound on LATEST_EFFECTIVE_ACR (SQL BETWEEN is inclusive)
SALES_GEOS = ("AMER",)   # SALES_HIER_GEO values to keep
EARLY_MAX_MONTHS = 6     # account age in months up to which a plan is labelled 'Early'

# Render the geo tuple as a quoted SQL list, e.g. 'AMER' or 'AMER', 'EMEA'.
# These are notebook-level constants, not user input, so plain interpolation is
# acceptable here; never route untrusted values through this string.
geo_list_sql = ", ".join(f"'{geo}'" for geo in SALES_GEOS)

# SQL query:
#   filtered_data  - rows inside the ACR band for the selected geos
#   min_accnt_date - earliest subscription term start per account id + name
#   segments       - account age in months plus an Early/Late x plan-type label
# The final select keeps only the five named segments (drops 'Others').
query = f"""
with filtered_data as
(
select *
from {HSCORES_TABLE}
where
 LATEST_EFFECTIVE_ACR between {ACR_MIN} and {ACR_MAX}
 and SALES_HIER_GEO in ({geo_list_sql})
),
min_accnt_date as
(
select SUBSCRIPTION_ACCOUNT_ID, SFDC_ACCOUNT_NAME, min(subscription_term_start_date) as min_sub_date
from {HSCORES_TABLE}
group by SUBSCRIPTION_ACCOUNT_ID, SFDC_ACCOUNT_NAME
),
segments as (
select DATEDIFF(MONTH, m.min_sub_date, current_date()) as accnt_age_mths, f.*,
 CASE WHEN DATEDIFF(MONTH, m.min_sub_date, current_date()) <= {EARLY_MAX_MONTHS} AND latest_buying_program = 'Volume Plan' THEN 'Early Volume Plan'
     WHEN DATEDIFF(MONTH, m.min_sub_date, current_date()) > {EARLY_MAX_MONTHS} AND latest_buying_program = 'Volume Plan' THEN 'Late Volume Plan'
     WHEN DATEDIFF(MONTH, m.min_sub_date, current_date()) <= {EARLY_MAX_MONTHS} AND latest_buying_program = 'Savings Plan' THEN 'Early Savings Plan'
     WHEN DATEDIFF(MONTH, m.min_sub_date, current_date()) > {EARLY_MAX_MONTHS} AND latest_buying_program = 'Savings Plan' THEN 'Late Savings Plan'
     WHEN latest_buying_program = 'PAYG' THEN 'PAYG' ELSE 'Others' END AS segment_name

from filtered_data f
left join min_accnt_date m
on f.SUBSCRIPTION_ACCOUNT_ID = m.SUBSCRIPTION_ACCOUNT_ID
and f.SFDC_ACCOUNT_NAME = m.SFDC_ACCOUNT_NAME
)

select *
from segments
where segment_name in ('Early Volume Plan','Early Savings Plan','Late Volume Plan','Late Savings Plan','PAYG') ;
"""

# Connect, execute the query, and close the connection.
# The `finally` clause makes sure close() is called whether execute_query()
# returns normally or raises; any exception still propagates afterwards.
# NOTE(review): connect() sits inside the `try`, so close() is also called when
# connect() itself fails — confirm SnowflakeConnector.close() tolerates a
# connector that never finished connecting.
try:
    connector.connect()
    # The result is bound to df_raw, which the following cells inspect
    # via .head(), .shape and .columns.
    df_raw = connector.execute_query(query)
finally:
    connector.close()


 pip install snowflake-connector-python[secure-local-storage]


Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
Going to open: https://newrelic.okta.com/app/snowflake/exkugjs4xeGHw0Vo10x7/sso/saml?SAMLRequest=lZJRT9swFIX%2FSuQ9J3bSloHVFrVUQCe2dm0AbW9uchu8OnbwdUj493NTimASSHuz7HOuv3vPHZ63pQqewKI0ekTiiJEAdGZyqYsRuU0vw1MSoBM6F8poGJFnQHI%2BHqIoVcUntXvQK3isAV3gC2nk3cOI1FZzI1Ai16IE5C7j68n3G55EjFfWOJMZRd5YPncIRLDOEx4tOUqP9%2BBcxSltmiZqepGxBU0YY5SdUa%2FaS74c9a3v6QN9TFl%2Fr%2FcKL1%2B%2BsE2lPozgM6zNQYT8Ok2X4XKxTkkwOaJeGI11CXYN9klmcLu6OQCgJ9DQWFAy8whhLpwwGKE2zVaJHWSmrGrny0b%2BRLeQU2UK6Tufz0ak2sn8sdgwdlr3gLVq9a1cs0FvM783KdtdMbH81S7axbS5%2FzmtkyIjwd0x2mQf7RyxhrneB%2Br8FUsGITsJ2VmaJLzHeDKI4pP%2BbxLMfKBSC9c5%2F6WOzM6Jjk5UFX0Fp9Du6uIP9lu4um7YnYlZ%2B5UiGroPmBx2hncEdvzfkxjSt%2FaX%2FfvhI5nPlsYXeA4ujS2F%2BzixOIq7G5mH207KoRRSTfLcAqJPTinTXFgQzq%2B5szUQOj78%2Bn7Rx38B&RelayState=ver%3A1-hin

In [5]:
# Preview the first five rows of the query result.
df_raw.head(n=5)

Unnamed: 0,ACCNT_AGE_MTHS,SUBSCRIPTION_ACCOUNT_ID,EFFECTIVE_SUBSCRIPTION_ACCOUNT_ID,SFDC_ACCOUNT_ID,SFDC_ACCOUNT_NAME,REPORT_AS_OF_DT,REPORT_MONTH,BUYING_PROGRAM,LATEST_BUYING_PROGRAM,EFFECTIVE_ACR,...,SCORE_TYPE,METRIC_SCORE,METRIC_NAME_SCORE,WEIGHTS,ADOPTION_SCORE_CAL,ENGAGEMENT_SCORE_CAL,FINANCIAL_HEALTH_SCORE_CAL,RISK_SCORE_CAL,HEALTH_SCORE,SEGMENT_NAME
0,30,3251820,3251820,0011U00001S8ge2QAB,"Recurly, Inc.",2023-05-31,2023-05-01,Volume Plan,Savings Plan,116000.04,...,Adoption_score,,PRODUCT_STICKINESS_RATIO_HSCORE,0,87,0,59,100,61.5,Late Savings Plan
1,30,3251820,3251820,0011U00001S8ge2QAB,"Recurly, Inc.",2023-05-31,2023-05-01,Volume Plan,Savings Plan,116000.04,...,Adoption_score,100.0,USER_STICKINESS_RATIO_HSCORE,0,87,0,59,100,61.5,Late Savings Plan
2,30,3251820,3251820,0011U00001S8ge2QAB,"Recurly, Inc.",2023-11-30,2023-11-01,Volume Plan,Savings Plan,116000.04,...,Financial_health_score,25.0,CONTRACT_LENGTH_SCORE_HSCORE,0,37,0,53,100,47.5,Late Savings Plan
3,30,3251820,3251820,0011U00001S8ge2QAB,"Recurly, Inc.",2023-01-31,2023-01-01,Volume Plan,Savings Plan,116000.04,...,Adoption_score,,PRODUCT_UTILIZATION_RATE_HSCORE,0,31,0,47,100,44.5,Late Savings Plan
4,30,3251820,3251820,0011U00001S8ge2QAB,"Recurly, Inc.",2023-03-31,2023-03-01,Volume Plan,Savings Plan,116000.04,...,Financial_health_score,100.0,MAXIMUM_DAYS_PAST_DUE_HSCORE,0,31,0,60,100,47.75,Late Savings Plan


In [6]:
# Dimensions of the query result as (rows, columns).
df_raw.shape

(272574, 99)

In [7]:
# Column labels of the query result.
df_raw.columns

Index(['ACCNT_AGE_MTHS', 'SUBSCRIPTION_ACCOUNT_ID',
       'EFFECTIVE_SUBSCRIPTION_ACCOUNT_ID', 'SFDC_ACCOUNT_ID',
       'SFDC_ACCOUNT_NAME', 'REPORT_AS_OF_DT', 'REPORT_MONTH',
       'BUYING_PROGRAM', 'LATEST_BUYING_PROGRAM', 'EFFECTIVE_ACR',
       'IS_CONTRACT_FLAG', 'LATEST_EFFECTIVE_ACR', 'BCM', 'LATEST_BCM',
       'INGEST_BCM', 'INGEST_UNIT_PRICE', 'CCU_BCM', 'CCU_UNIT_PRICE',
       'USERS_BCM', 'USERS_UNIT_PRICE', 'SUBSCRIPTION_TERM_START_DATE',
       'SUBSCRIPTION_TERM_END_DATE', 'RENEWAL_DATE', 'MULTIYEAR_FLAG',
       'TOTAL_ACR', 'INDUSTRY', 'PHYSICAL_COUNTRY', 'SALES_HIER_GEO',
       'SALES_HIER_REGION', 'SALES_HIER_SUB_REGION', 'EMPLOYEES',
       'CHURN_INDICATOR', 'CONTRACT_START_DATE', 'MONTHS_SINCE_CONTRACT_START',
       'DAILY_ENGAGED_USERS', 'DAU_R7D', 'COMMITTED_USERS',
       'ROLLING_60_DAY_MEDIAN', 'DENOM_USERS', 'ADV_USED_FEATURES',
       'TOTAL_ADV_FEATURES', 'DAU_ACTUALS', 'USED_FEATURES',
       'TOTAL_FEATURES_3_PERCENT', 'DAU_R30D', 'TOTAL_FEATURES_R