In [38]:
# Papermill parameter: this value gets overwritten when the notebook is run
# by papermill on a schedule. The cells below use it to pick the local setup
# (package install, local Snowflake profile, local credentials file).
is_local_development = True

In [39]:
if is_local_development:
    !pip install gitlabdata==0.3.29



In [40]:
import configparser

# import sys
import pandas as pd
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
import json, os
from pyprojroot import here
from os import environ as env
import re

In [41]:
from gitlabdata.orchestration_utils import (
    data_science_engine_factory,
    query_dataframe,
    snowflake_engine_factory,
    snowflake_stage_load_copy_remove,
    get_env_from_profile,
    dataframe_uploader,
    write_to_gsheets,
    read_from_gsheets,
    query_executor,
    query_from_file,
)

## Create Snowflake engine

In [42]:
# The engine is created differently depending on where the notebook runs:
# - scheduled run: built from the process environment variables
# - local run: built from the "sales_analytics_local" profile target

if not is_local_development:
    snowflake_engine = snowflake_engine_factory(env, "SALES_ANALYTICS")

    # database names are only read (and only defined) on scheduled runs
    raw_db_name = env["SNOWFLAKE_LOAD_DATABASE"]
    prod_db_name = env["SNOWFLAKE_PROD_DATABASE"]
else:
    snowflake_engine = data_science_engine_factory(
        profile_target="sales_analytics_local"
    )

# NOTE(review): the rendered repr includes the connecting user and role;
# clear this output before sharing the notebook.
snowflake_engine

Engine(snowflake://nfiguera%40gitlab.com:***@gitlab/RAW/?authenticator=externalbrowser&role=NFIGUERA&warehouse=DEV_XS)

## Credentials for Gsheet manipulation

Remember to give access to the following two users:

- Data Team runner: data-team-sheets-sa@gitlab-analysis.iam.gserviceaccount.com
- Sales Strategy service account: service-revenue-strat-analytic@revenue-strategy-anal-411d5a72.iam.gserviceaccount.com

In [43]:
# Local runs only: load the Google service account key file and expose its
# content through the GSHEETS_SERVICE_ACCOUNT_CREDENTIALS environment variable.
if is_local_development:
    credentials_path = here("credentials/rsa_gcloud_service_account.json")
    # alternative key file: here("credentials/gsheet_service_file.json")

    with open(credentials_path) as credentials_file:
        raw_credentials = credentials_file.read()

    # the key file content is stored with its newlines stripped
    service_account_credentials = raw_credentials.replace("\n", "")

    # `env` is the same mapping object as os.environ (see the imports cell)
    env["GSHEETS_SERVICE_ACCOUNT_CREDENTIALS"] = service_account_credentials

# AE Quota and Credit upload process

This notebook is run daily to upload credits and quotas for AEs.

* The Quotas are maintained by @hselim.
* The AE credits are captured from an Xactly upload maintained by @hselim.

The source gsheet document is the [following one](https://docs.google.com/spreadsheets/d/1Cvs2IfMEiY-mGJ07F5r6sBwUIl9AXqlsISkfJG2iLHk/edit#gid=896147171).

## Read Quota gSheet data

In [44]:
# Load the "quotas" tab of the source workbook into a DataFrame
sheet_name = "quotas"
sheet_id = "1Cvs2IfMEiY-mGJ07F5r6sBwUIl9AXqlsISkfJG2iLHk"
quotas_df = read_from_gsheets(sheet_id, sheet_name)

INFO:root:Reading data from sheet quotas...
INFO:root:Read completed.


In [45]:
# Show the columns that came back from the sheet, to compare against the
# hard-coded lists below.
print(quotas_df.columns)

# Sheet columns that hold one value per period (fiscal year, half, quarter
# and month). These are the columns unpivoted into rows (value_vars) when the
# quotas table is melted further down.
# NOTE: the names must match the sheet headers exactly, so this list needs
# updating when the sheet rolls over to a new fiscal year.
period_fields = [
    "FY24",
    "H1-FY24",
    "Q1-FY24",
    "Feb-23",
    "Mar-23",
    "Apr-23",
    "Q2-FY24",
    "May-23",
    "Jun-23",
    "Jul-23",
    "H2-FY24",
    "Q3-FY24",
    "Aug-23",
    "Sep-23",
    "Oct-23",
    "Q4-FY24",
    "Nov-23",
    "Dec-23",
    "Jan-24",
    "Q1-FY25",
    "Q2-FY25",
    "Q3-FY25",
    "Q4-FY25",
]

# Descriptive sheet columns that are kept as identifier columns (id_vars)
# on every row when the quotas table is melted further down.
desc_fields = [
    "Comp Design",
    "Employee ID",
    "Start Date",
    "Quota Effective Start Date",
    "Name",
    "Region",
    "Role",
    "HC Type",
    "Role Type",
    "GHP ID",
    "Manager",
    "Area",
    "Territory",
    "Quota Type",
    "CSM Pool",
    "Ramp Status",
    "Credit Type",
    "YTD Target",
    "YTD Closed",
    "YTD Attainment",
    "H1 Attainment",
    "Annual Attainment",
    "End Date",
    "Hire Status",
]

Index(['Comp Design', 'Employee ID', 'Start Date',
       'Quota Effective Start Date', 'Name', 'Region', 'Role', 'HC Type',
       'Role Type', 'GHP ID', 'Manager', 'Area', 'Territory', 'Quota Type',
       'FY24', 'H1-FY24', 'Q1-FY24', 'Feb-23', 'Mar-23', 'Apr-23', 'Q2-FY24',
       'May-23', 'Jun-23', 'Jul-23', 'H2-FY24', 'Q3-FY24', 'Aug-23', 'Sep-23',
       'Oct-23', 'Q4-FY24', 'Nov-23', 'Dec-23', 'Jan-24', 'CSM Pool',
       'Credit Type', 'YTD Target', 'YTD Closed', 'YTD Attainment',
       'H1 Attainment', 'Annual Attainment', 'End Date', 'Hire Status',
       'Ramped Month', 'Ramp Status', 'Q1-FY25', 'Q2-FY25', 'Q3-FY25',
       'Q4-FY25', 'fy25_ramp_status', 'fy23_quota', 'fy23_credits',
       'fy23_attainment', 'fy22_quota', 'fy22_credits', 'fy22_attainment'],
      dtype='object')


In [46]:
# Full, untruncated list of the sheet's column names
quotas_df.columns.tolist()

['Comp Design',
 'Employee ID',
 'Start Date',
 'Quota Effective Start Date',
 'Name',
 'Region',
 'Role',
 'HC Type',
 'Role Type',
 'GHP ID',
 'Manager',
 'Area',
 'Territory',
 'Quota Type',
 'FY24',
 'H1-FY24',
 'Q1-FY24',
 'Feb-23',
 'Mar-23',
 'Apr-23',
 'Q2-FY24',
 'May-23',
 'Jun-23',
 'Jul-23',
 'H2-FY24',
 'Q3-FY24',
 'Aug-23',
 'Sep-23',
 'Oct-23',
 'Q4-FY24',
 'Nov-23',
 'Dec-23',
 'Jan-24',
 'CSM Pool',
 'Credit Type',
 'YTD Target',
 'YTD Closed',
 'YTD Attainment',
 'H1 Attainment',
 'Annual Attainment',
 'End Date',
 'Hire Status',
 'Ramped Month',
 'Ramp Status',
 'Q1-FY25',
 'Q2-FY25',
 'Q3-FY25',
 'Q4-FY25',
 'fy25_ramp_status',
 'fy23_quota',
 'fy23_credits',
 'fy23_attainment',
 'fy22_quota',
 'fy22_credits',
 'fy22_attainment']

### Store in Snowflake unpivoted version

In [47]:
%%time
# Build the unpivoted quota extract: keep only the "Carried Net ARR Quota"
# rows and rename the sheet columns to the names used in the Snowflake table.

# sheet column -> table column; the dict order defines the column order
column_renames = {
    "Employee ID": "employee_id",
    "Name": "name",
    "Start Date": "start_date",
    "Credit Type": "credit_type",
    "HC Type": "hc_type",
    "Q1-FY24": "cfy_q1",
    "Q2-FY24": "cfy_q2",
    "Q3-FY24": "cfy_q3",
    "Q4-FY24": "cfy_q4",
    "FY24": "cfy_total",
    "Q1-FY25": "nfy_q1",
    "Q2-FY25": "nfy_q2",
    "Q3-FY25": "nfy_q3",
    "Q4-FY25": "nfy_q4",
    "YTD Target": "cfy_ytd_target",
    "YTD Closed": "cfy_ytd_closed",
    "YTD Attainment": "cfy_ytd_attainment",
    "fy23_quota": "fy23_quota",
    "fy23_credits": "fy23_credits",
    "fy23_attainment": "fy23_attainment",
    "fy22_quota": "fy22_quota",
    "fy22_credits": "fy22_credits",
    "fy22_attainment": "fy22_attainment",
}
target_fields = list(column_renames)

index = quotas_df["Quota Type"] == "Carried Net ARR Quota"
to_gsheet = quotas_df.loc[index, target_fields].copy()
print(len(to_gsheet))

# positional rename: the values follow the same order as target_fields
to_gsheet.columns = list(column_renames.values())

to_gsheet

336
CPU times: user 2.7 ms, sys: 1.82 ms, total: 4.53 ms
Wall time: 3.93 ms


Unnamed: 0,employee_id,name,start_date,credit_type,hc_type,cfy_q1,cfy_q2,cfy_q3,cfy_q4,cfy_total,...,nfy_q4,cfy_ytd_target,cfy_ytd_closed,cfy_ytd_attainment,fy23_quota,fy23_credits,fy23_attainment,fy22_quota,fy22_credits,fy22_attainment
0,12079,Adam Mustapic,9/20/2021,ARR Net,MM AE,170858,333041,258428,282672,1045000,...,282672,856517,289886,0.34,1070000.03,333807.34,0.311969,890000,72634.2,0.081611
2,12603,Adrian Tigert,4/25/2022,ARR Net,ENTR-ASM,965095,905932,1332378,1954764,5158169,...,1954764,3842788,1345500,0.35,3977524.14,1976064.33,0.496808,0,0,0
8,10780,Alan Cooke,5/6/2019,ARR Net,ENTR-ASM,1265075,1026648,1426487,1975224,5693434,...,1975224,4376618,124932,0.03,6279543.46,6252554.68,0.995702,6038769.598,6113996.39,1.012457
14,12425,Alan Koch,2/14/2022,ARR Net,SAL,262271,353970,396164,270095,1282500,...,270095,1102437,210202,0.19,1600000,265290.42,0.165807,0,0,0
18,12437,Alex Dakin,2/28/2022,ARR Net,SAL,215398,168430,232965,322581,939374,...,322581,724320,60064,0.08,500000,86630.45,0.173261,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1062,12354,Raphael Werner,2/1/2022,ARR Net,SMB AE,228638,283963,340705,229656,1082962,...,,929858,308399,0.33,,,0,,,0
1695,10688,Michael Pyle,8/1/2023,ARR Net,ENTR-VP,20532032,24529060,31022315,36333683,112417090,...,,88098698,90314110,1.03,,,0,,,0
1702,10522,Ryan O'Nell,12/3/2018,ARR Net,COMM-VP,9185865,12617219,14989247,18574142,55366472,...,,42948399,35409758,0.82,,,0,,,0
1709,10695,David Sakamoto,4/8/2019,ARR Net,SA-VP,29717897,37146279,46952722,56183103,170000000,...,,132413350,131048176,0.99,,,0,,,0


In [None]:
%%time
# Cast the measure columns to numeric, fill the gaps with 0 and upload the
# extract to SALES_ANALYTICS.ae_quotas_unpivoted.

# every column that is converted with pd.to_numeric before the upload
numeric_columns = [
    "cfy_q1",
    "cfy_q2",
    "cfy_q3",
    "cfy_q4",
    "nfy_q1",
    "nfy_q2",
    "nfy_q3",
    "nfy_q4",
    "cfy_ytd_target",
    "cfy_ytd_closed",
    "cfy_ytd_attainment",
    "fy23_quota",
    "fy23_credits",
    "fy23_attainment",
    "fy22_quota",
    "fy22_credits",
    "fy22_attainment",
    "cfy_total",
]
for column_name in numeric_columns:
    to_gsheet[column_name] = pd.to_numeric(to_gsheet[column_name])

# the employee id is an identifier, not a measure: keep it as text
to_gsheet["employee_id"] = to_gsheet["employee_id"].astype(str)

# missing values (in any column) are loaded as 0
to_gsheet = to_gsheet.fillna(0)

table_name = "ae_quotas_unpivoted"
schema = "SALES_ANALYTICS"
# NOTE(review): if_exists="replace" presumably overwrites the table on every
# run -- confirm against dataframe_uploader.
dataframe_uploader(
    dataframe=to_gsheet,
    engine=snowflake_engine,
    table_name=table_name,
    schema=schema,
    if_exists="replace",
    add_uploaded_at=False,
)

INFO:snowflake.connector.connection:Snowflake Connector for Python Version: 2.7.8, Python Version: 3.9.13, Platform: macOS-14.2-x86_64-i386-64bit
INFO:snowflake.connector.connection:This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


## Read Credit gSheet extract

In [None]:
# Load the "credit" tab of the source workbook
sheet_name = "credit"
sheet_id = "1Cvs2IfMEiY-mGJ07F5r6sBwUIl9AXqlsISkfJG2iLHk"

# The employee id is picked up as a number by the reader, so it is cast to
# the pandas "string" dtype right after loading.
credit_df = read_from_gsheets(sheet_id, sheet_name).astype(
    {"Participant Employee ID": "string"}
)

In [None]:
# Force the user segment to text before the upload
credit_df = credit_df.astype({"User_Segment": str})

## Save Credits into Snowflake

In [None]:
%%time
# Upload the credits extract to SALES_ANALYTICS.ae_credits
# (the row count is printed first as a quick sanity check)
print(credit_df.shape[0])

schema = "SALES_ANALYTICS"
table_name = "ae_credits"
# NOTE(review): if_exists="replace" presumably overwrites the table on every
# run and add_uploaded_at=True presumably adds an upload timestamp column --
# confirm against dataframe_uploader.
dataframe_uploader(
    dataframe=credit_df,
    engine=snowflake_engine,
    table_name=table_name,
    schema=schema,
    if_exists="replace",
    add_uploaded_at=True,
)

# Process Quotas Unpivoted version

In [None]:
#
# Identify the type of period, to complement the tall table
#
def type_of_period(period_name):
    """Classify a period column name by its prefix.

    Args:
        period_name: period label as used in the sheet header, e.g.
            "FY24", "Q1-FY24", "H1-FY24" or "Feb-23".

    Returns:
        "Fiscal Year", "Fiscal Quarter", "Half Fiscal Year" or "Month",
        or None when the name matches none of the known formats.
    """
    # Patterns are tried in order and are only anchored at the start of the
    # name, so anything following a valid prefix is ignored.
    known_formats = (
        (r"^FY[2-3][0-9]", "Fiscal Year"),
        (r"^(Q)[1-4]-FY[2-3][0-9]", "Fiscal Quarter"),
        (r"^(H)[1-2]-FY[2-3][0-9]", "Half Fiscal Year"),
        (r"^([a-zA-Z]{3})-[2-3][0-9]", "Month"),
    )
    for pattern, label in known_formats:
        if re.match(pattern, period_name):
            return label
    return None


# test
print(type_of_period("FY24"))

In [None]:
# Inspect the quotas sheet as loaded, before it is reshaped below.
# NOTE(review): the rendered table contains employee names; clear this output
# before sharing the notebook.
quotas_df

In [None]:
# Reshape the quotas from a wide table (one column per period) into a tall
# one (one row per employee record and period).
melted_df = (
    pd.melt(quotas_df, id_vars=desc_fields, value_vars=period_fields)
    .rename(columns={"variable": "period"})
    .rename(columns=str.lower)
    .assign(
        # retrieve the period type from the period name
        period_type=lambda frame: frame["period"].apply(type_of_period),
        # "#N/A" cells coming from the sheet are loaded as 0
        value=lambda frame: pd.to_numeric(frame["value"].replace("#N/A", 0)),
    )
    # set the column types explicitly: identifiers tend to be picked up as
    # numerics when they should be strings, which gives errors downstream
    .astype({"ghp id": str, "employee id": str})
    # periods without a value are not kept
    .dropna(subset=["value"])
)
melted_df.head()

### Save Tall Format Quotas into gSheet and Snowflake

This process is giving problems and cannot be run locally

In [None]:
# Row count of the tall table, as a quick sanity check before exporting it
print(len(melted_df))

# Columns of the tall table that are exported, in output order. The names are
# lower-cased sheet headers (spaces kept), plus the derived "period_type",
# "period" and "value" columns.
target_columns = [
    "comp design",
    "employee id",
    "start date",
    "end date",
    "credit type",
    "quota effective start date",
    "name",
    "region",
    "role",
    "hc type",
    "role type",
    "manager",
    "area",
    "quota type",
    "period_type",
    "period",
    "value",
]

In [None]:
# Write Tall format to gSheets
sheet_id = "1Cvs2IfMEiY-mGJ07F5r6sBwUIl9AXqlsISkfJG2iLHk"
sheet_name = "tall_quota_file"
# The result of the write is deliberately not bound to `credit_df`: that name
# holds the credits DataFrame loaded earlier, and overwriting it would break
# any re-run of the credits cells.
write_to_gsheets(sheet_id, sheet_name, melted_df[target_columns])

In [None]:
# Number of rows in the tall table
melted_df.shape[0]

In [None]:
%%time
# Upload the tall quotas table to SALES_ANALYTICS.ae_quotas
to_upload = melted_df.loc[:, target_columns].copy()

schema = "SALES_ANALYTICS"
table_name = "ae_quotas"
# NOTE(review): if_exists="replace" presumably overwrites the table on every
# run -- confirm against dataframe_uploader.
dataframe_uploader(
    dataframe=to_upload,
    engine=snowflake_engine,
    table_name=table_name,
    schema=schema,
    if_exists="replace",
    add_uploaded_at=False,
)

In [None]:
# Print the time at which the run reached the end of the notebook.
# `datetime` is the class imported in the imports cell at the top
# (`from datetime import datetime`); re-importing the module here would
# rebind that name from the class to the module for the rest of the session.

# ct stores current time
ct = datetime.now()
print("current time:-", ct)