In [None]:
# Papermill parameter: defaults to True for local development and gets
# overwritten when the notebook is run by papermill on a schedule.
is_local_development = True

In [None]:
!python -m pip install gitlabdata --upgrade

In [None]:
import configparser

# import sys
import pandas as pd
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
import json, os
from pyprojroot import here
from os import environ as env
import re

In [None]:
from gitlabdata.orchestration_utils import (
    data_science_engine_factory,
    query_dataframe,
    snowflake_engine_factory,
    snowflake_stage_load_copy_remove,
    get_env_from_profile,
    dataframe_uploader,
    write_to_gsheets,
    read_from_gsheets,
    query_executor,
    query_from_file,
)

## Create Snowflake engine

In [None]:
# The Snowflake engine is created differently depending on where the notebook
# runs: from the process environment on remote runs, or from a local profile
# target during local development.

if not is_local_development:
    snowflake_engine = snowflake_engine_factory(env, "SALES_ANALYTICS")

    # the database names are only read from the environment on remote runs
    raw_db_name = env["SNOWFLAKE_LOAD_DATABASE"]
    prod_db_name = env["SNOWFLAKE_PROD_DATABASE"]
else:
    snowflake_engine = data_science_engine_factory(
        profile_target="sales_analytics_local"
    )

snowflake_engine

## Credentials for Gsheet manipulation

Remember to give the following two service accounts access to the Google Sheets used in this notebook:

- Data Team runner: data-team-sheets-sa@gitlab-analysis.iam.gserviceaccount.com
- Sales Strategy service account: service-revenue-strat-analytic@revenue-strategy-anal-411d5a72.iam.gserviceaccount.com

In [None]:
# Load the Google service account credentials (local development only)
if is_local_development:
    # alternative credentials file: here("credentials/gsheet_service_file.json")
    credentials_path = here("credentials/rsa_gcloud_service_account.json")

    with open(credentials_path) as credentials_file:
        # flatten the file content to a single line by dropping the newlines
        service_account_credentials = "".join(credentials_file.read().split("\n"))

    # expose the credentials through an environment variable
    os.environ["GSHEETS_SERVICE_ACCOUNT_CREDENTIALS"] = service_account_credentials

# AE Quota and Credit upload process

This notebook is run daily to upload credits and quotas for AEs.

* The Quotas are maintained by @hselim.
* The AE credits are captured from an Xactly upload maintained by @hselim.

The source gsheet document is the [following one](https://docs.google.com/spreadsheets/d/1Cvs2IfMEiY-mGJ07F5r6sBwUIl9AXqlsISkfJG2iLHk/edit#gid=896147171).

## Read Quota gSheet data

In [None]:
# Read the "quotas" tab of the source Google Sheet into quotas_df
sheet_id = "1Cvs2IfMEiY-mGJ07F5r6sBwUIl9AXqlsISkfJG2iLHk"
sheet_name = "quotas"
quotas_df = read_from_gsheets(sheet_id, sheet_name)

In [None]:
# Show the columns that were read, to compare them with the lists below
print(quotas_df.columns)

# Period columns of the quota sheet: fiscal year, halves, quarters and months.
# They are the value columns when the table is melted into the tall format.
# NOTE(review): the periods are hard-coded for FY24/FY25 — presumably they need
# to be updated when the fiscal year rolls over; confirm with the sheet owner.
period_fields = [
    "FY24",
    "H1-FY24",
    "Q1-FY24",
    "Feb-23",
    "Mar-23",
    "Apr-23",
    "Q2-FY24",
    "May-23",
    "Jun-23",
    "Jul-23",
    "H2-FY24",
    "Q3-FY24",
    "Aug-23",
    "Sep-23",
    "Oct-23",
    "Q4-FY24",
    "Nov-23",
    "Dec-23",
    "Jan-24",
    "Q1-FY25",
    "Q2-FY25",
    "Q3-FY25",
    "Q4-FY25",
]

# Descriptive columns of the quota sheet; they are the identifier columns
# when the table is melted into the tall format.
desc_fields = [
    "Comp Design",
    "Employee ID",
    "Start Date",
    "Quota Effective Start Date",
    "Name",
    "Region",
    "Role",
    "HC Type",
    "Role Type",
    "GHP ID",
    "Manager",
    "Area",
    "Territory",
    "Quota Type",
    "CSM Pool",
    "Ramp Status",
    "Credit Type",
    "YTD Target",
    "YTD Closed",
    "YTD Attainment",
    "H1 Attainment",
    "Annual Attainment",
    "End Date",
    "Hire Status",
]

### Store in Snowflake unpivoted version

In [None]:
%%time
# upload to database


target_fields = ["Employee ID", "Name", "Start Date", "Credit Type",'HC Type'] + [
    "Q1-FY24",
    "Q2-FY24",
    "Q3-FY24",
    "Q4-FY24",
    "FY24",
    "Q1-FY25",
    "Q2-FY25",
    "Q3-FY25",
    "Q4-FY25",
]

index = quotas_df["Quota Type"] == "Carried Net ARR Quota"

to_gsheet = quotas_df[index][target_fields].copy()
print(len(to_gsheet))

to_gsheet.columns = [
    "employee_id",
    "name",
    "start_date",
    "credit_type",
    "hc_type",
    "cfy_q1",
    "cfy_q2",
    "cfy_q3",
    "cfy_q4",
    "cfy_total",
    "nfy_q1",
    "nfy_q2",
    "nfy_q3",
    "nfy_q4",
]

to_gsheet["cfy_q1"] = pd.to_numeric(to_gsheet["cfy_q1"])
to_gsheet["cfy_q2"] = pd.to_numeric(to_gsheet["cfy_q2"])
to_gsheet["cfy_q3"] = pd.to_numeric(to_gsheet["cfy_q3"])
to_gsheet["cfy_q4"] = pd.to_numeric(to_gsheet["cfy_q4"])

to_gsheet["nfy_q1"] = pd.to_numeric(to_gsheet["nfy_q1"])
to_gsheet["nfy_q2"] = pd.to_numeric(to_gsheet["nfy_q2"])
to_gsheet["nfy_q3"] = pd.to_numeric(to_gsheet["nfy_q3"])
to_gsheet["nfy_q4"] = pd.to_numeric(to_gsheet["nfy_q4"])

to_gsheet["cfy_total"] = pd.to_numeric(to_gsheet["cfy_total"])
to_gsheet["employee_id"] = to_gsheet["employee_id"].astype(str)

to_gsheet.fillna(0, inplace=True)

table_name = "ae_quotas_unpivoted"
schema = "SALES_ANALYTICS"
dataframe_uploader(
    dataframe=to_gsheet,
    engine=snowflake_engine,
    table_name=table_name,
    schema=schema,
    if_exists="replace",
    add_uploaded_at=False,
)

## Read Credit gSheet extract

In [None]:
# Read the "credit" tab of the source Google Sheet into credit_df
sheet_id = "1Cvs2IfMEiY-mGJ07F5r6sBwUIl9AXqlsISkfJG2iLHk"
sheet_name = "credit"
credit_df = read_from_gsheets(sheet_id, sheet_name)

## Save Credits into Snowflake

In [None]:
%%time
# Upload the credit extract to the SALES_ANALYTICS.ae_credits table in Snowflake
# number of rows about to be uploaded
print(len(credit_df))

table_name = "ae_credits"
schema = "SALES_ANALYTICS"
# NOTE(review): if_exists="replace" presumably overwrites the existing table and
# add_uploaded_at=True presumably adds an upload timestamp column — confirm
# against the gitlabdata dataframe_uploader documentation.
dataframe_uploader(
    dataframe=credit_df,
    engine=snowflake_engine,
    table_name=table_name,
    schema=schema,
    if_exists="replace",
    add_uploaded_at=True,
)

# Process Quotas Unpivoted version

In [None]:
#
# Identify the type of period to complement the tall table
#
def type_of_period(period_name):
    """Classify a period label by the pattern found at its start.

    Args:
        period_name: Period label such as "FY24", "Q1-FY24", "H1-FY24"
            or "Feb-23".

    Returns:
        "Fiscal Year", "Fiscal Quarter", "Half Fiscal Year" or "Month" for the
        first pattern matching the beginning of the label, or None when no
        pattern matches.
    """
    # patterns are tried in order and are only anchored at the start of the label
    period_patterns = (
        (r"^FY[2-3][0-9]", "Fiscal Year"),
        (r"^(Q)[1-4]-FY[2-3][0-9]", "Fiscal Quarter"),
        (r"^(H)[1-2]-FY[2-3][0-9]", "Half Fiscal Year"),
        (r"^([a-zA-Z]{3})-[2-3][0-9]", "Month"),
    )
    for pattern, period_type in period_patterns:
        if re.match(pattern, period_name):
            return period_type
    return None


# test
print(type_of_period("FY24"))

In [None]:
# Reshape the quotas from the wide sheet layout (one column per period) into a
# tall table with one row per descriptive-field combination and period
melted_df = (
    pd.melt(quotas_df, id_vars=desc_fields, value_vars=period_fields)
    .rename(columns={"variable": "period"})
    .rename(columns=str.lower)
)

# Add the period type and set the column types explicitly: type inference
# tends to treat the id fields as numerics when they are really strings
melted_df = melted_df.assign(
    **{
        "period_type": melted_df["period"].apply(type_of_period),
        "value": pd.to_numeric(melted_df["value"]),
        "ghp id": melted_df["ghp id"].astype(str),
        "employee id": melted_df["employee id"].astype(str),
    }
).dropna(subset=["value"])  # periods without a quota value are dropped

melted_df.head()

### Save Tall Format Quotas into gSheet and Snowflake

This process is currently giving problems and cannot be run locally, so the Snowflake upload below is skipped when `is_local_development` is `True`.

In [None]:
%%time
# Number of rows in the tall quota table
print(len(melted_df))

# Columns of the tall table that are written to gSheets and uploaded to Snowflake
target_columns = [
    "comp design",
    "employee id",
    "start date",
    "end date",
    "credit type",
    "quota effective start date",
    "name",
    "region",
    "role",
    "hc type",
    "role type",
    "manager",
    "area",
    "quota type",
    "period_type",
    "period",
    "value",
]

In [None]:
# Write the tall format to the "tall_quota_file" tab of the Google Sheet.
# The result of the write is deliberately not assigned: binding it to credit_df
# would overwrite the credits DataFrame loaded earlier in the notebook.
sheet_id = "1Cvs2IfMEiY-mGJ07F5r6sBwUIl9AXqlsISkfJG2iLHk"
sheet_name = "tall_quota_file"
write_to_gsheets(sheet_id, sheet_name, melted_df[target_columns])

In [None]:
# Upload the tall quota table to SALES_ANALYTICS.ae_quotas in Snowflake.
# The upload is skipped during local development.
if not is_local_development:
    # upload only the selected target columns, on a copy of the tall table
    to_upload = melted_df[target_columns].copy()

    table_name = "ae_quotas"
    schema = "SALES_ANALYTICS"
    dataframe_uploader(
        dataframe=to_upload,
        engine=snowflake_engine,
        table_name=table_name,
        schema=schema,
        if_exists="replace",
        add_uploaded_at=False,
    )