In [11]:
import os

import pandas as pd
import snowflake.connector

# Log in to the Snowflake DB through browser-based SSO (authenticator='externalbrowser').
# The login user can be overridden with the SNOWFLAKE_USER environment variable so the
# notebook is not tied to one person's account; the default is the original value.
con = snowflake.connector.connect(
    user=os.environ.get('SNOWFLAKE_USER', 'vishal.kumar@scale.com'),
    account='pxa65918',
    authenticator='externalbrowser',
    warehouse='COMPUTE_WH',
    database='SCALE_PROD',
    role='GENERAL_RO',
)

# Cursor shared by the query cells further down.
cs = con.cursor()

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


In [46]:
#Get data from snowflake
# Reporting window (both bounds inclusive) and the project groups in scope.
# They are defined once and bound into BOTH CTEs below, so the two filters
# cannot drift apart when the week or the group list is changed.
REPORT_START_DATE = '2023-04-24'
REPORT_END_DATE = '2023-04-29'
PROJECT_GROUPS = [
    'Instacart Production v0',
    'Flamingo Catalog Deprecated',
    'Imbe NPC v1',
    'Goose',
    'Flamingo Catalog Processing',
    'Woodpecker Enrichment Validation Test 1',
    'Seagull',
]

# The query has two CTEs aggregated to the same 8-column weekly grain:
#   fpa  - revenue / COGS / OPEX / volume totals from VIEW.FACT_PROJECT_AGGREGATE
#   long - attempt vs. review touches, cost and pay hours from costs_by_project_id_long
# and left-joins long onto fpa on all 8 grouping columns.
#
# Placeholders use the connector's pyformat style and are filled in by
# cs.execute(); a list bound inside IN (...) expands to a comma-separated
# list of quoted literals.
#
# NOTE(review): the join compares the grouping columns with '=', which never
# matches NULL to NULL. If any of those columns can be NULL, the attempt/review
# metrics for that row come back NULL - confirm nullability of the join keys.
sql = '''
with fpa as (Select 
date_trunc('week', DATE(day)) as report_date_week,
Project_NAME,
Project_group_name, 
customer_company,
Project_product,
PROJECT_BUSINESS_UNIT,
PROJECT_FULFILLMENT_MECHANISM,
PROJECT_TASK_TYPE,
sum(revenue) as revenue,
sum(TOTAL_COGS_AMOUNT-TOTAL_HOSTING_COGS_AMOUNT) as COGS,
sum(TOTAL_OPEX_AMOUNT-TOTAL_HOSTING_OPEX_AMOUNT) as OPEX,
SUM(NUM_COMPLETED_TASKS) AS TASK_VOLUME,
Sum(num_completed_subtasks) as subtask_volume,
sum(TOTAL_COGS_PAY_HOURS) as pay_hours,
--sum(HOURS_SPENT) as hours_spent,
sum(TOTAL_COGS_WORK_UNIT_COUNT) as touches_COGS,
sum(TOTAL_WORKER_COGS_AMOUNT) as worker_cost,

sum(MODTA_COGS_AMOUNT) as mod_cost,
sum(MXQA_COGS_AMOUNT) as mxqa_cost,
sum(TOTAL_REWARDS_COGS_AMOUNT) as rewards,
sum(TOTAL_WORKLOG_COGS_AMOUNT) as worklogs,
sum(TASKER_COGS_AMOUNT) as tasker_cost
from VIEW.FACT_PROJECT_AGGREGATE
where DATE(day) >= %(start_date)s and date(day) <= %(end_date)s
AND RECOGNIZED ILIKE 'TRUE' 
AND project_group_name IN (%(project_groups)s)

group by 1,2,3,4,5,6,7,8
),

long as(select 
date_trunc('week', DATE(day)) as report_date_week,
Project_NAME,
Project_group_name, 
customer_company,
Project_product,
PROJECT_BUSINESS_UNIT,
PROJECT_FULFILLMENT_MECHANISM,
project_task_type,
sum(case when COGS_OPEX ilike 'COGS' and work_level='attempt' then work_unit_count end) as attempt_touches,
sum(case when COGS_OPEX ilike 'COGS' and work_level in ('review_0','review_1','review_10')then work_unit_count end) as review_touches,
sum(case when COGS_OPEX ilike 'COGS' and work_level='attempt'then amount end) as attempt_cost,
sum(case when COGS_OPEX ilike 'COGS' and work_level in ('review_0','review_1','review_10')then amount end) as review_cost,
sum(case when COGS_OPEX ilike 'COGS' and work_level='attempt'then pay_hours end) as attempt_hrs,
sum(case when COGS_OPEX ilike 'COGS' and work_level in ('review_0','review_1','review_10')then pay_hours end) as review_hrs
--sum(case when COGS_OPEX ilike 'COGS' and work_level='attempt'then HOURS_SPENT end) as attempt_hs,
--sum(case when COGS_OPEX ilike 'COGS' and work_level in ('review_0','review_1','review_10')then HOURS_SPENT end) as review_hs
FROM scale_prod.view.costs_by_project_id_long
WHERE project_group_name IN (%(project_groups)s)
AND  DATE(day) >= %(start_date)s and DATE(day) <=  %(end_date)s
GROUP BY 1,2,3,4,5,6,7,8
)
select a.*,
b.attempt_touches,
b.review_touches,
b.attempt_cost,
b.review_cost,
b.attempt_hrs,
b.review_hrs
--b.attempt_hs,
--b.review_hs
from FPA a
left join long b on 
a.report_date_week=b.report_date_week and
a.Project_NAME=  b.Project_NAME and 
a.Project_group_name= b.Project_group_name and
a.customer_company=b.customer_company and 
a.Project_product=b.Project_product and 
a.PROJECT_BUSINESS_UNIT=b.PROJECT_BUSINESS_UNIT and
a.PROJECT_FULFILLMENT_MECHANISM=b.PROJECT_FULFILLMENT_MECHANISM and
a.PROJECT_TASK_TYPE=b.PROJECT_TASK_TYPE
'''
query_params = {
    'start_date': REPORT_START_DATE,
    'end_date': REPORT_END_DATE,
    'project_groups': PROJECT_GROUPS,
}
cs.execute(sql, query_params)
# Materialize the full result set as a pandas DataFrame.
jdf = cs.fetch_pandas_all()

In [47]:
# Drop the columns the report does not use, then list what remains.
# The result is reassigned instead of using inplace=True, and errors='ignore'
# skips columns that are already gone, so re-running this cell no longer
# raises KeyError on the second execution.
UNUSED_COLUMNS = [
    'WORKLOGS',
    'CUSTOMER_COMPANY',
    'PROJECT_PRODUCT',
    'PROJECT_BUSINESS_UNIT',
    'PROJECT_FULFILLMENT_MECHANISM',
    'PROJECT_TASK_TYPE',
]
jdf = jdf.drop(columns=UNUSED_COLUMNS, errors='ignore')
jdf.columns


Index(['REPORT_DATE_WEEK', 'PROJECT_NAME', 'PROJECT_GROUP_NAME', 'REVENUE',
       'COGS', 'OPEX', 'TASK_VOLUME', 'SUBTASK_VOLUME', 'PAY_HOURS',
       'TOUCHES_COGS', 'WORKER_COST', 'MOD_COST', 'MXQA_COST', 'REWARDS',
       'TASKER_COST', 'ATTEMPT_TOUCHES', 'REVIEW_TOUCHES', 'ATTEMPT_COST',
       'REVIEW_COST', 'ATTEMPT_HRS', 'REVIEW_HRS'],
      dtype='object')

In [48]:
# Write jdf to 'jdf.csv' (relative to the current working directory),
# leaving the DataFrame index out of the file.
jdf.to_csv(path_or_buf='jdf.csv', index=False)

In [49]:
# Show the frame as the cell's last expression so the notebook uses its rich
# table rendering; print() forces the plain-text repr, which wraps wide frames.
jdf

   REPORT_DATE_WEEK                                       PROJECT_NAME  \
0        2023-04-24                      Flamingo Color Categorization   
1        2023-04-24                                 seagull_image_eval   
2        2023-04-24                  seagull_text_attribute_evaluation   
3        2023-04-24    Flamingo Bullets Categorization (Health/Beauty)   
4        2023-04-24          Flamingo Bullets Categorization (Jewelry)   
5        2023-04-24                        seagull_taxonomy_evaluation   
6        2023-04-24                                ralph-dummy-project   
7        2023-04-24    Woodpecker Enrichment Validation - Pilot Test 4   
8        2023-04-24                    Flamingo Color Title Extraction   
9        2023-04-24            Flamingo Size Chart Site Classification   
10       2023-04-24                    seagull_taxonomy_eavluation_llm   
11       2023-04-24                  [Catalog Engine] Attribute Review   
12       2023-04-24                   