In [1]:
import pandas as pd
import numpy as np
import snowflake.connector
from collections import defaultdict
import json
import re
import requests
import sys
import random
import requests
import tabulate
from datetime import datetime
import schedule
import time

In [2]:
#login to snowflake db
con = snowflake.connector.connect(user='vishal.kumar@scale.com',
                                 account='pxa65918',
                                 authenticator='externalbrowser',
                                 warehouse='COMPUTE_WH',
                                 database='SCALE_CRAWLER',
                                 role='GENERAL_RO')

cs = con.cursor()

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


In [3]:
#Get fivetran PVIDs
sql = f'''
 SELECT
   CAST (DECISION_DATE as varchar) as DECISION_DATE,
  CAST(product_id as varchar) as product_id,
  SOURCE,
  CAST(catalog_id as varchar) as catalogid,
  CUSTOMER_DATE_SENT as ds
FROM
  PC_FIVETRAN_DB.GSHEETS.flamingocfapril_25
'''
cs.execute(sql)
fivetrandf = cs.fetch_pandas_all()

In [4]:
#Get tool PVIDs
sql = f'''
select DISTINCT
f.key as product_id
from
PUBLIC.SPOTTERAUDITS p
,lateral flatten(input => p.RESULT) as f
where
 AUDIT_TYPE = 'CustomerFeedback'
 and STATUS not in  ('Canceled')
order by f.key desc
'''
cs.execute(sql)
tooldf = cs.fetch_pandas_all()

In [5]:
#Get Catalog acceptance and cessation status
sql = f'''
with 
submission_level_updates as (
  select
    CATALOG_ID,
    historyRow.value :value value,
    historyRow.value :updatedAt updatedAt
  from
    scale_crawler.PUBLIC.CUSTOMERCATALOGS,
    lateral flatten(input => SUBMISSION_LEVEL_HISTORY) historyRow
  where
    customer = 'flamingo'
    -- and updatedAt <= '{{when}}'
),
last_submission_update as (
  select
    max(updatedAt) updatedAt,
    catalog_id
  from
    submission_level_updates
  group by
    catalog_id
),
submission_level as (
  select
    submission_level_updates.*
  from
    submission_level_updates
    join last_submission_update on submission_level_updates.catalog_id = last_submission_update.catalog_id
    and submission_level_updates.updatedAt = last_submission_update.updatedAt
),
-- extractor compmlexity
extractor_complexity_updates as (
  select
    CATALOG_ID,
    historyRow.value :value value,
    historyRow.value :updatedAt updatedAt
  from
    scale_crawler.PUBLIC.CUSTOMERCATALOGS,
    lateral flatten(input => EXTRACTION_COMPLEXITY_HISTORY) historyRow
  where
    customer = 'flamingo'
    -- and updatedAt <= '{{when}}'
),
last_extractor_complexity_update as (
  select
    max(updatedAt) updatedAt,
    catalog_id
  from
    extractor_complexity_updates
  group by
    catalog_id
),
extractor_complexity as (
  select
    extractor_complexity_updates.*
  from
    extractor_complexity_updates
    join last_extractor_complexity_update on extractor_complexity_updates.catalog_id = last_extractor_complexity_update.catalog_id
    and last_extractor_complexity_update.updatedAt = last_extractor_complexity_update.updatedAt
),
-- sizechart compmlexity
sizechart_complexity_updates as (
  select
    CATALOG_ID,
    historyRow.value :value value,
    historyRow.value :updatedAt updatedAt
  from
    scale_crawler.PUBLIC.CUSTOMERCATALOGS,
    lateral flatten(input => SIZECHART_COMPLEXITY_HISTORY) historyRow
  where
    customer = 'flamingo'
    -- and updatedAt <= '{{when}}'
),
last_sizechart_complexity_update as (
  select
    max(updatedAt) updatedAt,
    catalog_id
  from
    sizechart_complexity_updates
  group by
    catalog_id
),
sizechart_complexity as (
  select
    sizechart_complexity_updates.*
  from
    sizechart_complexity_updates
    join last_sizechart_complexity_update on sizechart_complexity_updates.catalog_id = last_sizechart_complexity_update.catalog_id
    and last_sizechart_complexity_update.updatedAt = sizechart_complexity_updates.updatedAt
),
-- acceptance
acceptance_updates as (
  select
    CATALOG_ID,
    historyRow.value :value value,
    historyRow.value :updatedAt updatedAt,
    historyRow.value :metadata.source = '21_day_threshold' force_accepted
  from
    scale_crawler.PUBLIC.CUSTOMERCATALOGS,
    lateral flatten(input => ACCEPTANCE_HISTORY) historyRow
  where
    customer = 'flamingo'
    -- and updatedAt <= '{{when}}'
),
last_acceptance_update as (
  select
    max(updatedAt) updatedAt,
    catalog_id
  from
    acceptance_updates
  group by
    catalog_id
),
acceptance as (
  select
    acceptance_updates.*
  from
    acceptance_updates
    join last_acceptance_update on acceptance_updates.catalog_id = last_acceptance_update.catalog_id
    and last_acceptance_update.updatedAt = acceptance_updates.updatedAt
),
-- cessation
cessation_updates as (
  select
    CATALOG_ID,
    historyRow.value :value value,
    historyRow.value :updatedAt updatedAt
  from
    scale_crawler.PUBLIC.CUSTOMERCATALOGS,
    lateral flatten(input => CESSATION_HISTORY) historyRow
  where
    customer = 'flamingo'
    -- and updatedAt <= '{{when}}'
),
last_cessation_update as (
  select
    max(updatedAt) updatedAt,
    catalog_id
  from
    cessation_updates
  group by
    catalog_id
),
cessation as (
  select
    cessation_updates.*
  from
    cessation_updates
    join last_cessation_update on cessation_updates.catalog_id = last_cessation_update.catalog_id
    and last_cessation_update.updatedAt = cessation_updates.updatedAt
),
--combine data
all_data as (
    select
        cc.catalog_id,
        cc.brands,
        COALESCE(acceptance.value, false) acceptance_value,
        COALESCE(acceptance.force_accepted, false) acceptance_force_accepted,
        COALESCE(cessation.value, false) cessation_value,
        acceptance.updatedAt acceptance_updatedAt,
        cessation.updatedAt cessation_updatedAt
    from
        CUSTOMERCATALOGS cc 
        left join submission_level on cc.catalog_id = submission_level.catalog_id
        left join extractor_complexity on cc.catalog_id = extractor_complexity.catalog_id
        left join sizechart_complexity on cc.catalog_id = sizechart_complexity.catalog_id
        left join acceptance on cc.catalog_id = acceptance.catalog_id
        left join cessation on cc.catalog_id = cessation.catalog_id
    where
        cc.customer = 'flamingo'
)
select * from all_data
'''
cs.execute(sql)
statusdf = cs.fetch_pandas_all()

In [6]:
tdf = pd.merge(fivetrandf, statusdf, left_on = 'CATALOGID', right_on = 'CATALOG_ID', how = 'left', )

In [7]:
tdf.drop(tdf[tdf['CESSATION_VALUE'] == 'true'].index, inplace = True)
tdf.drop(tdf[tdf['ACCEPTANCE_VALUE'] == 'true'].index, inplace = True)
tdf.drop(tdf[tdf['ACCEPTANCE_FORCE_ACCEPTED'] == 'true'].index, inplace = True)

In [8]:
print("\n Number of PVIDs in fivetran =",len(fivetrandf),"\n","Number of PVIDs in tool =",len(tooldf),"\n","Number of Non-Cessated and Not-Accepted PVIDs =",len(tdf))


 Number of PVIDs in fivetran = 519555 
 Number of PVIDs in tool = 174155 
 Number of Non-Cessated and Not-Accepted PVIDs = 48796


In [9]:
fdf = pd.merge(tdf, tooldf, left_on = 'PRODUCT_ID', right_on = 'PRODUCT_ID', how = 'left', indicator=True)

In [10]:
fdf['_merge'].unique()

['left_only', 'both']
Categories (3, object): ['left_only', 'right_only', 'both']

In [12]:
fdf.drop(fdf[fdf['_merge'] == 'both'].index, inplace = True)

In [16]:
fdf.to_clipboard()