In [1]:
import pandas as pd
import re
import datetime
import numpy as np
import requests
from pandas.io.json import json_normalize
import json
import os
import os.path
import snowflake.connector
import boto3
from __future__ import print_function
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import random

In [2]:
# S3
BUCKET = 'scale-crawler-enriched-csv-exports-us-west-2'
s3 = boto3.client('s3')
session = boto3.Session()

# Google Sheets
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
SPREADSHEET_ID = '1ycZEbsg7hEb_kKAYmIg6eK0hBIl4fvhK0FDan1f5UkE'
RANGE_NAME = 'Sheet9!A:M'
PATH_TO_SECRETS_FILE = 'credentials.json'
creds = None

In [3]:
#Snowflake
con = snowflake.connector.connect(user='vishal.kumar@scale.com',
                                 account='pxa65918',
                                 authenticator='externalbrowser',
                                 warehouse='COMPUTE_WH',
                                 database='SCALE_CRAWLER',
                                 role='GENERAL_RO')
cs = con.cursor()

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


In [4]:
def uploadData(data,filename):
    s3.put_object(
        ACL='bucket-owner-full-control',
        Body=data.encode('utf-8'),
        Bucket=RESULTS_BUCKET,
        Key=f'flamingo_qa_potential_issues/{filename}')

In [5]:
## Pull data from Google Sheet https://docs.google.com/spreadsheets/d/1UCIE1P6PbI9odzxFUjNF44s-SaPePbDUnHQKqxa9XpM/edit#gid=774020952
def pullFromGS(SCOPES,PATH_TO_SECRETS_FILE,creds,SPREADSHEET_ID,RANGE_NAME):
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(PATH_TO_SECRETS_FILE, SCOPES)
            creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    try:
        service = build('sheets', 'v4', credentials=creds)

        sheet = service.spreadsheets()
        result = sheet.values().get(spreadsheetId=SPREADSHEET_ID,range=RANGE_NAME).execute()
        values = result.get('values', [])

        if not values:
            print('No data found.')
        
    except HttpError as err:
        print(err)
        
    df = pd.DataFrame(values[1:],columns = values[0])    
    return df

In [6]:
#All CQR audit data from Spotter Audits including S3 csv path which contains each PVID and its attributes with results
def getCQRResults(min_date,max_date):
    
    sql = f'''
    with cqr_result as (
      with audits as (
        select
          sa.CATALOG_ID,
          sa.domain,
          sa.BODY_S3_KEY,
          sa._id audit_id,
          date(sa.completed_at) audit_time,
          sa.grade :"scores" :"descriptionScore" :"score" as CQR_DESCRIPTION_SCORE,
          sa.grade :"scores" :"titleScore" :"score" as Title,
          sa.result
        from
          PUBLIC.SPOTTERAUDITS sa
          inner join (
            select
              max(completed_at) as max_time,
              CATALOG_ID
            from
              PUBLIC.SPOTTERAUDITS
            group by
              CATALOG_ID
          ) as cqr_max on cqr_max.CATALOG_ID = sa.CATALOG_ID
          and cqr_max.max_time = sa.completed_at
        where
          AUDIT_TYPE = 'Attributes'
          and sa.COMPLETED_AT is not null
          and sa.grade :"scores" :"descriptionScore" :"score" is not null
          and date(sa.completed_at) >= '{min_date}'
          and date(sa.completed_at) <= '{max_date}'
      )
      select
        au.CATALOG_ID,
        au.domain,
        au.audit_id,
        au.audit_time CQR_AUDIT_DATE,
        au.BODY_S3_KEY,
        a.key variant_id,
        b.key attribute,
        b.value :result :: string attribute_grade,
        b.value :reason :: string reason,
        b.value :comment :: string comment  
      from
        audits au,
        lateral flatten (input => au.result) a,
        lateral flatten (input => a.value) b
      where
        b.key in ('description')
        and b.value :result = 'Incorrect'
    )
    select 
    c.*,
    pv.pvid,
    pv.scraped_attributes:link::string link
    from cqr_result c
    join productvariants pv on pv.unique_id = c.variant_id
    '''
    print('Getting CQR incorrect results from Snowflake!')
    cs.execute(sql)
    df = cs.fetch_pandas_all()
    print('Success! Got CQR incorrect results from Snowflake. Number of rows:',len(df),'\n-------------')
    return df

In [7]:
#All CQR audit data from Spotter Audits including S3 csv path which contains each PVID and its attributes with results
def getCQRCount(min_date,max_date):
    
    sql = f'''
    with cqr_result as (
      with audits as (
        select
          sa.CATALOG_ID,
          sa.domain,
          sa.BODY_S3_KEY,
          sa._id audit_id,
          date(sa.completed_at) audit_time,
          sa.grade :"scores" :"descriptionScore" :"score" as CQR_DESCRIPTION_SCORE,
          sa.grade :"scores" :"titleScore" :"score" as Title,
          sa.result
        from
          PUBLIC.SPOTTERAUDITS sa
          inner join (
            select
              max(completed_at) as max_time,
              CATALOG_ID
            from
              PUBLIC.SPOTTERAUDITS
            group by
              CATALOG_ID
          ) as cqr_max on cqr_max.CATALOG_ID = sa.CATALOG_ID
          and cqr_max.max_time = sa.completed_at
        where
          AUDIT_TYPE = 'Attributes'
          and sa.COMPLETED_AT is not null
          and sa.grade :"scores" :"descriptionScore" :"score" is not null
          and date(sa.completed_at) >= '{min_date}'
          and date(sa.completed_at) <= '{max_date}'
      )
      select
        au.CATALOG_ID,
        au.domain,
        au.audit_id,
        au.audit_time CQR_AUDIT_DATE,
        au.BODY_S3_KEY,
        a.key variant_id,
        b.key attribute,
        b.value :result :: string attribute_grade,
        b.value :reason :: string reason,
        b.value :comment :: string comment  
      from
        audits au,
        lateral flatten (input => au.result) a,
        lateral flatten (input => a.value) b
      where
        b.key in ('description')
    )
    select 
    c.*,
    pv.pvid,
    pv.scraped_attributes:link::string link
    from cqr_result c
    join productvariants pv on pv.unique_id = c.variant_id
    '''
    print('Getting all CQR data from Snowflake!')
    cs.execute(sql)
    df = cs.fetch_pandas_all()
    print('Success! Got all CQR data from Snowflake. Number of rows:',len(df),'\n-------------')
    return df

In [8]:
#take the column S3 csv url and consolidate all audits csv data into one dataframe, return this df
def getCQRInputs(cqr_results):
    
    df = pd.DataFrame() 
    print('Getting CQR input data from S3!')
    for s3_file in cqr_results['BODY_S3_KEY'].unique().tolist():
        print('Pulling from', s3_file)
        response = s3.get_object(Bucket = BUCKET, Key = s3_file)
        tmp = pd.read_csv(response.get("Body"))
        df = pd.concat([df,tmp])
    print('Success! Got CQR input data from S3. Number of rows:', len(df),'\n-------------')
    return df

In [9]:
#merge audit data with PVID attributes data and flags
def mergeCQRData(cqr_results, cqr_inputs):
    if len(cqr_results) == 0 or len(cqr_inputs) == 0: 
        print('ERROR: Not enough information to complete')
        df = pd.DataFrame()
    else:
        print('Merging data!')
        df = cqr_results.merge(cqr_inputs[['pvid','description','link']], left_on = 'PVID', right_on = 'pvid')
        df = df.fillna('').rename(columns = {'description':'POST_PROCESSED_DESCRIPTION','COMMENT':'CORRECT_DESCRIPTION'})

        df = df.sort_values(['POST_PROCESSED_DESCRIPTION'])
        df = df.loc[(df['POST_PROCESSED_DESCRIPTION'] != '') & (df['CORRECT_DESCRIPTION'] != '')] 
        print('Success! Merged data. Number of rows:', len(df),'\n-------------')
    return df

In [10]:
#Get QA events data - post processing task logs by QA, by PVID
def getPPQAData(relevant_pvids):
    pvids = "('" + "','".join(relevant_pvids) + "')"
#     print(pvids)
    sql_descs = f'''
    select
      user_email,
      _ID,
      metadata :pvids description_id,
      b.value :: string pvid,
      CREATED_AT variant_pped_at,
      metadata: auditLevel :: string audit_level,
      metadata: fieldCurrent :: string QA_DESCRIPTION
    from
      PUBLIC.QAEVENTS,
      lateral flatten(input => metadata :pvids) b
    where
      audit_level != 'Other'
      and METADATA :action in ('Save', 'SwitchItem')
      and metadata: fieldCurrent is not Null
      and pvid in {pvids}
    '''

    sql_rules = f'''
    select
      user_email,
      metadata :pvids description_id,
      b.value :: string pvid,
      CREATED_AT variant_pped_at,
      metadata: auditLevel :: string audit_level,
      metadata: flagComment :: string flagtext,
      metadata: ruleCreated :: string ruleCreated
    from
      PUBLIC.QAEVENTS,
      lateral flatten(input => metadata :pvids) b
    where
      audit_level != 'Other'
      and METADATA :action in ('CreateRule')
      and metadata: flagComment is not Null
      and pvid in {pvids}
    '''
    

    
    print('Getting descriptions data from Snowflake!')
    cs.execute(sql_descs)
    pp_desc_data = cs.fetch_pandas_all()
    print('Success! Got descriptions data from Snowflake. Number of rows:',len(pp_desc_data))    
    
    print('Getting rules data from Snowflake!')
    cs.execute(sql_rules)
    pp_rules_data = cs.fetch_pandas_all()
    print('Success! Got rules data from Snowflake. Number of rows:',len(pp_rules_data),'\n-------------')    
    
    return pp_desc_data, pp_rules_data

In [11]:
#Combine CQR audit data with PP QA Events and find 'extra' or 'missing' text in the description - FOR SPEED AUDITS
def generateSpeedAuditErrors(cqr_data, pp_desc_data):
    cols = ['CQR_AUDIT_DATE', 'USER_EMAIL', 'type', 'AUDIT_LEVEL',
                    'DOMAIN', 'description_PPed_at', 'sample_pvid',
                    'sample_link','CORRECT_DESCRIPTION', 'QA_DESCRIPTION',
                   'Extra text (not removed by QA)',
                   'Missing text (incorrectly removed by QA)','outcome']
        
    if len(cqr_data) == 0 or len(pp_desc_data) == 0: 
        print('ERROR: Not enough information to complete')
        dff = pd.DataFrame(columns = cols)
    else: 
        print('Generating Speed Audit errors!')
        df = cqr_data.merge(pp_desc_data, on = 'PVID')
        df = df.rename(columns = {'COMMENT':'CORRECT_DESCRIPTION'})
        df['clean_final_desc'] = df.apply(lambda x: re.sub('\\\\n|\n| ','',x['CORRECT_DESCRIPTION']),axis=1)
        df['clean_fieldcurrent'] = df.apply(lambda x: re.sub('\\\\n|\n| ','',x['QA_DESCRIPTION']),axis=1)
        df['is_correct_desc'] = df['clean_final_desc'] == df['clean_fieldcurrent']
        df = df.drop_duplicates() # .loc[df['is_correct_desc'] == False]
        if len(df) ==0:
            return df
        else:
            tmp_cols = ['CQR_AUDIT_DATE',
                'USER_EMAIL',
                'AUDIT_LEVEL',
                'DOMAIN',
                'CORRECT_DESCRIPTION',
                'QA_DESCRIPTION','is_correct_desc']

            dff = df.groupby(tmp_cols)['VARIANT_PPED_AT','PVID','LINK'].min()                .reset_index()                .rename(columns = {'VARIANT_PPED_AT':'description_PPed_at','PVID':'sample_pvid','LINK':'sample_link'})
            dff['Extra text (not removed by QA)'] = dff.apply(lambda x: np.setdiff1d([i.strip('. ').strip('! ').strip('? ').lower() for i in re.split('\. |\n|\! |\? ', x['QA_DESCRIPTION']) if i != ''],[i.strip('. ').strip('! ').strip('? ').lower() for i in re.split('\. |\n|\! |\? ', x['CORRECT_DESCRIPTION']) if i != '']), axis = 1)    
            dff['Missing text (incorrectly removed by QA)'] = dff.apply(lambda x: np.setdiff1d([i.strip('. ').strip('! ').strip('? ').lower() for i in re.split('\. |\n|\! |\? ', x['CORRECT_DESCRIPTION']) if i != ''],[i.strip('. ').strip('! ').strip('? ').lower() for i in re.split('\. |\n|\! |\? ', x['QA_DESCRIPTION']) if i != '']), axis = 1)
            dff['type'] = 'Speed Audit'
            dff['outcome'] = dff.apply(lambda x: 'incorrect speed audit' if x['is_correct_desc'] == False else 'correct speed audit', axis = 1)
            dfg = df.groupby(['USER_EMAIL'])['PVID'].nunique() 
            print('Success! Generated Speed Audit Errors\n-------------')     
        return dfg, dff.loc[:,cols]

In [21]:
#Get total sample of CQR Audits
def generateAuditCountQA(cqr_count, pp_desc_data):
    cols = ['CQR_AUDIT_DATE', 'USER_EMAIL', 'type', 'AUDIT_LEVEL',
                    'DOMAIN', 'description_PPed_at', 'sample_pvid',
                    'sample_link','CORRECT_DESCRIPTION', 'QA_DESCRIPTION',
                   'Extra text (not removed by QA)',
                   'Missing text (incorrectly removed by QA)','outcome']
        
    if len(cqr_count) == 0 or len(pp_desc_data) == 0: 
        print('ERROR: Not enough information to complete')
        dff = pd.DataFrame(columns = cols)
    else: 
        print('Generating QA Sample Size Count')
        df = cqr_count.merge(pp_desc_data, how='left', on = 'fb_product_id')
        df = df.rename(columns = {'COMMENT':'CORRECT_DESCRIPTION'})
        dff = df.groupby(['USER_EMAIL'])['fb_product_id'].nunique() 
        print('Success! QA Sample Size Count')     
        return dff

In [13]:
##Combine CQR audit data with PP QA Events and find 'extra' or 'missing' text in the description - FOR FLAG AUDITS
def generateFlagAuditErrors(full_cqr_data, pp_rules_data):
    cols = ['CQR_AUDIT_DATE', 'USER_EMAIL','type','AUDIT_LEVEL',
                'DOMAIN', 'description_PPed_at', 'sample_pvid',
                'sample_link','FLAGTEXT', 'RULECREATED',
               'Extra text (not removed by QA)',
               'Missing text (incorrectly removed by QA)','outcome']
    if len(full_cqr_data) == 0 or len(pp_rules_data) == 0: 
        print('ERROR: Not enough information to complete')
        dff = pd.DataFrame(columns = cols)
    else: 
        print('Generating Flag Audit errors!')
        df = full_cqr_data.merge(pp_rules_data, on = 'PVID')
        df = df.rename(columns = {'COMMENT':'CORRECT_DESCRIPTION'})
        cols = ['CQR_AUDIT_DATE',
            'USER_EMAIL',
            'AUDIT_LEVEL',
            'DOMAIN',
            'POST_PROCESSED_DESCRIPTION',
            'CORRECT_DESCRIPTION',
               'FLAGTEXT','RULECREATED']
        dff = df.groupby(cols)['VARIANT_PPED_AT','PVID','LINK'].min()            .reset_index()            .rename(columns = {'VARIANT_PPED_AT':'description_PPed_at','PVID':'sample_pvid','LINK':'sample_link'})
        dff['Extra text (not removed by QA)'] = dff.apply(lambda x: np.setdiff1d([i.strip('. ') for i in re.split('\. |\n|\! |\? ', x['POST_PROCESSED_DESCRIPTION']) if i != ''],[i.strip('. ') for i in re.split('\. |\n|\! |\? ', x['CORRECT_DESCRIPTION']) if i != '']), axis = 1)    
        dff['Missing text (incorrectly removed by QA)'] = dff.apply(lambda x: np.setdiff1d([i.strip('. ') for i in re.split('\. |\n|\! |\? ', x['CORRECT_DESCRIPTION']) if i != ''],[i.strip('. ') for i in re.split('\. |\n|\! |\? ', x['POST_PROCESSED_DESCRIPTION']) if i != '']), axis = 1)

        dff['bad_removal'] = dff.apply(lambda x: x['RULECREATED'] == 'true' and re.sub("\.|\'|\,",'',x['FLAGTEXT'].strip().lower()) in re.sub("\.|\'|\,",'',str(x['Missing text (incorrectly removed by QA)']).strip().lower()),axis = 1)
        dff['bad_inclusion'] = dff.apply(lambda x:  x['RULECREATED'] == 'false' and re.sub("\.|\'|\,",'',x['FLAGTEXT'].strip().lower()) in re.sub("\.|\'|\,",'',str(x['Extra text (not removed by QA)']).strip().lower()),axis = 1)
        dff['outcome'] = dff.apply(lambda x: 'bad flag removal' if x['bad_removal'] == True else ('bad flag inclusion' if x['bad_inclusion'] == True else 'ok'), axis = 1)
        dff['type'] = 'Flag Audit'

        print('Success! Generated Flag Audit Errors\n-------------')    
    return dff.loc[:,cols] #dff['outcome'] != 'ok',


In [14]:
#Generate final error report
def completeErrorReport(speed_audit_errors,flag_audit_errors):
    df = pd.concat([speed_audit_errors,flag_audit_errors])[['CQR_AUDIT_DATE',
    'USER_EMAIL',
    'type',
    'AUDIT_LEVEL',
    'DOMAIN',
    'description_PPed_at',
    'sample_pvid',
    'sample_link',
    'CORRECT_DESCRIPTION',
    'QA_DESCRIPTION',
    'FLAGTEXT',
    'RULECREATED',
    'Extra text (not removed by QA)',
    'Missing text (incorrectly removed by QA)',
    'outcome']]
    df = df.sort_values(['DOMAIN','USER_EMAIL'])
    
    print(df.shape)
    
    df['CQR_AUDIT_DATE'] = pd.to_datetime(df['CQR_AUDIT_DATE'],utc=True)
    df['description_PPed_at'] = pd.to_datetime(df['description_PPed_at'],utc=True)

########## Change variable - number of days #############
#    df = df.loc[abs((df['CQR_AUDIT_DATE'] - df['description_PPed_at']).dt.days) <= 21] ##default -  only include work done in past week
    
#    df.loc[df['outcome'].isin(['incorrect speed audit','bad flag removal'])].to_clipboard(index = False)
    df.to_clipboard(index = False)
    print('Error Report created!')
    return df

In [22]:
date_in = '09/18/2022'
# date_out = date_in
date_out = '09/20/2022'

print(f'ERROR LOGS {date_in} to {date_out}\n')
cqr_results = getCQRResults(date_in,date_out)
cqr_inputs = getCQRInputs(cqr_results)
full_cqr_data = mergeCQRData(cqr_results, cqr_inputs)

pp_desc_data, pp_rules_data = getPPQAData(cqr_results['PVID'].unique().tolist())
error_count,speed_audit_errors = generateSpeedAuditErrors(cqr_results, pp_desc_data)

flag_audit_errors = generateFlagAuditErrors(full_cqr_data, pp_rules_data)
df = completeErrorReport(speed_audit_errors,flag_audit_errors)

tcqr_count = getCQRCount(date_in,date_out)
cqr_count = getCQRInputs(tcqr_count)
audit_count = generateAuditCountQA(cqr_count, pp_desc_data)
# In[6]:


#df.loc[df['outcome'].isin(['incorrect speed audit','bad flag removal'])].to_clipboard(index = False)

ERROR LOGS 09/18/2022 to 09/20/2022

Getting CQR incorrect results from Snowflake!
Success! Got CQR incorrect results from Snowflake. Number of rows: 431 
-------------
Getting CQR input data from S3!
Pulling from www.rollbicycles.com/partial_www.rollbicycles.com_0919_00_26:12:26:18.csv
Pulling from m.nadula.com/m.nadula.com_0913_02_07:02:07:32.csv
Pulling from www.ourtruegod.com/partial_www.ourtruegod.com_0916_04_30:04:30:34.csv
Pulling from ansonbelt.com/ansonbelt.com_0915_04_19:04:19:24.csv
Pulling from branchbasics.com/branchbasics.com_0915_05_09:05:09:39.csv
Pulling from nagijewelers.com/nagijewelers.com_0914_00_38:12:38:33.csv
Pulling from jaeleacosmetics.com/partial_jaeleacosmetics.com_0919_06_27:06:27:53.csv
Pulling from www.tula.com/www.tula.com_0916_02_01:02:01:29.csv
Pulling from shop.wisdomofthewombonline.com/shop.wisdomofthewombonline.com_0915_05_08:05:08:39.csv
Pulling from dailysale.com/partial_dailysale.com_0914_00_32:12:32:28.csv
Pulling from littleseedfarm.com/partial

  dff = df.groupby(tmp_cols)['VARIANT_PPED_AT','PVID','LINK'].min()                .reset_index()                .rename(columns = {'VARIANT_PPED_AT':'description_PPed_at','PVID':'sample_pvid','LINK':'sample_link'})
  dff = df.groupby(cols)['VARIANT_PPED_AT','PVID','LINK'].min()            .reset_index()            .rename(columns = {'VARIANT_PPED_AT':'description_PPed_at','PVID':'sample_pvid','LINK':'sample_link'})


Success! Generated Speed Audit Errors
-------------
Generating Flag Audit errors!
Success! Generated Flag Audit Errors
-------------
(412, 15)
Error Report created!
Getting all CQR data from Snowflake!
Success! Got all CQR data from Snowflake. Number of rows: 7283 
-------------
Getting CQR input data from S3!
Pulling from tkees.com/partial_tkees.com_0919_00_21:12:21:55.csv
Pulling from bluechipteam.com/partial_bluechipteam.com_0919_00_33:12:33:11.csv
Pulling from ballermerch.com/partial_ballermerch.com_0916_00_25:12:25:34.csv
Pulling from shop.barnowl.tech/partial_shop.barnowl.tech_0916_00_36:12:36:57.csv
Pulling from gumps.com/partial_gumps.com_0919_02_26:02:26:09.csv
Pulling from workthemetal.com/partial_workthemetal.com_0919_21_44:09:44:49.csv
Pulling from rkmerch.com/rkmerch.com_0916_01_56:01:56:04.csv
Pulling from alltherestaurants.com/partial_alltherestaurants.com_0919_21_57:09:57:33.csv
Pulling from strawberryavocados.com/partial_strawberryavocados.com_0915_04_59:04:59:20.csv
P

KeyError: 'fb_product_id'

In [None]:
ac = audit_count.to_frame()
ac.to_clipboard(index = False)

In [24]:
tcqr_count

Unnamed: 0,CATALOG_ID,DOMAIN,AUDIT_ID,CQR_AUDIT_DATE,BODY_S3_KEY,VARIANT_ID,ATTRIBUTE,ATTRIBUTE_GRADE,REASON,COMMENT,PVID,LINK
0,2785268745061885,tkees.com,6327ef15bccd1c57b6372db3,2022-09-19,tkees.com/partial_tkees.com_0919_00_21:12:21:5...,18230650830921,description,Good,,,tkees.com!18230650830921,https://tkees.com/products/mini-neons?variant=...
1,538362037744988,bluechipteam.com,6328348106210efcabed5475,2022-09-19,bluechipteam.com/partial_bluechipteam.com_0919...,42018851487939,description,Good,,,bluechipteam.com!42018851487939,https://bluechipteam.com/products/monty-montgo...
2,292992665163720,ballermerch.com,63240cbc7b603dacf06ffa72,2022-09-19,ballermerch.com/partial_ballermerch.com_0916_0...,40679927054520,description,Good,,,ballermerch.com!40679927054520,https://ballermerch.com/products/cuffed-beanie...
3,290437888446813,shop.barnowl.tech,6327cc5acbf41d571c3e0a85,2022-09-20,shop.barnowl.tech/partial_shop.barnowl.tech_09...,39728642130002,description,Good,,,shop.barnowl.tech!39728642130002,https://shop.barnowl.tech/products/barn-owl-un...
4,911984675868483,gumps.com,6328715dc806030e28e72937,2022-09-20,gumps.com/partial_gumps.com_0919_02_26:02:26:0...,31788965986389,description,Good,,,gumps.com!31788965986389,https://gumps.com/products/necklace-ss-blue-to...
...,...,...,...,...,...,...,...,...,...,...,...,...
7278,255457501904489,roseboxnyc.com,6328f7060f35beb509c38ff9,2022-09-20,roseboxnyc.com/roseboxnyc.com_0919_19_29:07:29...,571,description,Good,,,www.oakrivercompany.com!571,https://www.oakrivercompany.com/store/p571/Buc...
7279,302351757209070,www.livieandluca.com,6319f0d7c36b0b4e47ac1d99,2022-09-19,www.livieandluca.com/www.livieandluca.com_0908...,39389877633095,description,Good,,,www.livieandluca.com!39389877633095,https://www.livieandluca.com/products/ali-boot...
7280,576304583071755,www.ryanchristianjewelry.com,63247b097b603dbf179024b9,2022-09-20,www.ryanchristianjewelry.com/partial_www.ryanc...,31300746608738,description,Good,,,ryanchristiandesigns.com!31300746608738,https://ryanchristiandesigns.com/products/ster...
7281,1111616945602561,alltherestaurants.com,63293c0346db09131c733816,2022-09-20,alltherestaurants.com/partial_alltherestaurant...,39277492011071,description,Good,,,alltherestaurants.com!39277492011071,https://alltherestaurants.com/products/hop-kee...


In [17]:
error_count

USER_EMAIL
abegael.intertas@teleworkph-mails.com          1
adeline.santos@teleworkph-mails.com            1
alvin.dagdagan@teleworkph-mails.com            3
christian.mitu@teleworkph-mails.com            2
danilo.gatuz@teleworkph-mails.com             37
earlson.miquiabas@contractors.scale.com       24
gabriela.almaraz@contractors.scale.com        26
german.toledo@contractors.scale.com           63
jaspher.abayon@teleworkph-mails.com            5
jericlopez.lopez@teleworkph-mails.com          1
johntristan.faustino@teleworkph-mails.com      1
jose.bangay@contractors.scale.com             22
karenann.astorga@teleworkph-mails.com          5
karla.nunez@contractors.scale.com              6
kent.mozo@contractors.scale.com                6
kyla.mananghaya@teleworkph-mails.com           7
liezel.mangulabnan@teleworkph-mails.com        1
lucia.ledesma@contractors.scale.com            4
lyndon.tojeno@contractors.scale.com            9
marco.escaroz@contractors.scale.com           11
mario.fri