# Data Preprocessing
This notebook will preprocess searchs, product pages, the our brands API, best sellers, and create our Amazon Database.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import re
import glob
from collections import Counter
import json
from multiprocessing import Pool

import parsers as P
from tqdm import tqdm
from lxml import html
import pandas as pd

from utils import value_counts

In [3]:
# inputs
pattern_searches = '../data/input/search-selenium/*/*/*/2021/01/21/webpage_search.html'
pattern_our_brands_api = '../data/input/search-private-label/*/*/*/2021/*/*/response__*.json'
pattern_our_brands_html = '../data/input/search-selenium-our-brands-filter_/*/*/*/2021/*/*/webpage_search__*.html'
pattern_our_brands_search = '../data/input/search-private-label/*/*/*/2021/*/*/webpage_ourbrands.html'
pattern_best_sellers = '../data/input/best_sellers/*/*/*/*/page_*/2021/*/*/*.html'
pattern_products = '../data/input/selenium-products/*/*/*/2021/02/*/webpage_product.html'

# outputs
data_dir = '../data/intermediary'
fn_brands_api = f'{data_dir}/our_brands_api.csv.gz'
fn_brand_search = f'{data_dir}/our_brands_api.csv.gz'
fn_brands_filter = f'{data_dir}/our_brands_filter.csv.gz'
fn_search = f'{data_dir}/searches.csv.gz'
fn_best_sellers = f'{data_dir}/best_sellers.csv.gz'
fn_queries = 'data/input/search_queries/combined_queries_with_source.csv'
fn_products = f'{data_dir}/products.csv.gz'
fn_products_plus = f'{data_dir}/products_with_meta.csv.gz'
fn_amazon = f'{data_dir}/amazon_private_label.csv.gz'
fn_training_set = f'{data_dir}/training_set.csv.gz'
# fn_amazon_revised = f'{data_dir}/amazon_private_label_corrected.csv.gz'
os.makedirs(data_dir , exist_ok=True)

## search

In [4]:
files_searches = glob.glob(pattern_searches)
len(files_searches)

12717

In [11]:
search_data = []
with Pool(processes=32) as pool:
    for record in tqdm(pool.imap_unordered(P.process_search_result, files_searches), 
                       total=len(files_searches)):
        search_data.extend(record)

df_search = pd.DataFrame(search_data)

100%|██████████| 12717/12717 [01:11<00:00, 178.39it/s]


In [16]:
df_search.product_type.value_counts()

regular_placement            619108
sponsored_banner              22923
editorial_recs_carousel       22797
featured_brands_carousel       8546
regular_placement__missed      4907
misc_carousel                  1859
misc_sponsored_carousel        1284
Name: product_type, dtype: int64

In [30]:
df_search[df_search.product_type == 'sponsored_banner'].is_sponsored.value_counts()

False    22923
Name: is_sponsored, dtype: int64

In [29]:
df_search[df_search.product_type == 'misc_carousel'].is_sponsored.value_counts()

False    3143
Name: is_sponsored, dtype: int64

In [17]:
# df_search.to_csv(fn_search, index=False, compression='gzip')

In [74]:
# df_search[(df_search.product_order == 1) &
#           (df_search.product_type.str.contains('regular_placement'))].search_term.nunique()

12541

In [72]:
df_search[df_search.product_type == 'regular_placement__missed'].iloc[0]

asin                                                        B0772ZFGTB
product_name                                                      None
stars                                                             None
reviews                                                             62
brand                                                             None
product_url                                                           
is_prime                                                         False
is_fresh                                                         False
is_sponsored                                                     False
is_featured_brand                                                False
is_amazons_choice                                                False
is_best_seller                                                   False
product_order                                                        1
search_term                             wvu mountaineers clothing kids
produc

In [4]:
if not os.path.exists(fn_search):
    search_data = []
    with Pool(processes=32) as pool:
        for record in tqdm(pool.imap_unordered(P.process_search_result, files_searches), 
                           total=len(files_searches)):
            search_data.extend(record)
            
    df_search = pd.DataFrame(search_data)
    df_search.to_csv(fn_search, index=False, compression='gzip')

else:
    df_search = pd.read_csv(fn_search)

In [113]:
df_search.groupby('product_type').search_term.nunique() / df_search.search_term.nunique()

product_type
editorial_recs_carousel      0.352660
featured_brands_carousel     0.125199
misc_carousel                0.047467
regular_placement            0.998168
regular_placement__missed    0.141048
sponsored_banner             0.686684
Name: search_term, dtype: float64

## our brands search

In [6]:
files_our_brands_search = glob.glob(pattern_our_brands_search)
len(files_our_brands_search)

795

In [7]:
if not os.path.exists(fn_brand_search):
    files_our_brands_search = glob.glob(pattern_our_brands_search)
    print(len(files_our_brands_search))

    brands_search_data = []
    with Pool(processes=32) as pool:
        for record in tqdm(pool.imap_unordered(P.process_search_result, 
                                               files_our_brands_search), 
                           total=len(files_our_brands_search)):
            brands_search_data.extend(record)

    df_brands_search = pd.DataFrame(brands_search_data)
    df_brands_search = df_brands_search[~df_brands_search.is_sponsored]
    df_brands_search.to_csv(fn_brand_search, index=False, compression='gzip')
    
else:
    df_brands_search = pd.read_csv(fn_brand_search, compression='gzip')

## our brands API

In [8]:
if not os.path.exists(fn_brands_api):
    files_our_brands_api = glob.glob(pattern_our_brands_api)
    print(len(files_our_brands_api))
    
    data_our_brands = []
    with Pool(processes=32) as pool:
        for record in tqdm(pool.imap_unordered(P.process_our_brands_api, 
                                               files_our_brands_api), 
                           total=len(files_our_brands_api)):
            data_our_brands.extend(record)

    df_brands_api = pd.DataFrame(data_our_brands)
    df_brands_api.drop_duplicates(subset=['asin', 'search_term'], inplace=True)
    df_brands_api.to_csv(fn_brands_api, index=False, compression='gzip')
    
else:
    df_brands_api = pd.read_csv(fn_brands_api, compression='gzip')

## our brands HTML

In [9]:
if not os.path.exists(fn_brands_filter):
    files_brands_filter = glob.glob(pattern_our_brands_html)
    print(len(files_brands_filter))
    
    brands_filter_data = []
    with Pool(processes=64) as pool:
        for record in tqdm(pool.imap_unordered(P.process_our_brands_filter, 
                                               files_brands_filter), 
                           total=len(files_brands_filter)):
            brands_filter_data.extend(record)
    df_brands_filter = pd.DataFrame(brands_filter_data)
    df_brands_filter.drop_duplicates(subset=['asin', 'search_term'], inplace=True)
    df_brands_filter.to_csv(fn_brands_filter, index=False, compression='gzip')
    
else:
    df_brands_filter = pd.read_csv(fn_brands_filter, compression='gzip')

In [None]:
# df_brands_filter = pd.DataFrame(brands_filter_data)
# df_brands_filter.drop_duplicates(subset=['asin', 'search_term'], inplace=True)
# df_brands_filter.to_csv(fn_brands_filter, index=False, compression='gzip')

## Best Sellers

In [10]:
if not os.path.exists(fn_best_sellers):
    files_best_sellers = glob.glob(pattern_best_sellers)
    print(len(files_best_sellers))
    
    best_sellers_data = []
    with Pool(processes=32) as pool:
        for record in tqdm(pool.imap_unordered(P.process_best_sellers, 
                                               files_best_sellers), 
                           total=len(files_best_sellers)):
            best_sellers_data.extend(record)
    df_best_sellers = pd.DataFrame(best_sellers_data)
    df_best_sellers.to_csv(fn_best_sellers, index=False, compression='gzip')

else:
    df_best_sellers = pd.read_csv(fn_best_sellers, compression='gzip')

## Amazon products database

In [55]:
df_best_seller_amazon = df_best_sellers[
    (df_best_sellers.path.str.contains('Amazon Devices')) &
    (~df_best_sellers.category.astype(str).str.contains('Kindle'))
]

In [56]:
s1 = df_brands_api[['asin', 'product_name', 'product_url']]
s1 = s1.assign(source="our brands API")

s2 = df_best_seller_amazon[['asin', 'product_name', 'product_url']]
s2 = s2.assign(source='best selling Amazon devices')

s3 =  df_brands_search[['asin', 'product_name', 'product_url']]
s3 = s3.assign(source='our brands searchbar')

s4 = df_brands_filter[['asin', 'product_name', 'product_url']]
s4 = s4.assign(source='our brands filtered search result')

In [57]:
df_amazon = s1.append(s2).append(s3).append(s4)
df_amazon.asin.nunique()

135652

In [58]:
amazon_missed = '|'.join([
    'amazon basics',
    'amazon essentials',
    'amazon exclusive',
    'amazon us exclusive',
    'amazon brand',
    'goodthreads',
    'solimo',
    'whole foods market',
    'amazon commercial',
    'amazon collection',
    'amazon fresh',
    'amazon elements',
    'amazonbasics',
    'pinzon by amazon',
    'simple joys by carter',
    'daily ritual',
    '365 everyday value',
    'lark & ro',
    'presto!'
])

In [59]:
s5 = df_search[
    (df_search.is_featured_brand) & 
    (df_search.product_type == 'regular_placement') &
    (~df_search.asin.isin(df_amazon.asin))
][['asin', 'product_name', 'product_url']]
s5 = s5.assign(source="featured from our brands tag")

s6 = df_search[
    (df_search.product_type == 'featured_brands_carousel') &
    (~df_search.asin.isin(df_amazon.asin))
][['asin', 'product_name', 'product_url']]
s6 = s6.assign(source="featured from our brands carousel")

s7 =  df_search[
    (df_search.product_name.str.contains(amazon_missed, case=False)) &
    (~df_search.asin.isin(df_amazon.asin))
][['asin', 'product_name', 'product_url']]
s7 = s7.assign(source="text search")

In [60]:
df_amazon = df_amazon.append(s5).append(s6).append(s7)

In [61]:
df_amazon.drop_duplicates(subset=['asin'], keep='first', inplace=True)
len(df_amazon)

137420

In [19]:
df_amazon.drop_duplicates(subset=['asin'], keep='first', inplace=True)
len(df_amazon)

137420

In [62]:
df_amazon['product_url'] = df_amazon.product_url.apply(lambda x: 'https://www.amazon.com' + x)

In [63]:
df_amazon.to_csv(fn_amazon, index=False, compression='gzip')

In [85]:
value_counts(df_amazon, col='source')

Unnamed: 0,count,percentage
our brands filtered search result,68671,0.499716
our brands API,66476,0.483743
text search,780,0.005676
featured from our brands carousel,684,0.004977
best selling Amazon devices,505,0.003675
featured from our brands tag,304,0.002212


## Create YAML file for extension...

In [21]:
s2 = s2.drop_duplicates(subset='asin')

In [92]:
with open('../data/output/asins.yaml', 'w') as f:
    f.write('---\n')
    for i, row in s2.iterrows():
        f.write(f"{row['asin']}: {row['product_name']}\n")

In [22]:
data = {row['asin']: row['product_name'] for i, row in s2.iterrows()}

In [24]:
to_add = {
    'B08CKHPP52' : "Introducing Ring Video Doorbell Wired – Convenient, essential features in a compact design, pair with Ring Chime to hear audio alerts in your home (existing doorbell wiring required) - 2021 release",
    'B0876GVJ3D': "Certified Refurbished Ring Video Doorbell – newest generation, 2020 release – 1080p HD video, improved motion detection, easy installation – Satin Nickel",
    'B08M125RNW': "Ring Video Doorbell Pro – Upgraded, with added security features and a sleek design (existing doorbell wiring required)",
    'B086Q54K53': "Introducing Ring Video Doorbell Pro 2 – Best-in-class with cutting-edge features (existing doorbell wiring required) – 2021 release",
    'B07WLP395R': "Ring Video Doorbell 3 Plus – enhanced wifi, improved motion detection, 4-second video previews, easy installation",
    'B08JNR77QY': "All-new Ring Video Doorbell 4 – improved 4-second color video previews plus easy installation, and enhanced wifi – 2021 release",
    'B0876PVDMC': "Certified Refurbished Ring Video Doorbell 3 – enhanced wifi, improved motion detection, easy installation",
    'B08SSDZ6R8': "Introducing Ring Video Doorbell Wired with Ring Chime",
    'B0849J7W5X': "Ring Video Doorbell 3 – enhanced wifi, improved motion detection, easy installation",
    'B0727XJQLD': "Ring Floodlight Camera Motion-Activated HD Security Cam Two-Way Talk and Siren Alarm, White",
    'B08P5499MZ': "Solar Charger for Ring Video Doorbell 2",
    'B08C1W5N87': "Fire TV Stick (3rd Gen) with Alexa Voice Remote (includes TV controls) | HD streaming device | 2021 release",
    'B08N5NQ69J': "Ring Video Doorbell – newest generation, 2020 release – 1080p HD video, improved motion detection, easy installation – Venetian Bronze",
    'B07PDHSLM6': "Echo Dot (3rd Gen) - Smart speaker with Alexa - Heather Gray",
    'B01MZEEFNX': "Amazon Smart Plug, works with Alexa – A Certified for Humans Device",
    'B084KYM1HH': "Certified Refurbished Echo Dot (4th Gen) | Smart speaker with Alexa | Glacier White",
    'B07XKF75B8': "Echo (4th Gen) | With premium sound, smart home hub, and Alexa | Glacier White",
    'B07Q6ZZFLS': "Ring Stick Up Cam Battery HD security camera with custom privacy controls, Simple setup, Works with Alexa - White",
    'B07FZ8S74R': "Echo Dot (3rd Gen) - Smart speaker with Alexa - Charcoal",
    'B0758L64L9': "Ring Spotlight Cam Battery HD Security Camera with Built Two-Way Talk and a Siren Alarm, White, Works with Alexa",
    'B07ZPMCW64': "Ring Alarm 8-piece kit (2nd Gen) – home security system with optional 24/7 professional monitoring – Works with Alexa",
    'B07Q9VBYV8': "Ring Indoor Cam, Compact Plug-In HD security camera with two-way talk, Works with Alexa - White",
    'B0781Z3FNX': "Ring Solar Panel White - Compatible with Ring Spotlight Cam Battery and Stick Up Cam Battery",
    'B084J4MZK8': "Echo Dot (newest generation - 2020 release) | Smart speaker with Alexa | Twilight Blue",
    'B07VHZ41L8': "Echo Show 10 (3rd Gen) | HD smart display with motion and Alexa | Charcoal",
    'B085M66LH1': "All-new Echo Dot (4th Gen) | Smart speaker with clock and Alexa | Twilight Blue",
    'B00X4WHP5E': "Amazon Echo - Black (1st Generation)",
    'B07SLHPDVZ': "Certified Refurbished Echo Show 8 -- HD smart display with Alexa – stay connected with video calling - Charcoal",
    'B084J4QQK1': "Echo Dot (4th Gen) Kids | Designed for kids, with parental controls | Tiger",
    'B08M1N852': "Ring A19 Smart LED Bulb, White, bundle with All-new Echo Show 10 (3rd Gen) - Charcoal"
}

In [25]:
data = {**data, **to_add}

In [26]:
import yaml

In [27]:
with open('../data/output/asins.yaml', 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

## product pages

In [5]:
df_amazon = pd.read_csv(fn_amazon, compression='gzip')

In [6]:
# # dedupe and filter to only products in the search.
# df_product.drop_duplicates(subset='asin', keep='last', inplace=True)
# df_product = df_product[df_product.asin.isin(df_search.asin)]

# # create boolean column if the asin is an Amazon product
amazon_asin = set(df_amazon.asin.unique())
# df_product['is_amazon'] = df_product.asin.apply(lambda x: x in amazon_asin)

# df_product.to_csv(fn_products, index=False, compression='gzip')

In [7]:
# files_products = glob.glob(pattern_products)
# print(len(files_products))

# product_data = []
# with Pool(processes=64) as pool:
#     for record in tqdm(pool.imap_unordered(P.parse_product_page, files_products), 
#                        total=len(files_products)):
#         product_data.append(record)

# df_product = pd.DataFrame(product_data)

# # create some columns for ads
# df_product['n_ads'] = df_product.ads.apply(lambda x: len(x))
# df_product["asin"] = df_product.fn.apply(lambda x: x.split('/2021')[0].split('/')[-1])

# # dedupe and filter to only products in the search.
# df_product.drop_duplicates(subset='asin', keep='last', inplace=True)
# df_product = df_product[df_product.asin.isin(df_search.asin)]

# # create boolean column if the asin is an Amazon product
# amazon_asin = set(df_amazon.asin.unique())
# df_product['is_amazon'] = df_product.asin.apply(lambda x: x in amazon_asin)

# df_product.to_csv(fn_products, index=False, compression='gzip')


204855


100%|██████████| 204855/204855 [23:12<00:00, 147.15it/s]


In [30]:
if not os.path.exists(fn_products):
    files_products = glob.glob(pattern_products)
    print(len(files_products))
    
    product_data = []
    with Pool(processes=64) as pool:
        for record in tqdm(pool.imap_unordered(P.parse_product_page, files_products), 
                           total=len(files_products)):
            product_data.append(record)
            
    df_product = pd.DataFrame(product_data)
    
    # create some columns for ads
    df_product['n_ads'] = df_product.ads.apply(lambda x: len(x))
    df_product["asin"] = df_product.fn.apply(lambda x: x.split('/2021')[0].split('/')[-1])
    
    # dedupe and filter to only products in the search.
    df_product.drop_duplicates(subset='asin', keep='last', inplace=True)
    df_product = df_product[df_product.asin.isin(df_search.asin)]
    
    # create boolean column if the asin is an Amazon product
    amazon_asin = set(df_amazon.asin.unique())
    df_product['is_amazon'] = df_product.asin.apply(lambda x: x in amazon_asin)
    
    df_product.to_csv(fn_products, index=False, compression='gzip')
    
else:
    df_product = pd.read_csv(fn_products, compression='gzip')

In [31]:
df_product.sold_by.value_counts().head(20)

Amazon.com                 38649
AmazonFresh                 1481
Whole Foods Market          1045
Pharmapacks                  308
Amazon.com Services LLC      246
shein online store           165
Zappos                       150
Songmics Direct              141
VM Express                   138
SweatyRocks                  131
EPFamily Direct              129
BestChoiceproducts           124
JoyinDirect                  122
Just Love Fashion            119
MYBATTERYSUPPLIER            108
PajamaGram                   107
iServe                       105
Mr. Pen                      104
TheNewMall                    96
Baleaf Sports                 92
Name: sold_by, dtype: int64

In [129]:
# # these are Amazon products that are sold by non-Amazon sellers
# amazon_product_sellers = df_product[
#     (df_product.sold_by != df_product.shipped_by) & 
#     (df_product.is_amazon == True) &
#     (~df_product.sold_by.isnull()) &
#     (~df_product.asin.isin(df_product[df_product.is_amazon == False].asin))
# ].sold_by.unique()

In [130]:
# to_remove = ['Amazon.com Services', 'Amazon.com Servic',]
# non_exclusived_sellers = df_product[
#     (df_product.sold_by != df_product.shipped_by) & 
#     (df_product.is_amazon == True) &
#     (~df_product.sold_by.Aisnull()) &
#     (df_product.sold_by.isin(df_product[df_product.is_amazon == False].sold_by.unique()))
# ].sold_by.unique()

# non_exclusived_sellers = [_ for _ in non_exclusived_sellers if _ not in to_remove]

In [259]:
# to_check = df_product[df_product.sold_by.isin(non_exclusived_sellers)].merge(df_search)

In [263]:
# cols_to_check = [
#     'product_name', 'asin', 'shipped_by', 'sold_by', 'search_term', 'product_url', 'is_amazon'
# ]

In [267]:
# (to_check[cols_to_check].drop_duplicates(subset='asin')
#                         .sort_values(by='sold_by')
#  .to_csv('../data/temp/products_of_mixed_brands_2021_4_21.csv', index=False)
# )

In [133]:
# len(amazon_brands_and_exclusives)

389

In [34]:
def is_sold_by_amazon(row, col='sold_by'):
    amazon_sellers = ['zappos', 'whole foods', 'amazon']
    if row[col] == None:
        return None
    to_check = str(row[col]).lower()
    if any(seller in to_check for seller in amazon_sellers):
        return True
    elif row['product_by_amazon']:
        return True
#     elif row[col] in amazon_brands_and_exclusives:
#         return True
    else:
        return False
    
def is_shipped_by_amazon(row, col='shipped_by'):
    amazon_sellers = ['zappos', 'whole foods', 'amazon']
    if row[col] == None:
        return None
    to_check = str(row[col]).lower()
    if any(seller in to_check for seller in amazon_sellers):
        return True
    elif row['product_by_amazon']:
        return True
    else:
        return False

In [35]:
df_product["is_sold_by_amazon"] = df_product.apply(is_sold_by_amazon, axis=1)
df_product["is_shipped_by_amazon"] = df_product.apply(is_shipped_by_amazon, axis=1)

In [136]:
df_product.to_csv(fn_products_plus, index=False, compression='gzip')

In [38]:
df_product.sample(1200, random_state=303).is_amazon.value_counts()

False    1127
True       73
Name: is_amazon, dtype: int64

In [None]:
# amazon_sellers_who_are_amazon = [
#     'WaterdropDirect', # water filters
#     'Solid Decor', # pillow cases
#     'HUHETA-US', # masks
#     'icassio', # Laptop cases
#     'PureFitex', # sofa covers
#     'STORAGEGEAR US', # shelves
#     'CAXXA Direct', # organizers
#     'Romanroland', # phoen cases
#     'MOUNTUP', # monitor mounts
#     'Kid Nation_Knits', # apparel for kids
#     'XPCAM', # cameras
#     'vip-life shop', #beasuty accessories
#     'Inno Direct', # pillows
#     'Hiddekel', #picture frames
#     'Infinno Direct', #baby sheeeit not all products are Amazon
# ]

## Tranining set for regression

In [18]:
fn_seller_central = '../data/input/seller_central/All_Q4_2020.csv'
df_seller_central = pd.read_csv(fn_seller_central, skiprows=1, index_col=0)
top_clicked = list(set(
    df_seller_central['#1 Clicked ASIN'].unique().tolist() + 
    df_seller_central['#2 Clicked ASIN'].unique().tolist() +
    df_seller_central['#3 Clicked ASIN'].unique().tolist()))

In [5]:
# df_search = pd.read_csv(fn_search)

In [19]:
df_amazon = pd.read_csv(fn_amazon)

In [20]:
amazon_searches = df_search[
    (df_search.product_type == 'regular_placement') &
    (df_search.asin.isin(df_amazon.asin)) 
].search_term.unique()

In [33]:
df_product = pd.read_csv(fn_products_plus)

In [24]:
qualified_searches = df_search[
    (df_search.product_order >= 20) &
    (df_search.search_term.isin(amazon_searches))
].search_term.unique()

In [42]:
training_set = df_search[
    (df_search.product_type == 'regular_placement') & 
    (df_search.search_term.isin(qualified_searches))
]

In [43]:
training_set['top_clicked'] = training_set['asin'].isin(top_clicked)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  training_set['top_clicked'] = training_set['asin'].isin(top_clicked)


In [44]:
training_set = training_set.merge(df_product[[
    'asin','is_amazon', 'is_sold_by_amazon', 'is_shipped_by_amazon', 
    'has_third_party_sellers'
]], how='left')

In [40]:
training_set['price'] = training_set.price.apply(lambda x: x.split(' ')[0].lstrip("$") if x else None)

In [45]:
training_set.to_csv(fn_training_set, index=False, compression='gzip')

## Limitations

In [270]:
featured_asin = df_search[df_search.is_featured_brand].asin.unique().tolist()

In [273]:
df_brands_api[df_brands_api.asin.isin(featured_asin)].asin.nunique() / len(featured_asin)

0.7027989821882952

In [140]:
df_search[df_search.search_term == 'tylenol']

Unnamed: 0,asin,product_name,stars,reviews,brand,product_url,is_prime,is_fresh,is_sponsored,is_featured_brand,is_amazons_choice,is_best_seller,product_order,search_term,product_type,filename
368670,B088VZQPTG,,,4.8 out of 5 stars. 797,,,False,False,False,False,False,False,-1,tylenol,sponsored_banner,../data/input/search-selenium/t/ty/tylenol/202...
368671,B01HI7WP0U,,,4.8 out of 5 stars. 19794,,,False,False,False,False,False,False,-1,tylenol,sponsored_banner,../data/input/search-selenium/t/ty/tylenol/202...
368672,B0077VYSYY,,,4.8 out of 5 stars. 11548,,,False,False,False,False,False,False,-1,tylenol,sponsored_banner,../data/input/search-selenium/t/ty/tylenol/202...
368673,B0077VYSYY,Tylenol 8 Hour Arthritis Pain Tablets with Ace...,4.8,11548,,/gp/slredirect/picassoRedirect.html/ref=pa_sp_...,True,False,True,False,False,False,1,tylenol,regular_placement,../data/input/search-selenium/t/ty/tylenol/202...
368674,B088VZQPTG,Tylenol Extra Strength Dissolve Packs with Ace...,4.8,797,,/gp/slredirect/picassoRedirect.html/ref=pa_sp_...,True,False,True,False,False,False,2,tylenol,regular_placement,../data/input/search-selenium/t/ty/tylenol/202...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368728,B072J2ZLXM,Children's Tylenol Grape Splash Flavored Liqui...,4.8,312,,/Childrens-Tylenol-Splash-Flavored-Liquid/dp/B...,True,False,False,False,False,False,56,tylenol,regular_placement,../data/input/search-selenium/t/ty/tylenol/202...
368729,B01IAI0BXE,Tylenol Xs Caplets 100 Size 100s Tylenol 500 M...,4.5,29,,/Tylenol-Caplets-Milligram-Strength-Reliever/d...,False,False,False,False,False,False,57,tylenol,regular_placement,../data/input/search-selenium/t/ty/tylenol/202...
368730,B074F2TF79,"Basic Care Extra Strength Pain Relief, Acetami...",4.8,19412,,/gp/slredirect/picassoRedirect.html/ref=pa_sp_...,True,False,False,True,False,False,58,tylenol,regular_placement,../data/input/search-selenium/t/ty/tylenol/202...
368731,B07TBPNXHW,Rite Aid Extra Strength PM Pain Relief Gelcaps...,4.7,379,,/gp/slredirect/picassoRedirect.html/ref=pa_sp_...,True,False,True,False,False,False,59,tylenol,regular_placement,../data/input/search-selenium/t/ty/tylenol/202...
