# Argos Search Results Notebook

## Installations

In [1]:
pip install fake-useragent

Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
import json
from fake_useragent import UserAgent

In [3]:
import pandas as pd
import numpy as np

In [4]:
pip install selectorlib

Note: you may need to restart the kernel to use updated packages.


In [5]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException
from selectorlib import Extractor
import time

## Loading Pre-Documented Gender Stereotyped Items

In [6]:
# Load the pre-documented stereotyped items table and display it.
stereo_toys = pd.read_csv('predoc_stereotyped_items.csv', sep=',')
stereo_toys

Unnamed: 0,BOY,GIRL,NEUTRAL
0,vehicle toys,doll,toy animals
1,sport,domestic toys,books
2,military toys,educational art,educational teaching
3,race cars,clothes,musical games
4,outer space toys,dollhouses,games
...,...,...,...
67,toy rocket,barbie furniture set,
68,soccer ball,,
69,blue ipad,pink ipad,
70,toy robots,,


In [7]:
# Read the whole file and split it into a list with one entry per line.
with open('all_items.txt', 'r') as items_file:
    all_items = items_file.read().splitlines()

In [8]:
# Sanity check: number of entries read from all_items.txt.
len(all_items)

166

In [9]:
# Pilot subset: every entry from index 160 onward (6 items for the 166-item list shown here).
trial = all_items[160:]
trial

['legos', 'scooter', 'drum set', 'puzzles', 'board games', 'rock painting']

In [10]:
# Broad product categories. NOTE(review): not referenced by any cell visible in this notebook.
generic = ['toys', 'books', 'learning material', 'games', 'sports']

In [11]:
# Audience labels combined with each item; search() turns 'neutral' into a "for kids" query.
gender = ['boys', 'girls', 'neutral']

## Search

### Collecting ASIN of Retrieved Products

In [34]:
def asin(driver):
    """Collect the ``aria-labelledby`` value of every labelled link on the page.

    The page is queried exactly once. The previous implementation repeated the
    same XPath query nine times with an unused loop index, so every identifier
    appeared nine times in the returned list.

    Parameters
    ----------
    driver
        A Selenium WebDriver (or compatible object) positioned on the page to
        inspect. Only ``find_elements(by, value)`` is used.

    Returns
    -------
    list
        One ``aria-labelledby`` attribute value per matching ``<a>`` element,
        in the order returned by the driver. Empty if nothing matches.
    """
    labelled_links = driver.find_elements('xpath', '//a[@aria-labelledby]')
    return [link.get_attribute('aria-labelledby') for link in labelled_links]

### Get Title Information of Retrieved Products

In [64]:
from selenium.webdriver.common.by import By
def item_info(driver):
    """Return the visible text of every ``<a>`` element that has an ``href``.

    The XPath matches all links on the current page, not only product cards,
    so the result may include text from unrelated links.

    Parameters
    ----------
    driver
        A Selenium WebDriver (or compatible object) positioned on the page to
        inspect.

    Returns
    -------
    list
        The ``.text`` of each matching element, in the order returned by the
        driver.
    """
    anchors = driver.find_elements('xpath', "//a[@href]")
    return [anchor.text for anchor in anchors]

### Collect Product Link of Retrieved Products

In [104]:
def item_link(driver):
    """Collect the ``href`` of every product-card link on the current page.

    Reading an attribute from an already-located element does not load a new
    page, so no pause is taken between links. The previous per-link
    ``time.sleep(1.5)`` only added 1.5 seconds per product without changing the
    result. Any pacing between page loads is left to the caller.

    Parameters
    ----------
    driver
        A Selenium WebDriver (or compatible object) positioned on a search
        results page.

    Returns
    -------
    list
        The ``href`` attribute of each ``<a data-test='component-product-card-link'>``
        element, in the order returned by the driver. Empty if nothing matches.
    """
    card_links = driver.find_elements('xpath', "//a[@data-test = 'component-product-card-link']")
    return [card.get_attribute('href') for card in card_links]

## Running Queries for Boys, Girls, and Neutral

In [105]:
def search(item, who):
    """Run one Argos search for ``item`` aimed at ``who`` and scrape the results page.

    The search phrase is ``"<item>-for-<who>"``, with ``'neutral'`` mapped to
    ``'kids'``. The phrase is percent-encoded before it is placed in the URL so
    that items containing characters such as ``/``, ``?``, ``#`` or ``&`` cannot
    alter the path or the query string.

    Relies on a module-level ``driver`` (a Selenium WebDriver) that must exist
    before this function is called.

    Parameters
    ----------
    item : str
        The product term to search for.
    who : str
        Audience label; ``'neutral'`` is searched as "for kids", any other
        value is used verbatim.

    Returns
    -------
    tuple
        ``((list_asin, item_list), item_page)`` where ``list_asin`` comes from
        ``asin(driver)``, ``item_list`` from ``item_info(driver)`` and
        ``item_page`` from ``item_link(driver)``.
    """
    from urllib.parse import quote

    audience = 'kids' if who == 'neutral' else who
    # safe='' also escapes '/', which would otherwise split the URL path.
    query = quote(item + '-for-' + audience, safe='')
    driver.get(f'https://www.argos.co.uk/search/{query}/?clickOrigin=searchbar:home:term:{query}')
    list_asin = asin(driver)
    item_list = item_info(driver)
    item_page = item_link(driver)
    return (list_asin, item_list), item_page

In [106]:
# Column layout for the scraped identifiers/link texts, and an empty frame with that layout.
columns1 = ['gender', 'query', 'result']
qr = pd.DataFrame(columns=columns1)
# Column layout for the scraped product links, and an empty frame with that layout.
columns2 = ['gender', 'query', 'href']
qr_link = pd.DataFrame(columns=columns2)

In [107]:
# Re-display the items that the loop below will query.
trial

['legos', 'scooter', 'drum set', 'puzzles', 'board games', 'rock painting']

In [108]:
# Number of items in the pilot subset; each is searched once per entry in `gender`.
len(trial)

6

## Running Queries in a Loop

In [109]:
import warnings

from selenium.webdriver.chrome.service import Service

# Blanket suppression: every warning category is hidden from this point on.
warnings.filterwarnings('ignore')

# Selenium 4 takes the chromedriver path through a Service object; passing the
# path positionally is deprecated in Selenium 4 and removed in later 4.x releases.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
data1 = []  # one {'gender', 'query', 'result'} record per (item, gender) search
data2 = []  # one {'gender', 'query', 'href'} record per (item, gender) search
try:
    for item in trial:
        for g in gender:
            result, link = search(item, g)
            data1.append(dict(zip(columns1, [g, item, result])))
            data2.append(dict(zip(columns2, [g, item, link])))
            # Pause between searches so consecutive page loads are spaced out.
            time.sleep(1.5)
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and the finally block guarantees this even when a search raises.
    driver.quit()

In [110]:
# Build the frame in one step from the collected records. DataFrame.append was
# removed in pandas 2.0, and rebuilding from data1 keeps this cell idempotent
# (re-running it no longer duplicates rows).
qr = pd.DataFrame(data1, columns=columns1)
qr

Unnamed: 0,gender,query,result
0,boys,legos,"([product-title-9567867, product-title-1404203..."
1,girls,legos,"([product-title-9567867, product-title-1404203..."
2,neutral,legos,"([product-title-9568206, product-title-9632709..."
3,boys,scooter,"([product-title-8936372, product-title-8880916..."
4,girls,scooter,"([product-title-8936372, product-title-8880916..."
5,neutral,scooter,"([product-title-2073279, product-title-9424443..."
6,boys,drum set,"([product-title-7248937, product-title-1308378..."
7,girls,drum set,"([product-title-7248937, product-title-9654574..."
8,neutral,drum set,"([product-title-7248937, product-title-5318838..."
9,boys,puzzles,"([product-title-9393457, product-title-8803041..."


In [111]:
# Build the frame in one step from the collected records. DataFrame.append was
# removed in pandas 2.0, and rebuilding from data2 keeps this cell idempotent
# (re-running it no longer duplicates rows).
qr_link = pd.DataFrame(data2, columns=columns2)
qr_link

Unnamed: 0,gender,query,href
0,boys,legos,[https://www.argos.co.uk/product/9567867?click...
1,girls,legos,[https://www.argos.co.uk/product/9567867?click...
2,neutral,legos,[https://www.argos.co.uk/product/9568206?click...
3,boys,scooter,[https://www.argos.co.uk/product/8936372?click...
4,girls,scooter,[https://www.argos.co.uk/product/8936372?click...
5,neutral,scooter,[https://www.argos.co.uk/product/2073279?click...
6,boys,drum set,[https://www.argos.co.uk/product/7248937?click...
7,girls,drum set,[https://www.argos.co.uk/product/7248937?click...
8,neutral,drum set,[https://www.argos.co.uk/product/7248937?click...
9,boys,puzzles,[https://www.argos.co.uk/product/9393457?click...


In [118]:
# Inspect the list of product links stored under row label 1.
qr_link.href[1]

['https://www.argos.co.uk/product/9567867?clickSR=slp:term:legos%20for%20girls:1:461:1',
 'https://www.argos.co.uk/product/1404203?clickSR=slp:term:legos%20for%20girls:2:461:1',
 'https://www.argos.co.uk/product/3042711?clickSR=slp:term:legos%20for%20girls:3:461:1',
 'https://www.argos.co.uk/product/9618675?clickSR=slp:term:legos%20for%20girls:4:461:1',
 'https://www.argos.co.uk/product/1403871?clickSR=slp:term:legos%20for%20girls:5:461:1',
 'https://www.argos.co.uk/product/1461442?clickSR=slp:term:legos%20for%20girls:6:461:1',
 'https://www.argos.co.uk/product/1403307?clickSR=slp:term:legos%20for%20girls:7:461:1',
 'https://www.argos.co.uk/product/1404210?clickSR=slp:term:legos%20for%20girls:8:461:1',
 'https://www.argos.co.uk/product/9403938?clickSR=slp:term:legos%20for%20girls:9:461:1',
 'https://www.argos.co.uk/product/9654196?clickSR=slp:term:legos%20for%20girls:10:461:1',
 'https://www.argos.co.uk/product/9552432?clickSR=slp:term:legos%20for%20girls:11:461:1',
 'https://www.argos

In [113]:
# Start from an empty frame; it is filled from `qr` in the next cell.
first20 = pd.DataFrame()

In [114]:
# Copy of qr with a fresh 0..n-1 index. Replaces DataFrame.append (removed in
# pandas 2.0); appending qr to an empty frame with ignore_index=True gave the same frame.
first20 = qr.reset_index(drop=True)

In [115]:
# Display the frame that will be written to disk.
first20

Unnamed: 0,gender,query,result
0,boys,legos,"([product-title-9567867, product-title-1404203..."
1,girls,legos,"([product-title-9567867, product-title-1404203..."
2,neutral,legos,"([product-title-9568206, product-title-9632709..."
3,boys,scooter,"([product-title-8936372, product-title-8880916..."
4,girls,scooter,"([product-title-8936372, product-title-8880916..."
5,neutral,scooter,"([product-title-2073279, product-title-9424443..."
6,boys,drum set,"([product-title-7248937, product-title-1308378..."
7,girls,drum set,"([product-title-7248937, product-title-9654574..."
8,neutral,drum set,"([product-title-7248937, product-title-5318838..."
9,boys,puzzles,"([product-title-9393457, product-title-8803041..."


In [116]:
# 'result' holds the (asin(...), item_info(...)) pair returned by search(); index 1 is the
# item_info list, so this counts the link texts captured for the first row.
len(first20.loc[0]['result'][1])

228

In [117]:
# Write the results without the index column. Each 'result' cell is a tuple of two lists,
# so it is written to the CSV as a single text field.
first20.to_csv('item_results.csv', index = False)