# Argos Search Results Notebook

## Installations

Importing all necessary modules to run this notebook. Ensure selenium, selectorlib, and fake-useragent have been installed prior to running this notebook.

In [2]:
import requests
import json
from fake_useragent import UserAgent

In [3]:
import pandas as pd
import numpy as np

In [4]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

In [6]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException
from selectorlib import Extractor
import time

## Loading Pre-Documented Gender Stereotyped Items

Taking in predoc_stereotyped_items.csv to use for later analysis. This csv file contains 72 rows.

In [7]:
stereo_toys = pd.read_csv('~/Desktop/485/Data/predoc_stereotyped_items.csv', delimiter=',')
stereo_toys

Unnamed: 0,BOY,GIRL,NEUTRAL
0,vehicle toys,doll,toy animals
1,sport,domestic toys,books
2,military toys,educational art,educational teaching
3,race cars,clothes,musical games
4,outer space toys,dollhouses,games
...,...,...,...
67,toy rocket,barbie furniture set,
68,soccer ball,,
69,blue ipad,pink ipad,
70,toy robots,,


## Loading List of Toys Collected from Previous Research

all_items.txt contains a list of strings, where each string represents a toy that will be searched on Argos. This txt file contains 166 rows.

In [8]:
with open('all_items.txt') as f:
    all_items = f.read().splitlines()

In [9]:
len(all_items)

166

trial is a list of 6 toys from all_items. trial was used to test if the code works on a small sample of all_items rather than running the full iteration every time.

In [10]:
trial = all_items[160:]
trial

['legos', 'scooter', 'drum set', 'puzzles', 'board games', 'rock painting']

In [11]:
generic = ['toys', 'books', 'learning material', 'games', 'sports']

In [12]:
gender = ['boys', 'girls', 'neutral']

## Search

### Collecting EAN of Retrieved Products

Made use of inspect element on Argos' webpage to determine the xpath

In [13]:
def asin(driver):
    asin_list = []
    for index in range(1, 10):
        asins = driver.find_elements('xpath','//a[@aria-labelledby]')
        for asin in asins:
            #time.sleep(0.4)
            asin_list.append(asin.get_attribute('aria-labelledby'))
    return asin_list

### Get Title Information of Retrieved Products

In [14]:
from selenium.webdriver.common.by import By
def item_info(driver):
    item = []
    elem = driver.find_elements('xpath', "//a[meta/@itemprop]")
    for i in elem:
        #time.sleep(0.4)
        item.append(i.text)
    return item

### Collect Product Link of Retrieved Products

In [15]:
def item_link(driver):
    href = []
    links = driver.find_elements('xpath', "//a[@data-test = 'component-product-card-link']")
    for link in links:
        #time.sleep(0.4)
        href.append(link.get_attribute('href'))
    return href

## Preparing Search Function

This function is used to scrape all necessary information for each toy in all_items.

In [16]:
def search(item, who):
    if who == 'neutral':
        query = item + '-for-' + 'kids'
    else:
        query = item + '-for-' + who
    driver.get(f'https://www.argos.co.uk/search/{query}/?clickOrigin=searchbar:home:term:{query}')
    driver.implicitly_wait(10)
    list_asin = asin(driver)
    item_list = item_info(driver)
    item_page = item_link(driver)
    return (list_asin, item_list), item_page

Initializing databases for scraped data.

In [17]:
columns1 = ['gender', 'query', 'result']
qr = pd.DataFrame(columns=columns1)
columns2 = ['gender', 'query', 'href']
qr_link = pd.DataFrame(columns=columns2)

In [18]:
trial

['legos', 'scooter', 'drum set', 'puzzles', 'board games', 'rock painting']

## Running Queries for Boys, Girls, and Neutral

This code is used to scrape all data from toys included in all_items. As of right now, trial is used on line 7 in order to run the code on a small sample, but changing this with all_items will run it on all toys.

In [21]:
import warnings
warnings.filterwarnings('ignore')
driver = webdriver.Chrome(ChromeDriverManager().install())
data1 = []
data2 = []
item = ''
for item in trial:
    for g in gender:
        result, link = search(item, g)
        values1 = [g, item, result]
        values2 = [g, item, link]
        zipped1 = zip(columns1, values1)
        zipped2 = zip(columns2, values2)
        a_dictionary1 = dict(zipped1)
        a_dictionary2 = dict(zipped2)
        time.sleep(1.5)
        data1.append(a_dictionary1)
        data2.append(a_dictionary2)
driver.close()

[WDM] - Downloading: 100%|█████████████████| 8.84M/8.84M [00:05<00:00, 1.66MB/s]


Appending data to previously initialized dataframe.

In [22]:
qr = qr.append(data1, True)
qr

Unnamed: 0,gender,query,result
0,boys,legos,"([product-title-9564822, product-title-1404203..."
1,girls,legos,"([product-title-9564822, product-title-1404203..."
2,neutral,legos,"([product-title-9632709, product-title-1403857..."
3,boys,scooter,"([product-title-9536836, product-title-9451423..."
4,girls,scooter,"([product-title-9536836, product-title-9451423..."
5,neutral,scooter,"([product-title-9424443, product-title-9531556..."
6,boys,drum set,"([product-title-7248937, product-title-1308378..."
7,girls,drum set,"([product-title-7248937, product-title-8869607..."
8,neutral,drum set,"([product-title-7248937, product-title-5318838..."
9,boys,puzzles,"([product-title-1771125, product-title-9393457..."


In [34]:
qr['result'][4][0]

['product-title-9536836',
 'product-title-9451423',
 'product-title-8673439',
 'product-title-1252053',
 'product-title-1177994',
 'product-title-1175587',
 'product-title-8845401',
 'product-title-5034095',
 'product-title-8676742',
 'product-title-4982416',
 'product-title-3187573',
 'product-title-9487734',
 'product-title-8661702',
 'product-title-9452006',
 'product-title-9546888',
 'product-title-7841925',
 'product-title-9492671',
 'product-title-8438038',
 'product-title-1156142',
 'product-title-9430523',
 'product-title-2006923',
 'product-title-9517886',
 'product-title-1174997',
 'product-title-8625250',
 'product-title-8212773',
 'product-title-7622175',
 'product-title-9517446',
 'product-title-5577198',
 'product-title-9424137',
 'product-title-9457032',
 'product-title-9536836',
 'product-title-9451423',
 'product-title-8673439',
 'product-title-1252053',
 'product-title-1177994',
 'product-title-1175587',
 'product-title-8845401',
 'product-title-5034095',
 'product-ti

In [35]:
qr['result'][4][1]

['',
 'EVO Light Up Move and Groove Scooter - Pink',
 '',
 'EVO Light Up Move and Groove Scooter - Blue',
 '',
 'Spider-Man Tri Scooter',
 '',
 'Zinc Verge Pro Folding Big Wheeled Scooter',
 '',
 'Evo Light Up Inline Folding Scooter - Pink',
 '',
 'Evo Dino Mini Cruiser Scooter',
 '',
 'Zinc Black Folding T-Motion Tri Scooter',
 '',
 'Zinc Folding Inline Scooter - White',
 '',
 'The Ultimate Spider-Man Folding Scooter',
 '',
 'Zinc Folding Inline Scooter - Purple',
 '',
 'Zinc Detour Stunt Scooter - Yellow',
 '',
 'Zinc Light Up Safari Unicorn Scooter',
 '',
 'Disney Princess Tri Scooter',
 '',
 'EVO Move and Groove Scooter - Blue',
 '',
 'EVO Colour Burst Folding Tri Scooter',
 '',
 'Zinc Xtreme Stunt Scooter',
 '',
 'EVO 3-in-1 Cruiser Scooter',
 '',
 'Ozbozz Unicorn Folding Scooter with Soft Toy',
 '',
 'Evo Light Up Inline Folding Scooter - Blue',
 '',
 'Peppa Pig Switch It Multi Character Tri Scooter',
 '',
 'Zinc Detour Stunt Scooter - Pink',
 '',
 'Paw Patrol Switch It Multi Cha

In [24]:
qr_link = qr_link.append(data2, True)
qr_link

Unnamed: 0,gender,query,href
0,boys,legos,[https://www.argos.co.uk/product/9564822?click...
1,girls,legos,[https://www.argos.co.uk/product/9564822?click...
2,neutral,legos,[https://www.argos.co.uk/product/9632709?click...
3,boys,scooter,[https://www.argos.co.uk/product/9536836?click...
4,girls,scooter,[https://www.argos.co.uk/product/9536836?click...
5,neutral,scooter,[https://www.argos.co.uk/product/9424443?click...
6,boys,drum set,[https://www.argos.co.uk/product/7248937?click...
7,girls,drum set,[https://www.argos.co.uk/product/7248937?click...
8,neutral,drum set,[https://www.argos.co.uk/product/7248937?click...
9,boys,puzzles,[https://www.argos.co.uk/product/1771125?click...


In [25]:
qr_link.href[1]

['https://www.argos.co.uk/product/9564822?clickSR=slp:term:legos%20for%20girls:1:1585:1',
 'https://www.argos.co.uk/product/1404203?clickSR=slp:term:legos%20for%20girls:2:1585:1',
 'https://www.argos.co.uk/product/2018375?clickSR=slp:term:legos%20for%20girls:3:1585:1',
 'https://www.argos.co.uk/product/9567867?clickSR=slp:term:legos%20for%20girls:4:1585:1',
 'https://www.argos.co.uk/product/1403307?clickSR=slp:term:legos%20for%20girls:5:1585:1',
 'https://www.argos.co.uk/product/8863342?clickSR=slp:term:legos%20for%20girls:6:1585:1',
 'https://www.argos.co.uk/product/1404227?clickSR=slp:term:legos%20for%20girls:7:1585:1',
 'https://www.argos.co.uk/product/9235319?clickSR=slp:term:legos%20for%20girls:8:1585:1',
 'https://www.argos.co.uk/product/8847014?clickSR=slp:term:legos%20for%20girls:9:1585:1',
 'https://www.argos.co.uk/product/1404492?clickSR=slp:term:legos%20for%20girls:10:1585:1',
 'https://www.argos.co.uk/product/1403682?clickSR=slp:term:legos%20for%20girls:11:1585:1',
 'https:

Initialize a database to view only the first 20 items and their respective data points.

In [28]:
first20 = pd.DataFrame()
first20

In [29]:
first20 = first20.append(qr, ignore_index = True)

In [30]:
first20

Unnamed: 0,gender,query,result
0,boys,legos,"([product-title-9564822, product-title-1404203..."
1,girls,legos,"([product-title-9564822, product-title-1404203..."
2,neutral,legos,"([product-title-9632709, product-title-1403857..."
3,boys,scooter,"([product-title-9536836, product-title-9451423..."
4,girls,scooter,"([product-title-9536836, product-title-9451423..."
5,neutral,scooter,"([product-title-9424443, product-title-9531556..."
6,boys,drum set,"([product-title-7248937, product-title-1308378..."
7,girls,drum set,"([product-title-7248937, product-title-8869607..."
8,neutral,drum set,"([product-title-7248937, product-title-5318838..."
9,boys,puzzles,"([product-title-1771125, product-title-9393457..."


In [31]:
len(first20.loc[0]['result'][1])

60

In [32]:
first20.to_csv('argos_search_results.csv', index = False)

## Data Cleaning

In [2]:
import pandas as pd
data = pd.read_csv('argos_search_results.csv', delimiter=',')
data

Unnamed: 0,gender,query,result
0,boys,legos,"(['product-title-9564822', 'product-title-1404..."
1,girls,legos,"(['product-title-9564822', 'product-title-1404..."
2,neutral,legos,"(['product-title-9632709', 'product-title-1403..."
3,boys,scooter,"(['product-title-9536836', 'product-title-9451..."
4,girls,scooter,"(['product-title-9536836', 'product-title-9451..."
5,neutral,scooter,"(['product-title-9424443', 'product-title-9531..."
6,boys,drum set,"(['product-title-7248937', 'product-title-1308..."
7,girls,drum set,"(['product-title-7248937', 'product-title-8869..."
8,neutral,drum set,"(['product-title-7248937', 'product-title-5318..."
9,boys,puzzles,"(['product-title-1771125', 'product-title-9393..."


In [22]:
data['result'][4]

"(['product-title-9536836', 'product-title-9451423', 'product-title-8673439', 'product-title-1252053', 'product-title-1177994', 'product-title-1175587', 'product-title-8845401', 'product-title-5034095', 'product-title-8676742', 'product-title-4982416', 'product-title-3187573', 'product-title-9487734', 'product-title-8661702', 'product-title-9452006', 'product-title-9546888', 'product-title-7841925', 'product-title-9492671', 'product-title-8438038', 'product-title-1156142', 'product-title-9430523', 'product-title-2006923', 'product-title-9517886', 'product-title-1174997', 'product-title-8625250', 'product-title-8212773', 'product-title-7622175', 'product-title-9517446', 'product-title-5577198', 'product-title-9424137', 'product-title-9457032', 'product-title-9536836', 'product-title-9451423', 'product-title-8673439', 'product-title-1252053', 'product-title-1177994', 'product-title-1175587', 'product-title-8845401', 'product-title-5034095', 'product-title-8676742', 'product-title-4982416