In [1]:
import pandas as pd
import numpy as np

import requests
import json
from fake_useragent import UserAgent

import time
import warnings

In [2]:
# HTTP headers passed to requests.get in amazon_scrape; the user-agent value
# is taken from fake_useragent's `chrome` attribute.
ua = UserAgent()
headers = {"user-agent": ua.chrome}

### Loading the list of toys

In [3]:
# Read the item list (one search item per line) into a list of strings.
with open('../predoc_info/all_items.txt') as items_file:
    raw_items = items_file.read()
contents = raw_items.splitlines()

## Loading pre-documented gender-stereotyped toys

In [4]:
# Table of pre-documented items with BOY / GIRL / NEUTRAL columns.
stereo_toys = pd.read_csv(
    '../predoc_info/predoc_stereotyped_items.csv',
    sep=',',
)
stereo_toys

Unnamed: 0,BOY,GIRL,NEUTRAL
0,vehicle toys,doll,toy animals
1,sport,domestic toys,books
2,military toys,educational art,educational teaching
3,race cars,clothes,musical games
4,outer space toys,dollhouses,games
...,...,...,...
67,toy rocket,barbie furniture set,
68,soccer ball,,
69,blue ipad,pink ipad,
70,toy robots,,


### Dataset stats

In [5]:
# Unique, non-null entries of each stereotype column, in column order.
boy_toys, girl_toys, neutral_toys = (
    stereo_toys[label].dropna().unique().tolist()
    for label in ('BOY', 'GIRL', 'NEUTRAL')
)

# Alternating label / count pairs; print() joins them with single spaces.
stats_parts = [
    "stereotypical boy toys: ", len(boy_toys),
    " stereotypical girl toys: ", len(girl_toys),
    " stereotypically gender neutral toys: ", len(neutral_toys),
]
print(*stats_parts)

stereotypical boy toys:  71  stereotypical girl toys:  63  stereotypically gender neutral toys:  32


## Preparing the data for querying

### Adding "for" to each item to form the autocomplete prefix

In [6]:
# Pair every item with the prefix that is sent to autocomplete: "<item> for".
search_terms = [(item, item + ' for') for item in contents]
search_terms[:5]

[('vehicle toys', 'vehicle toys for'),
 ('sport', 'sport for'),
 ('military toys', 'military toys for'),
 ('race cars', 'race cars for'),
 ('outer space toys', 'outer space toys for')]

### Defining scraping functions

In [7]:
def amazon_scrape(query, timeout=10):
    """Fetch the raw autocomplete response for a search prefix.

    Parameters
    ----------
    query : str
        Search prefix to complete (e.g. ``"doll for"``).
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.exceptions.RequestException
        On connection errors, timeouts, or a non-2xx HTTP status.
    ValueError
        If the response body is not valid JSON.
    """
    # Function-scope import keeps this cell self-contained.
    from urllib.parse import quote

    # Percent-encode the prefix so characters such as '&', '#' or '+' in an
    # item name cannot terminate or alter the `prefix` query parameter.
    prefix = quote(str(query), safe='')

    # NOTE(review): the host is completion.amazon.com and mid=ATVPDKIKX0DER,
    # while results are labelled 'Amazon_UK' elsewhere in this notebook --
    # confirm this is the intended marketplace.
    # NOTE(review): session-id / customer-id / request-id are hard-coded;
    # confirm they are not personal identifiers before sharing the notebook.
    url = f"https://completion.amazon.com/api/2017/suggestions?session-id=131-6901588-5783061&customer-id=A373R49950VTB6&request-id=72HVV86S4JC3AK898B3X&page-type=Gateway&lop=en_gb&site-variant=desktop&client-info=amazon-search-ui&mid=ATVPDKIKX0DER&alias=aps&ks=undefined&prefix={prefix}&event=onFocusWithSearchTerm&limit=11&b2b=0&fresh=0&fb=1&suggestion-type=KEYWORD&suggestion-type=WIDGET&_=1637596795610"

    # NOTE(review): verify=False disables TLS certificate checking; kept to
    # preserve existing behaviour, but it should be removed if possible.
    # The timeout stops one stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, verify=False, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()

In [8]:
def amazon_auto(item):
    """Return the list of suggestion strings for the search prefix ``item``.

    Calls ``amazon_scrape`` and collects the ``'value'`` field of every entry
    under the response's ``'suggestions'`` key, in response order.
    """
    payload = amazon_scrape(item)
    return [entry['value'] for entry in payload['suggestions']]

## Run queries

In [9]:
# Column names shared by the scraped records and the results DataFrame.
columns = ['platform', 'item', 'suggestions']
# Empty frame with those columns; the scraped rows are added in a later cell.
df = pd.DataFrame(columns = columns)

In [10]:
# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')
platforms = ['Amazon_UK']
data = []
for item, q in search_terms:
    for platform in platforms:
        # One record per (platform, item): keys come from `columns`.
        record = dict(zip(columns, [platform, item, amazon_auto(q)]))
        # Pause between requests before storing the record.
        time.sleep(1.5)
        data.append(record)

In [11]:
# Spot-check the first scraped record (platform, item, suggestions).
data[0]

{'platform': 'Amazon_UK',
 'item': 'vehicle toys',
 'suggestions': ['vehicle toys for toddlers 1-3',
  'vehicle toys for 1 year old',
  'vehicle toys for toddlers',
  'vehicle toys for 2 year old',
  'vehicle toys for boys 5-8',
  'vehicle toys for boys 3-5 years old',
  'vehicle toys for 4 year old boys',
  'vehicle toys for kids',
  'vehicle toys for boys',
  'vehicle toys for 3 year old boys']}

In [12]:
# Put the scraped records into a DataFrame with the platform, query item and
# scraped suggestions.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. Building
# the frame directly from the list of dicts works on every pandas version and
# is idempotent: re-running this cell no longer duplicates rows.
df = pd.DataFrame(data, columns=columns)
df

Unnamed: 0,platform,item,suggestions
0,Amazon_UK,vehicle toys,"[vehicle toys for toddlers 1-3, vehicle toys f..."
1,Amazon_UK,sport,"[sport formula 99, sport formula 99 vitamin po..."
2,Amazon_UK,military toys,"[military toys for boys age 8-12, military toy..."
3,Amazon_UK,race cars,"[race cars for kids ages 3-5, race cars for bo..."
4,Amazon_UK,outer space toys,"[outer space toys for kids 5-7, outer space to..."
...,...,...,...
161,Amazon_UK,scooter,"[scooter for kids ages 6-12, scooter for kids ..."
162,Amazon_UK,drum set,"[drum set for toddlers 1-3, drum set for kids ..."
163,Amazon_UK,puzzles,"[puzzles for kids ages 3-5, puzzles for adults..."
164,Amazon_UK,board games,"[board games for adults, board games for kids ..."


In [13]:
# Write the results to CSV; index=False leaves the DataFrame index out of the file.
# NOTE(review): 'suggestions' holds Python lists, which presumably end up in the
# CSV as their string representation -- confirm downstream readers parse them back.
df.to_csv('az_uk_query_suggestions.csv', index = False)