# Scrape URLs for each racket brand's page on the Tennis Warehouse website

This notebook shows how to scrape the URL of each brand's page from the sidebar of the Tennis Warehouse website.

## Table of Contents
1. [Parsing the sidebar to get uls](#parsing-the-sidebar-to-get-uls)

2. [Parsing ul to get li of brands](#parsing-ul-to-get-li-of-brands)

3. [Extracting and concatenating brand links](#extracting-and-concatenating-each-brand-link-into-a-list-of-brand-links)


In [None]:
# Imports
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

## Parsing the sidebar to get uls

In [None]:
# Starting URL: the racquet landing page that is downloaded and parsed below
URL = "https://www.tennis-warehouse.com/TennisRacquets.html"

In [None]:
# Get html
# The timeout keeps this cell from hanging forever if the server never answers,
# and raise_for_status() stops the notebook here on an HTTP error (4xx/5xx)
# instead of letting the cells below parse an error page.
webpage = requests.get(URL, timeout=30)
webpage.raise_for_status()

In [None]:
# Parse html
# The response body is handed to BeautifulSoup using the "html.parser" backend;
# the resulting soup object is what the cells below search
soup = BeautifulSoup(webpage.content, "html.parser")

In [None]:
# Collect every <ul> on the page that carries the "left_menu-section" class;
# each one is a section of the sidebar menu
sidebar_links = soup.find_all("ul", class_="left_menu-section")

In [None]:
# Preview the first 5 sidebar sections to confirm the extraction worked -
# note that all of the brands are stored in ONE ul tag
sidebar_links[:5]

[<ul class="left_menu-section"> <li class="nav-item"><a href="/TennisRacquets.html">Shop All</a></li> </ul>,
 <ul class="left_menu-section"> <li><a href="/Babolatracquets.html">Babolat</a></li> <li><a href="/Wilsonracquets.html">Wilson</a></li> <li><a href="/Headracquets.html">Head</a></li> <li><a href="/YonexRacquets.html">Yonex</a></li> <li><a href="/PrinceRacquets.html">Prince</a></li> <li><a href="/Tecnifibreracquets.html">Tecnifibre</a></li> <li><a href="/DunlopRacquets.html">Dunlop</a></li> <li><a href="/VolklRacquets.html">Volkl</a></li> <li><a href="/ProKennexracquets.html">ProKennex</a></li> <li><a href="/Solinco_Tennis_Racquets/catpage-SOLINCORAC.html">Solinco</a></li> <li><a href="/LacosteRacquets.html">Lacoste</a></li> </ul>,
 <ul class="left_menu-section"> <li><a href="/JrRacquets.html">Junior Racquets</a></li> <li><a href="/The_Vintage_Racquet_Collection/catpage-BOS.html">Vintage Racquets</a></li> <li><a href="/usedracquets.html">Used Racquets</a></li> <li><a class="is-sa

## Parsing ul to get li of brands

In [None]:
# Extract brand links by targeting li tags in the second ul tag
# Index 1 is positional: in the preview output above, the second
# "left_menu-section" ul is the one that holds the brand entries.
# NOTE(review): this depends on the site's current sidebar order - confirm
# the preview still shows the brands at index 1 before relying on it.
brands = sidebar_links[1].find_all("li")

In [None]:
# List all brands: display every <li> element found in the brand section
brands

[<li><a href="/Babolatracquets.html">Babolat</a></li>,
 <li><a href="/Wilsonracquets.html">Wilson</a></li>,
 <li><a href="/Headracquets.html">Head</a></li>,
 <li><a href="/YonexRacquets.html">Yonex</a></li>,
 <li><a href="/PrinceRacquets.html">Prince</a></li>,
 <li><a href="/Tecnifibreracquets.html">Tecnifibre</a></li>,
 <li><a href="/DunlopRacquets.html">Dunlop</a></li>,
 <li><a href="/VolklRacquets.html">Volkl</a></li>,
 <li><a href="/ProKennexracquets.html">ProKennex</a></li>,
 <li><a href="/Solinco_Tennis_Racquets/catpage-SOLINCORAC.html">Solinco</a></li>,
 <li><a href="/LacosteRacquets.html">Lacoste</a></li>]

In [None]:
# Extract first brand link:
# take the first <li>, find its <a> tag, and read that tag's href attribute
link = brands[0].find("a").get("href")

In [None]:
# Display result of .get - the href is a site-relative path, not a full URL
link

'/Babolatracquets.html'

## Extracting and concatenating each brand link into a list of brand links

In [None]:
# Create a list of all brand links by reading the href of each li's <a> tag.
# A comprehension keeps its loop variable local, so building this list does not
# overwrite the notebook-level `link` value that an earlier cell displays.
brand_links = [brand.find("a").get("href") for brand in brands]

In [None]:
# View the collected brand links (site-relative paths, one per brand)
brand_links

['/Babolatracquets.html',
 '/Wilsonracquets.html',
 '/Headracquets.html',
 '/YonexRacquets.html',
 '/PrinceRacquets.html',
 '/Tecnifibreracquets.html',
 '/DunlopRacquets.html',
 '/VolklRacquets.html',
 '/ProKennexracquets.html',
 '/Solinco_Tennis_Racquets/catpage-SOLINCORAC.html',
 '/LacosteRacquets.html']

In [None]:
# Create a new list with the full URL of each brand page.
# urljoin resolves every site-relative href against the starting URL, so the
# scheme and host come from the URL constant rather than a second hard-coded
# copy of the domain; an href that is already absolute is returned unchanged.
brand_pages = [urljoin(URL, link) for link in brand_links]

In [None]:
# View brand pages: the full URL for each brand
brand_pages

['https://www.tennis-warehouse.com/Babolatracquets.html',
 'https://www.tennis-warehouse.com/Wilsonracquets.html',
 'https://www.tennis-warehouse.com/Headracquets.html',
 'https://www.tennis-warehouse.com/YonexRacquets.html',
 'https://www.tennis-warehouse.com/PrinceRacquets.html',
 'https://www.tennis-warehouse.com/Tecnifibreracquets.html',
 'https://www.tennis-warehouse.com/DunlopRacquets.html',
 'https://www.tennis-warehouse.com/VolklRacquets.html',
 'https://www.tennis-warehouse.com/ProKennexracquets.html',
 'https://www.tennis-warehouse.com/Solinco_Tennis_Racquets/catpage-SOLINCORAC.html',
 'https://www.tennis-warehouse.com/LacosteRacquets.html']