# Metal Designz Scraping

## Imports and Setup

In [1]:
# Imports
from time import sleep
from typing import Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Set Variables
# Pause length between HTTP requests; presumably seconds for time.sleep -- confirm at the usage sites
SLEEP_TIME = 5
# Jump-ring landing page; passed to page_parser() as the crawl's starting URL
STARTING_LINK = 'https://www.metaldesignz.com/jump-rings/'
# Relative path to a CSV file; NOTE(review): not referenced in the visible cells --
# presumably where the collected product links are meant to be saved, confirm
LINK_FILE_PATH = '../../product_page_listings/metal_designz/metal_designz_products.csv'


## Get Data

### Find All Product Pages

Looking at their website, it appears that the basic structure is a main page with sidebar links to a sub-page for each ring category. Each sub-page links to the product pages in that category; if a category has enough products, its listing is split across several paginated pages.


In [2]:

# Helper function that gets the product links from a category page
def get_product_links(soup: BeautifulSoup) -> None:
    """Print the link of every product card found on a category page.

    The function looks for ``<ul class="productGrid">``, walks its
    ``<li class="product">`` children and prints the ``href`` of each
    ``<a class="card-figure__link">`` anchor, one per line.

    Args:
        soup: Parsed HTML of a category listing page.

    Returns:
        None. The links are written to standard output.
    """
    product_grid = soup.find('ul', {'class': 'productGrid'})
    if product_grid is None:
        # No grid on this page (e.g. an empty category): nothing to print,
        # and calling .find_all on None would raise AttributeError.
        return

    for product in product_grid.find_all('li', {'class': 'product'}):
        link_tag = product.find('a', {'class': 'card-figure__link'})
        # Skip cards that lack the expected anchor or an href instead of
        # aborting the whole crawl on one malformed card.
        href = link_tag.get('href') if link_tag is not None else None
        if href:
            print(href)


# Get link to next page if applicable
def get_next_page(soup: BeautifulSoup) -> Optional[str]:
    """Return the ``href`` of the pagination "Next" link, if the page has one.

    Args:
        soup: Parsed HTML of a category listing page.

    Returns:
        The ``href`` of the ``<a aria-label="Next">`` anchor inside
        ``<nav class="pagination">``, or ``None`` when the page has no
        pagination block, no "Next" anchor, or the anchor has no usable href.
    """
    pagination = soup.find('nav', {'class': 'pagination'})
    if pagination is None:
        # Pages without a pagination nav have no next page; without this
        # guard the lookup below would raise AttributeError on None.
        return None

    link_tag = pagination.find('a', {'aria-label': 'Next'})
    if link_tag is None:
        return None

    # .get avoids a KeyError on an anchor without href; an empty href is
    # treated the same as "no next page".
    return link_tag.get('href') or None


# Seconds to wait for a server response before giving up on a request
_REQUEST_TIMEOUT = 30


# Get a page and create a parsable object
def _fetch_soup(url: str) -> BeautifulSoup:
    """Download ``url`` and return its HTML parsed with ``html.parser``."""
    # Without a timeout a single stalled connection would hang the crawl forever
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    return BeautifulSoup(response.content, 'html.parser')


# From the starting link find all product pages
def page_parser(url: str) -> None:
    """Crawl every jump-ring category reachable from ``url`` and print product links.

    The starting page's sidebar is searched for the ``div`` that contains an
    ``h2`` reading "Jump Rings"; every anchor inside it is treated as a
    category page. Each category is fetched, its product links are printed
    through ``get_product_links``, and "Next" pagination links are followed
    until none remains.

    Args:
        url: Address of the page whose sidebar lists the categories.

    Returns:
        None. Progress messages and product links are printed.

    Raises:
        RuntimeError: If the "Jump Rings" sidebar section cannot be found.
    """
    soup = _fetch_soup(url)

    # Get links to category pages
    sidebar = soup.find('aside', {'class': 'page-sidebar'})
    category_sidebar = None
    if sidebar is not None:
        category_sidebar = sidebar.find(
            lambda tag: tag.name == 'div'
            and tag.find('h2', string='Jump Rings') is not None
        )
    if category_sidebar is None:
        # Fail with a readable message instead of an AttributeError on None
        raise RuntimeError(f"Could not find the 'Jump Rings' sidebar section on {url}")

    # dict.fromkeys removes duplicates while keeping the sidebar order, so the
    # crawl order is reproducible; urljoin leaves absolute links untouched and
    # resolves relative ones against the starting page.
    category_links = list(dict.fromkeys(
        urljoin(url, tag['href'])
        for tag in category_sidebar.find_all('a', href=True)
    ))

    # Listing pages already fetched; stops a pagination cycle from looping forever
    visited_pages = set()

    # Iterate through top level links
    for num, link in enumerate(category_links):
        print(f"Checking cateogry: {num} -  {link}")

        # Walk the first page and then every following page of the category
        page_url = link
        while page_url is not None and page_url not in visited_pages:
            visited_pages.add(page_url)

            # Pause before every request so the site is not hammered
            sleep(SLEEP_TIME)
            soup = _fetch_soup(page_url)

            # Print product links
            get_product_links(soup)

            # Get next page if applicable
            next_href = get_next_page(soup)
            page_url = urljoin(page_url, next_href) if next_href else None
            if page_url is not None and page_url not in visited_pages:
                print(f'Found New Page: {page_url}')


# Run the crawl from the jump-ring landing page; progress messages and
# product links are printed to the cell output
page_parser(STARTING_LINK)

Checking cateogry: 0 -  https://www.metaldesignz.com/stainless-steel/
https://www.metaldesignz.com/diamond-coiled-square-stainless-steel-jump-rings-18-gauge-3-16/
https://www.metaldesignz.com/stainless-steel-jump-rings-19-gauge-5-16-id/
https://www.metaldesignz.com/stainless-steel-jump-rings-19-gauge-9-32-id/
https://www.metaldesignz.com/stainless-steel-jump-rings-19-gauge-17-64-id/
https://www.metaldesignz.com/stainless-steel-jump-rings-19-gauge-15-64-id/
https://www.metaldesignz.com/square-wire-stainless-steel-jump-rings-18-gauge-9-64-id/
https://www.metaldesignz.com/stainless-steel-half-round-rings-14g-7-32/
https://www.metaldesignz.com/stainless-steel-half-round-wire-14g-21-128/
https://www.metaldesignz.com/stainless-steel-jump-rings-19-gauge-13-64-id/
https://www.metaldesignz.com/stainless-steel-half-round-rings-14g-3-16/
https://www.metaldesignz.com/stainless-steel-half-round-wire-14g-5-32/
https://www.metaldesignz.com/stainless-steel-jump-rings-19-gauge-11-64-id/
Found New Page:

### Get Data From Product Pages