In [10]:
import time
import requests
import traceback
from random import randint
from itertools import cycle
from bs4 import BeautifulSoup
from selenium import webdriver
from builds_links_scraper import get_links
from selenium.webdriver.firefox.options import Options

In [11]:
def parsed_url(page = 1, low_range=250, up_range=5000, build_link = None):
    base_url = 'https://pcpartpicker.com'
    if build_link == None:
        fragment = f'/builds/#B=1&page={page}&X={low_range}00,{up_range}00'
    else: 
        fragment = f'{build_link}'

    return f'{base_url}{fragment}'

In [12]:
def clean_price(price):
    if price[0] == '$':
        price = price.replace('$', '').strip()
        if len(price.split(' ')) > 1:
            return False
    else: 
        return False

    return float(price)

In [17]:
def build_scraper(url, user_agent):
    builds_dict = {}
    build_comps = ['Name','CPU', 'CPU Cooler', 'Motherboard', 'Memory', 'Storage', 'Video Card', 'Case', 'Power Supply', 'Build Price']

    try:
        rq = requests.get(url, headers=user_agent)
    except Exception as e:
        print(e)
        return builds_dict

    soup = BeautifulSoup(rq.content, 'html.parser')
    builds_dict['Name'] = soup.find('h1', {"class": "build__name"}).text
    comp_table_rows = soup.find('table', {"class": "partlist partlist--mini"}).find_all('tr')
    extra_price = 0

    # Two rows is one component, one for the name of the comp and other for the features
    row_it = iter(comp_table_rows)
    for name, component in zip(row_it, row_it):
        try:
            name_text = name.find('h4').text.strip()
            # Getting the name and price components
            component_el = component.find('td', {'class':'td__name'}).findChildren(text=True)
            component_el = list(filter(lambda el: el != '\n', component_el))

            if len(component_el) == 2:
                comp_name = component_el[0]
                comp_price = clean_price(component_el[1])
                # If price isn't in USD
                if not comp_price: 
                    return {}
            else:
                comp_name, comp_price = *component_el, None

            # If the component are in the selected list for scrape
            if name_text in build_comps:
                comp_els = {'Name': comp_name, 'Price': comp_price}

                if name_text not in builds_dict:
                    builds_dict[name_text] = comp_els
                else:
                    comp_copy = builds_dict[name_text].copy()
                    builds_dict[name_text] = []
                    builds_dict[name_text].extend([comp_els, comp_copy])
            else:
                # Calculate the total of the components not taken into account
                extra_price += comp_price if isinstance(comp_price, float) else 0

            
        except Exception as e:
            print(e, url, name_text, component_el)
            continue

        total_table_row = soup.find('table', {"class": "block partlist partlist--mini partlist--totals"}).find('td', {"class": "td__price"}).text
        builds_dict['Build Price'] = round(float(total_table_row.replace('$', '')) - extra_price, 2)
    
    return builds_dict

In [14]:
def main():
    user_agent = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0'}
    builds_links = get_links()
    n_pages = 1
    builds_list = [] 

    for build in builds_links[:1]:
        try:
            build_url = parsed_url(build_link=build['link'])
            build_dict = build_scraper(build_url, user_agent)

            # If the build was scraped correctly
            if build_dict:
                builds_list.append(build_dict)

            delay = randint(2, 10)  
            time.sleep(delay)

        except Exception as e:
            print(traceback.format_exc())
            continue

    return builds_list

In [18]:
if __name__ == '__main__':
    builds = main()

In [20]:
from pprint import pprint
pprint(builds)

[{'Build Price': 1986.0,
  'CPU': {'Name': 'Intel Core i9-9900K 3.6 GHz 8-Core', 'Price': 379.99},
  'CPU Cooler': {'Name': 'Corsair iCUE H150i RGB PRO XT 75 CFM Liquid',
                 'Price': 159.99},
  'Case': {'Name': 'MSI Gungnir 100 ATX Mid Tower', 'Price': 109.99},
  'Memory': {'Name': 'Corsair Vengeance RGB Pro 32 GB (2 x 16 GB) DDR4-3200 '
                     'CL16',
             'Price': 142.99},
  'Motherboard': {'Name': 'MSI MPG Z390 GAMING PRO CARBON AC ATX LGA1151',
                  'Price': 225.99},
  'Name': "Ryland's PC",
  'Power Supply': {'Name': 'ADATA XPG CORE Reactor 850 W 80+ Gold Certified '
                           'Fully Modular ATX',
                   'Price': 158.16},
  'Storage': [{'Name': 'Western Digital BLACK SERIES 2 TB 3.5" 7200RPM',
               'Price': 99.84},
              [{'Name': 'Crucial P1 1 TB M.2-2280 NVME SSD', 'Price': 104.12},
               {'Name': 'Western Digital Blue 1 TB 2.5" SSD', 'Price': 99.99}]],
  'Video Card': {'Name