# 原價屋 零件清單 Parser

In [None]:
#!/usr/bin/python3

import re
import requests
from functools import reduce
from bs4 import BeautifulSoup
from tabulate import tabulate

def parsePage(url, timeout=30):
    """Download ``url`` and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``html.parser`` backend.

    Raises:
        requests.exceptions.HTTPError: If the response status is not 200.
        requests.exceptions.RequestException: On connection errors/timeouts.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0"
    }
    # Send an HTTP request to the URL with a browser-like User-Agent header.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Raise explicitly rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would let error pages through silently.
    if response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"unexpected HTTP status {response.status_code} for {url}",
            response=response,
        )
    return BeautifulSoup(response.text, 'html.parser')

In [None]:
def processAnyPage(soup):
    """Print the text of every <div> nested in each <span> of ``soup``.

    One table row is built per <span> (a list with the text of each <div>
    found inside it); the rows are sorted and printed with tabulate's
    "plain" format.  Nothing is returned.
    """
    rows = [
        [div.text for div in span.find_all('div')]
        for span in soup.find_all('span')
    ]
    rows.sort()
    print(tabulate(rows, tablefmt="plain"))
# Fetch the IGrp=12 listing page and print the raw <span>/<div> text table.
processAnyPage(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=12"))

In [None]:
def processCasePage(soup):
    """Print every product on a case listing page, ordered by dimensions.

    Each <span> in ``soup`` yields one row.  Inside it, the <div> with class
    ``t`` supplies the title, the <div> with class ``x`` supplies the price
    (digits following "NT"), and a <div> whose text contains "尺寸：" (size)
    supplies the dimensions.  Rows are ``(min_len, volume, price, size,
    title)`` tuples, sorted ascending and printed with tabulate's "plain"
    format.

    Sizes containing "~" (ranges) are kept as text but not parsed; sizes
    that do not look like ``A*B*C`` are reported with a "CANT PARSE" line.
    The price is stored as a float (0.0 when absent or unparseable) so that
    ties on the dimensions sort numerically rather than as text.
    """
    rows = []
    for span in soup.find_all('span'):
        title, size = "", ""
        price = 0.0
        min_len = volume = 0.0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            if 't' in classes:
                title = div.text
            elif 'x' in classes:
                # Guard the match: a price cell in an unexpected format
                # should not abort the whole listing.
                m = re.search(r"NT(\d+)\W", div.text)
                if m:
                    price = float(m.group(1))
            elif "尺寸：" in div.text:
                # Normalise "/" separators so one pattern covers both forms.
                size = div.text.replace("/", "*")
                if "~" in size:
                    continue
                m = re.search(r"(\d+\.?\d*)\*(\d+\.?\d*)\*(\d+\.?\d*)", size)
                if m:
                    dims = [float(g) for g in m.groups()]
                    volume = dims[0] * dims[1] * dims[2]
                    min_len = min(dims)
                else:
                    print(f"CANT PARSE: {size}")
        rows.append((min_len, volume, price, size, title))

    print(tabulate(sorted(rows), tablefmt="plain"))


# Fetch the IGrp=14 listing page and print its products sorted by size.
processCasePage(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=14"))

In [None]:
def processCpuPage(soup):
    """Print Intel products whose title contains "-13", cheapest first.

    Each <span> in ``soup`` is one product: the <div> with class ``t`` is the
    title and the <div> with class ``x`` holds the price (digits following
    "NT").  Matching products are printed as ``(price, title)`` rows sorted
    ascending with tabulate's "plain" format.
    """
    rows = []
    for span in soup.find_all('span'):
        title = ""
        # Reset per product so an item without a price cell cannot inherit
        # the previous item's price (or hit an unbound variable).
        price = 0.0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            if 't' in classes:
                title = div.text
            elif 'x' in classes:
                m = re.search(r"NT(\d+)\W", div.text)
                if m:
                    price = float(m.group(1))
        if re.search("Intel", title, re.IGNORECASE) and "-13" in title:
            rows.append((price, title))
    print(tabulate(sorted(rows), tablefmt="plain"))
# Fetch the IGrp=4 listing page and print the filtered CPU table.
processCpuPage(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=4"))

In [None]:
def processGpuPage(soup):
    """Print "3060" products priced between 4000 and 13000, cheapest first.

    Each <span> in ``soup`` is one product: the <div> with class ``t`` is the
    title and the <div> with class ``x`` holds the price (digits following
    "NT").  Matching products are printed as ``(price, title)`` rows sorted
    ascending with tabulate's "plain" format.
    """
    rows = []
    for span in soup.find_all('span'):
        title = ""
        # Reset per product: the filter below reads ``price`` for every
        # span, so it must never be unbound or left over from another item.
        price = 0.0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            if 't' in classes:
                title = div.text
            elif 'x' in classes:
                m = re.search(r"NT(\d+)\W", div.text)
                if m:
                    price = float(m.group(1))
        if 4000 <= price <= 13000 and "3060" in title:
            rows.append((price, title))

    print(tabulate(sorted(rows), tablefmt="plain"))
# Fetch the IGrp=12 listing page and print the filtered GPU table.
processGpuPage(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=12"))

In [None]:
def processMotherBoardPage(soup):
    """Print ITX boards for socket "1700" whose display line contains "5".

    Each <span> in ``soup`` is one product.  The <div> with class ``t`` is
    the title, the <div> with class ``x`` holds the price (digits following
    "NT"), and the remaining <div>s are classified by the label they contain:
    "尺寸" (size), "CPU：", "記憶體：" (memory), "顯示：" (display) and
    "網路：" (network).

    Matching boards are printed as ``(price, title, cpu, ram, size, display,
    network)`` rows sorted ascending with tabulate's "plain" format.
    """
    rows = []
    for span in soup.find_all('span'):
        title = size = cpu = ram = display = network = ""
        # Reset per product so a board without a price cell cannot inherit
        # the previous board's price (or hit an unbound variable).
        price = 0.0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            text = div.text
            if 't' in classes:
                title = text
            elif 'x' in classes:
                m = re.search(r"NT(\d+)\W", text)
                if m:
                    price = float(m.group(1))
            elif "尺寸" in text:
                size = text
            elif "CPU：" in text:
                cpu = text
            elif "記憶體：" in text:
                ram = text
            elif "顯示：" in text:
                display = text
            elif "網路：" in text:
                network = text
        # The "5" test was annotated "PCIE 5.0" by the author; note that it
        # matches any "5" in the display line.
        if re.search("ITX", size, re.IGNORECASE) \
                and "1700" in cpu \
                and "5" in display:
            rows.append((price, title, cpu, ram, size, display, network))

    print(tabulate(sorted(rows), tablefmt="plain"))
# Fetch the IGrp=5 listing page and print the filtered motherboard table.
processMotherBoardPage(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=5"))


In [None]:
def processPsu(soup):
    """Print PSUs of at least 600 W matching the title keywords.

    Each <span> in ``soup`` is one product: the <div> with class ``t`` is the
    title (the wattage is read from the first "<digits>W" in it) and the
    <div> with class ``x`` holds the price (digits following "NT").

    A product is kept when its title contains "全模" (fully modular),
    "SFX規格" (SFX form factor) and "金" (gold) and its wattage is >= 600.
    Rows are ``(price, power, title)``, sorted ascending and printed with
    tabulate's "plain" format.
    """
    rows = []
    for span in soup.find_all('span'):
        title = ""
        price = 0.0
        power = 0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            if 't' in classes:
                title = div.text
                # Accept 3 or 4 digits: a fixed 3-digit pattern reads
                # "1000W" as "000" and wrongly drops 1000 W units.
                m = re.search(r"(\d{3,4})[Ww]", title)
                if m:
                    power = int(m.group(1))
            elif 'x' in classes:
                m = re.search(r"NT(\d+)\W", div.text)
                if m:
                    price = float(m.group(1))
        if "全模" in title and "SFX規格" in title and "金" in title \
                and power >= 600:
            rows.append((price, power, title))

    print(tabulate(sorted(rows), tablefmt="plain"))
# Fetch the IGrp=15 listing page and print the filtered PSU table.
processPsu(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=15"))


In [None]:
def processCooler(soup):
    """Print products whose title contains "導熱係數", ordered by height.

    Each <span> in ``soup`` is one product: the <div> with class ``t`` is the
    title and the <div> with class ``x`` holds the price (digits following
    "NT").  The height is the first number inside a "/高.../" segment of the
    title ("高" = height); it stays 0.0 when no such segment or number
    exists.

    Rows are ``(height, price, title)``, sorted ascending and printed with
    tabulate's "plain" format.
    """
    rows = []
    for span in soup.find_all('span'):
        title = ""
        height = price = 0.0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            if 't' in classes:
                title = div.text
                segment = re.search(r"/(高.*?)/", title)
                if segment:
                    # The segment may carry no digits at all; keep the
                    # default instead of failing on a None match.
                    number = re.search(r"(\d+\.?\d?)", segment.group(1))
                    if number:
                        height = float(number.group(1))
            elif 'x' in classes:
                m = re.search(r"NT(\d+)\W", div.text)
                if m:
                    price = float(m.group(1))
        if "導熱係數" in title:
            rows.append((height, price, title))
    print(tabulate(sorted(rows), tablefmt="plain"))
# Fetch the IGrp=10 listing page and print the filtered cooler table.
processCooler(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=10"))


In [None]:
def processWaterCooler(soup):
    """Print every product on the page with its price and thickness.

    Each <span> in ``soup`` is one product: the <div> with class ``t`` is the
    title and the <div> with class ``x`` holds the price (digits following
    "NT").  The thickness is the first number found in a "/厚.../" segment of
    the title ("厚" = thick), with a looser fallback pattern for titles where
    the segment is not closed by "/"; it stays 0.0 when nothing matches.

    No title filter is applied.  Rows are ``(price, height, title)``, sorted
    ascending and printed with tabulate's "plain" format.
    """
    rows = []
    for span in soup.find_all('span'):
        title = ""
        height = price = 0.0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            if 't' in classes:
                title = div.text
                segment = re.search(r"/(厚.*?)/", title)
                if not segment:
                    segment = re.search(r"/(厚.*?\d.*)\W", title)
                if segment:
                    # Guard against a segment without digits rather than
                    # failing on a None match.
                    number = re.search(r"(\d+\.?\d?)", segment.group(1))
                    if number:
                        height = float(number.group(1))
            elif 'x' in classes:
                m = re.search(r"NT(\d+)\W", div.text)
                if m:
                    price = float(m.group(1))
        # The original condition (`"" in title`) was always true, so every
        # product is listed.
        rows.append((price, height, title))

    print(tabulate(sorted(rows), tablefmt="plain"))
# Fetch the IGrp=11 listing page and print the water-cooler table.
processWaterCooler(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=11"))


In [None]:
def processSSD(soup):
    """Print "2T" SSDs with a read figure of at least 6000.

    Each <span> in ``soup`` is one product: the <div> with class ``t`` is the
    title and the <div> with class ``x`` holds the price (digits following
    "NT").  Read and write figures are the digits following "讀" (read) and
    "寫" (write) in the title, with an optional ":" in between.  Titles where
    either figure is missing are printed as-is so the patterns can be
    checked.

    Rows are ``(read, write, price, title)``, sorted ascending and printed
    with tabulate's "plain" format.
    """
    rows = []
    for span in soup.find_all('span'):
        title = ""
        read_speed = write_speed = 0
        price = 0.0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            if 't' in classes:
                title = div.text
                m = re.search(r"讀:?(\d+)", title)
                if m:
                    read_speed = int(m.group(1))
                m = re.search(r"寫:?(\d+)", title)
                if m:
                    write_speed = int(m.group(1))
                if not read_speed or not write_speed:
                    # Diagnostic: show titles the speed patterns missed.
                    print(title)
            elif 'x' in classes:
                # Guard the match so one odd price cell does not abort
                # the whole listing.
                m = re.search(r"NT(\d+)\W", div.text)
                if m:
                    price = float(m.group(1))
        if "2T" in title and read_speed >= 6000:
            rows.append((read_speed, write_speed, price, title))
    print(tabulate(sorted(rows), tablefmt="plain"))
# Fetch the IGrp=7 listing page and print the filtered SSD table.
processSSD(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=7"))


In [None]:
def processRAM(soup):
    """Print "32G" generation-5 kits at 5600, excluding "雙通16G" titles.

    Each <span> in ``soup`` is one product: the <div> with class ``t`` is the
    title and the <div> with class ``x`` holds the price (digits following
    "NT").  From the title the function reads:

    * the generation: the digit following a "D", optionally "DD"/"DDR",
      preceded by a non-word character;
    * the frequency: the first run of four digits;
    * the CAS latency: the two digits following "CL".

    Titles where generation or frequency could not be read are reported
    (unless they mention "DDR3") so the patterns can be checked.  Rows are
    ``(freq, cl, price, title)``, sorted ascending and printed with
    tabulate's "plain" format.
    """
    rows = []
    for span in soup.find_all('span'):
        title = ""
        cl = gen = freq = 0
        price = 0.0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            if 't' in classes:
                title = div.text
                m = re.search(r"\WDD?R?(\d)", title)
                if m:
                    gen = int(m.group(1))
                m = re.search(r"(\d\d\d\d)M?", title)
                if m:
                    freq = int(m.group(1))
                m = re.search(r"CL(\d\d)", title)
                if m:
                    cl = int(m.group(1))

                if (not freq or not gen) and "DDR3" not in title:
                    print(f"check gen/freq regex: {title}")
            elif 'x' in classes:
                # Guard the match so one odd price cell does not abort
                # the whole listing.
                m = re.search(r"NT(\d+)\W", div.text)
                if m:
                    price = float(m.group(1))
        if "32G" in title and gen == 5 and freq == 5600 \
                and "雙通16G" not in title:
            rows.append((freq, cl, price, title))
    print(tabulate(sorted(rows), tablefmt="plain"))
# Fetch the IGrp=6 listing page and print the filtered RAM table.
processRAM(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=6"))


In [None]:
def processNotebook(soup):
    """Print notebooks priced at 15000 or more, lightest first.

    Each <span> in ``soup`` is one product: the <div> with class ``t`` is the
    title and the <div> with class ``x`` holds the price (digits following
    "NT").  Every other <div> is scanned for the labelled fields "重量："
    (weight, text before "Kg"/"kg"), "尺寸：" (size), "SSD：" (text before
    "GB"), "RAM：" and "CPU：".

    Rows are ``(weight, price, ram, ssd, cpu, size, title)`` and are printed
    with tabulate's "plain" format.  The weight is kept as scraped text for
    display but compared numerically when sorting; rows whose weight is
    missing or not a number sort last.
    """
    def weight_key(text):
        # Compare weights as numbers; plain string order would place
        # "10.5" before "2.1".
        try:
            return float(text)
        except ValueError:
            return float("inf")

    rows = []
    for span in soup.find_all('span'):
        weight = size = cpu = ram = ssd = title = ""
        price = 0.0
        for div in span.find_all('div'):
            classes = div['class'] if div.has_attr('class') else ()
            text = div.text
            if 't' in classes:
                title = text
            elif 'x' in classes:
                # Require at least one digit: an empty capture would make
                # float("") raise ValueError.
                m = re.search(r"NT(\d+)", text)
                if m:
                    price = float(m.group(1))
            else:
                m = re.search("重量：(.*)[Kk]g", text)
                if m:
                    weight = m.group(1)
                m = re.search("尺寸：(.*)", text)
                if m:
                    size = m.group(1)
                m = re.search("SSD：(.*)GB", text)
                if m:
                    ssd = m.group(1)
                m = re.search("RAM：(.*)", text)
                if m:
                    ram = m.group(1)
                m = re.search("CPU：(.*)", text)
                if m:
                    cpu = m.group(1)

        if price >= 15000:
            rows.append((weight, price, ram, ssd, cpu, size, title))
    rows.sort(key=lambda row: (weight_key(row[0]),) + row[1:])
    print(tabulate(rows, tablefmt="plain"))
# Fetch the IGrp=2 listing page and print the filtered notebook table.
processNotebook(parsePage("https://www.coolpc.com.tw/eachview.php?IGrp=2"))