## MySlabs Scraper

In [1]:
import requests
import urllib
import numpy as np
import pandas as pd
from scipy import stats
from selenium import webdriver
from bs4 import BeautifulSoup as bs

In [2]:
# NOTE(review): presumably the total number of archive pages on the site — confirm.
# Not referenced by the code visible in this notebook excerpt; the scraper run
# further down passes its own page count.
pages = 1840

In [112]:
def get_price(item):
    """Return the price portion of a listing's label as a plain numeric string.

    The label is the text of the first ``div`` with class ``slab-feed-label``
    inside *item*. The price is everything before the first ``-``, with the
    ``$`` sign and thousands separators removed.

    Args:
        item: A parsed listing element exposing ``find`` (e.g. a
            BeautifulSoup tag).

    Returns:
        The cleaned price text (e.g. ``"1234.50"``), or ``None`` when the
        listing has no label element.
    """
    label = item.find('div', {'class': 'slab-feed-label'})
    text = getattr(label, 'text', None)
    if text is None:
        # A missing label used to crash on ``None.strip()``; report "no
        # price" instead so one malformed listing does not abort the run.
        return None

    price_part, _, _ = text.strip().partition('-')
    return price_part.replace('$', '').replace(',', '').strip()

def get_sold_date(item):
    """Return the date portion of a listing's label.

    The label is the text of the first ``div`` with class ``slab-feed-label``
    inside *item*. The date is everything after the first ``-``, stripped of
    surrounding whitespace.

    Args:
        item: A parsed listing element exposing ``find`` (e.g. a
            BeautifulSoup tag).

    Returns:
        The date text, or ``None`` when the listing has no label element or
        the label contains no ``-`` separator.
    """
    label = item.find('div', {'class': 'slab-feed-label'})
    text = getattr(label, 'text', None)
    if text is None:
        # A missing label used to crash on ``None.strip()``.
        return None

    # Split on the FIRST hyphen only, so a hyphenated date such as
    # "2021-01-05" is returned whole instead of being cut down to "2021".
    _, separator, date_part = text.strip().partition('-')
    if not separator:
        # No separator means there is no date; indexing [1] used to raise
        # IndexError here.
        return None
    return date_part.strip()

In [113]:
class MySlabsScraper:
    """Scrape listings from the MySlabs archive pages into a DataFrame.

    The stages chain together: ``href_builder`` -> ``get_data`` -> ``parse``
    -> ``output``. Each stage calls the previous one, so calling ``output``
    runs the whole pipeline.
    """

    # Seconds to wait on a single page request before giving up, so one
    # stalled connection cannot hang the whole scrape.
    REQUEST_TIMEOUT = 30

    # Keys of each parsed record and column order of the resulting frame.
    COLUMNS = ('link', 'soldprice', 'solddate')

    def __init__(self, pages) -> None:
        """Store how many archive pages (starting at page 1) to scrape."""
        self.pages = pages

    def href_builder(self):
        """Return the archive page URLs for pages ``1 .. self.pages``."""
        url = 'https://myslabs.com/browse/archive/?page='
        return [url + str(page) for page in range(1, self.pages + 1)]

    def get_data(self):
        """Download every archive page and return the parsed documents.

        Returns:
            A list with one BeautifulSoup document per URL from
            ``href_builder``, in page order.
        """
        soups = []
        for link in self.href_builder():
            page = requests.get(link, timeout=self.REQUEST_TIMEOUT)
            soups.append(bs(page.text, 'html.parser'))
        return soups

    def parse(self):
        """Extract link, sold price and sold date from every listing.

        Prints a progress line per listing.

        Returns:
            A list of dicts with the keys ``link``, ``soldprice`` and
            ``solddate``.
        """
        results = []
        for soup in self.get_data():
            results.extend(soup.find_all('div', {'class': 'slab_item psa'}))

        total = len(results)
        product_list = []
        # enumerate gives the true position; ``results.index(item)`` rescanned
        # the list on every iteration and returned the first *equal* element,
        # which misnumbers listings that compare equal.
        for position, item in enumerate(results, start=1):
            print(f'Item: {position} of {total}')
            product_list.append({
                'link': 'https://myslabs.com' + item.find('a')['href'],
                'soldprice': get_price(item),
                'solddate': get_sold_date(item),
            })
        return product_list

    def output(self):
        """Run the scrape and return the results as a typed DataFrame.

        Returns:
            A DataFrame with the columns ``link`` (str), ``soldprice``
            (numeric) and ``solddate`` (datetime). The columns are present
            even when no listings were found.
        """
        # Naming the columns keeps an empty scrape from producing a frame
        # without columns, on which ``df['link']`` would raise KeyError.
        df = pd.DataFrame(self.parse(), columns=list(self.COLUMNS))
        df['link'] = df['link'].astype(str)
        df['soldprice'] = pd.to_numeric(df['soldprice'])
        df['solddate'] = pd.to_datetime(df['solddate'])
        return df

In [114]:
# Trial run over the first 100 archive pages (page=1 .. page=100).
# output() downloads and parses every page, prints one progress line per
# listing, and returns a DataFrame with link / soldprice / solddate columns.
slabs_test = MySlabsScraper(100)
out = slabs_test.output()

Item: 1 of 2400
Item: 2 of 2400
Item: 3 of 2400
Item: 4 of 2400
Item: 5 of 2400
Item: 6 of 2400
Item: 7 of 2400
Item: 8 of 2400
Item: 9 of 2400
Item: 10 of 2400
Item: 11 of 2400
Item: 12 of 2400
Item: 13 of 2400
Item: 14 of 2400
Item: 15 of 2400
Item: 16 of 2400
Item: 17 of 2400
Item: 18 of 2400
Item: 19 of 2400
Item: 20 of 2400
Item: 21 of 2400
Item: 22 of 2400
Item: 23 of 2400
Item: 24 of 2400
Item: 25 of 2400
Item: 26 of 2400
Item: 27 of 2400
Item: 28 of 2400
Item: 29 of 2400
Item: 30 of 2400
Item: 31 of 2400
Item: 32 of 2400
Item: 33 of 2400
Item: 34 of 2400
Item: 35 of 2400
Item: 36 of 2400
Item: 37 of 2400
Item: 38 of 2400
Item: 39 of 2400
Item: 40 of 2400
Item: 41 of 2400
Item: 42 of 2400
Item: 43 of 2400
Item: 44 of 2400
Item: 45 of 2400
Item: 46 of 2400
Item: 47 of 2400
Item: 48 of 2400
Item: 49 of 2400
Item: 50 of 2400
Item: 51 of 2400
Item: 52 of 2400
Item: 53 of 2400
Item: 54 of 2400
Item: 55 of 2400
Item: 56 of 2400
Item: 57 of 2400
Item: 58 of 2400
Item: 59 of 2400
Item: 