In [3]:
import json
import os
import time
import uuid
from urllib.parse import quote_plus
from uuid import UUID

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


In [6]:
# Start Chrome via the chromedriver binary at ./chromedriver and open the
# Depop landing page. `driver` is the module-level browser handle that the
# Scrapper class below operates on.
driver = webdriver.Chrome(
    service=Service('./chromedriver'),
)
driver.get("https://www.depop.com")
    
# class UUIDEncoder(json.JSONEncoder):
#     def default(self, obj):
#         if isinstance(obj, UUID):
#             # if the obj is uuid, we simply return the value of uuid
#             return obj.hex
#         return json.JSONEncoder.default(self, obj)

class Scrapper:
    """Thin Selenium helper for browsing depop.com and scraping its pages.

    The class keeps no state of its own: every method acts on the module-level
    ``driver`` (a Selenium WebDriver), which therefore has to exist before the
    class is instantiated or any method is called.
    """

    def __init__(self):
        """Click the cookie-consent button if it is present on the current page."""
        try:
            driver.find_element(by=By.CSS_SELECTOR, value="button[class='sc-kEqXSa sc-iqAclL sc-ciSkZP hQtFsL cmWQHQ exduyW']").click()
        except NoSuchElementException:
            print("no (accept cookies button) found")

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _inner_text(by, value):
        """Return the ``innerText`` of the first element matching the locator.

        Raises:
            NoSuchElementException: if no element matches.
        """
        return driver.find_element(by=by, value=value).get_attribute("innerText")

    @staticmethod
    def _inner_text_or(by, value, default):
        """Same as ``_inner_text`` but return ``default`` when nothing matches."""
        try:
            return Scrapper._inner_text(by, value)
        except NoSuchElementException:
            return default

    @staticmethod
    def _single_size():
        """Return the text of the single-size table cell, or "Multiple sizes"."""
        try:
            size_row = driver.find_element(by=By.CSS_SELECTOR, value="tr[data-testid='product__singleSize']")
            return size_row.find_element(by=By.XPATH, value=".//td").get_attribute("innerText")
        except NoSuchElementException:
            return "Multiple sizes"

    # --------------------------------------------------------------- navigation

    def nav_by_search(self, search_item):
        """Open the Depop search results page for ``search_item``.

        The term is URL-encoded so that spaces, ``&``, ``#`` and similar
        characters end up inside the ``q`` parameter instead of breaking it.
        """
        driver.get("https://www.depop.com/search/?q=" + quote_plus(search_item))

    def nav_by_shop(self, shop_name):
        """Open the shop page ``https://www.depop.com/<shop_name>``."""
        driver.get("https://www.depop.com/" + shop_name)

    def header_url_list(self):
        """Collect the ``href`` of every sub-menu link in the header navigation.

        Returns:
            list: hrefs in document order. A sub-menu entry without an anchor
            is skipped on its own; the remaining entries are still collected.
        """
        header_url = []
        nav_items = driver.find_elements(by=By.CLASS_NAME, value="styles__NavigationItem-sc-__sc-10mkzda-3")
        for nav_item in nav_items:
            for child in nav_item.find_elements(by=By.XPATH, value=".//div/ul/li"):
                try:
                    anchor = child.find_element(by=By.XPATH, value=".//a")
                except NoSuchElementException:
                    # Entry without a link: skip just this one.
                    continue
                header_url.append(anchor.get_attribute("href"))
        return header_url

    def listing_url(self, listing_no):
        """Return the link of the ``listing_no``-th product card on the page.

        Raises:
            IndexError: if fewer product cards are currently in the DOM.
            NoSuchElementException: if the card contains no anchor.
        """
        cards = driver.find_elements(by=By.CLASS_NAME, value="styles__ProductCardContainer-sc-__sc-13q41bc-8")
        if not -len(cards) <= listing_no < len(cards):
            raise IndexError(
                "listing %d requested but only %d product cards are loaded"
                % (listing_no, len(cards))
            )
        return cards[listing_no].find_element(by=By.XPATH, value=".//a").get_attribute("href")

    def scroll_to_bottom(self, step=30):
        """Scroll the page down in ``step``-pixel increments.

        The page height is read once before scrolling starts, so content that
        is added while scrolling is not followed; call again to continue.

        Raises:
            ValueError: if ``step`` is not positive (the loop would never end).
        """
        if step <= 0:
            raise ValueError("step must be a positive number of pixels")
        page_height = driver.execute_script("return document.body.scrollHeight")
        offset = 0
        while offset <= page_height:
            driver.execute_script("window.scrollTo(0, arguments[0]);", offset)
            offset += step

    def back_page(self):
        """Go one entry back in the browser history."""
        driver.execute_script("window.history.go(-1)")

    def open_url_new_tab(self, url):
        """Open ``url`` in a new tab (focus is not switched to it).

        The URL is handed to the script as an argument rather than spliced
        into the JavaScript source, so quotes in it cannot break the call.
        """
        driver.execute_script("window.open(arguments[0]);", url)

    def close_tab(self):
        """Close the tab the driver is currently focused on."""
        driver.close()

    def switch_tab(self, tab_no):
        """Focus the tab at position ``tab_no`` in ``driver.window_handles``."""
        driver.switch_to.window(driver.window_handles[tab_no])

    # ----------------------------------------------------------------- scraping

    def get_shop_data(self):
        """Scrape the header of the shop page that is currently open.

        Returns:
            dict: keys "Username", "Items Sold", "Last Activity", "Followers",
            "Following" and "Bio Description" (the string "None" when the shop
            has no bio). All values are the elements' raw ``innerText``.

        Raises:
            NoSuchElementException: if any field other than the bio is missing.
        """
        return {
            "Username": self._inner_text(By.CLASS_NAME, "styles__UserName-sc-__r941b9-4"),
            "Items Sold": self._inner_text(By.XPATH, "//*[@id='main']/div[1]/div[1]/div/div[2]/div[1]/p"),
            "Last Activity": self._inner_text(By.XPATH, '//*[@id="main"]/div[1]/div[1]/div/div[2]/div[2]/p'),
            "Followers": self._inner_text(By.XPATH, '//*[@id="main"]/div[1]/div[2]/button[1]/p[1]'),
            "Following": self._inner_text(By.XPATH, '//*[@id="main"]/div[1]/div[2]/button[2]/p[1]'),
            "Bio Description": self._inner_text_or(By.XPATH, '//*[@id="main"]/div[1]/div[3]/p', "None"),
        }

    def product_availability(self):
        """Report whether the yellow ``button.egHolT`` is present on the page.

        Prints ``Product sold? <bool>`` and returns the same boolean: True when
        the button is found, False otherwise.
        NOTE(review): presumably that button only appears on sold items -
        confirm against the live page.
        """
        try:
            driver.find_element(by=By.CSS_SELECTOR, value="button.egHolT[color='yellow']")
            sold = True
        except NoSuchElementException:
            sold = False
        print("Product sold? " + str(sold))
        return sold

    def get_product_page_data(self):
        """Scrape the product page that is currently open.

        Returns:
            dict: one record with the page URL ("Product ID"), a freshly
            generated "UUID", seller details, price, description, attributes
            and the de-duplicated list of image URLs. Optional fields fall
            back to "0" (likes), "Multiple sizes" (size) or "None" (brand,
            colour, style) when their element is absent.

        Raises:
            NoSuchElementException: if a non-optional element is missing.
        """
        # TODO: the seller's star rating is not scraped yet. An earlier attempt
        # read the <title> ("Full Star" / "Half Star" / "Empty Star") of the
        # elements with ids like 'feedback-star-<n>-19262048', but that id
        # suffix was hard-coded.
        data_dictionary = {
            "Product ID": driver.current_url,
            "UUID": str(uuid.uuid4()),
            "Shop Name": self._inner_text(By.CSS_SELECTOR, "a[data-testid='bio__username']"),
            "Location": self._inner_text(By.XPATH, "//*[@id='main']/div[1]/div[3]/div/div[1]/div[1]/div[1]/div/p"),
            "No. of Reviews": self._inner_text(By.XPATH, "//*[@id='main']/div[1]/div[3]/div/div[1]/div[1]/div[1]/div/button/p"),
            "No. of Items Sold": self._inner_text(By.XPATH, "//*[@id='main']/div[1]/div[3]/div/div[1]/div[1]/div[2]/div[1]/p"),
            "Last Active Date": self._inner_text(By.XPATH, "//*[@id='main']/div[1]/div[3]/div/div[1]/div[1]/div[2]/div[2]/p"),
            "No. of Likes": self._inner_text_or(By.XPATH, "//*[@id='main']/div[1]/div[3]/div/div[1]/div[2]/span/b", "0"),
            "Price": self._inner_text(By.XPATH, "//*[@id='main']/div[1]/div[3]/div/div[2]/div[1]/div/p"),
            "Item Description": self._inner_text(By.XPATH, "//*[@id='main']/div[1]/div[3]/div/div[3]/p"),
            "Last Update": self._inner_text(By.XPATH, "//*[@id='main']/div[1]/div[3]/div/div[3]/div/time"),
            "Sizes Available": self._single_size(),
            "Brand": self._inner_text_or(By.CSS_SELECTOR, "a[data-testid='product__brand']", "None"),
            "Item Condition": self._inner_text(By.CSS_SELECTOR, "td[data-testid='product__condition']"),
            "Colour": self._inner_text_or(By.CSS_SELECTOR, "td[data-testid='product__colour']", "None"),
            "Style": self._inner_text_or(By.CSS_SELECTOR, "td[data-testid='selected__styles']", "None"),
        }

        image_elements = driver.find_elements(by=By.CSS_SELECTOR, value="img[class='LazyLoadImage__StyledImage-sc-__bquzot-1 doaiRN styles__LazyImage-sc-__sc-1fk4zep-9 hRpLaq']")
        # dict.fromkeys drops repeated URLs while keeping first-seen order.
        data_dictionary["Image Urls"] = list(
            dict.fromkeys(image.get_attribute("src") for image in image_elements)
        )
        return data_dictionary

    def scrape_listing(self, number_of_listing, filepath="./raw_data/data.json", main_dictionary="Test_ShopData"):
        """Scrape the first ``number_of_listing`` product cards of the open page.

        Each listing is opened in a second tab, scraped with
        ``get_product_page_data`` and appended to ``filepath`` under
        ``main_dictionary``. The tab is closed and focus returns to the first
        tab even when scraping a listing fails.

        Args:
            number_of_listing: how many cards to visit, starting at index 0.
            filepath: JSON file to append to; it must already exist.
            main_dictionary: top-level key of the list to append to.
        """
        for index in range(number_of_listing):
            # Every 24 cards, scroll the listing page and wait a moment before
            # reading the next card. NOTE(review): presumably the grid loads
            # 24 cards at a time - confirm.
            if index != 0 and index % 24 == 0:
                self.scroll_to_bottom()
                time.sleep(1)
            self.open_url_new_tab(self.listing_url(index))
            self.switch_tab(1)
            try:
                self.scroll_to_bottom()
                scraped_data = self.get_product_page_data()
                self.add_data(scraped_data, filepath, main_dictionary)
            finally:
                # Never leave the extra tab open or focused for the next listing.
                self.close_tab()
                self.switch_tab(0)

    # ------------------------------------------------------------------ storage

    def create_json_file(self, filepath, main_dictionaries):
        """Create (or overwrite) ``filepath`` with an empty list per given key.

        Args:
            filepath: destination file; missing parent directories are created.
            main_dictionaries: iterable of top-level key names.
        """
        parent_dir = os.path.dirname(filepath)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        output = {name: [] for name in main_dictionaries}
        with open(filepath, "w") as outfile:
            json.dump(output, outfile)

    def add_data(self, new_data, filepath, main_dictionary):
        """Append ``new_data`` to the list stored under ``main_dictionary``.

        The whole file is read, updated and rewritten. A key that is not in
        the file yet is created as a new list.

        Raises:
            FileNotFoundError: if ``filepath`` does not exist.
            json.JSONDecodeError: if the file does not contain valid JSON.
        """
        with open(filepath, "r+") as file:
            file_data = json.load(file)
            file_data.setdefault(main_dictionary, []).append(new_data)
            file.seek(0)
            json.dump(file_data, file)
            # Drop leftover bytes in case the new document is shorter.
            file.truncate()







        


if __name__ == "__main__":
    # Scrape the first 30 listings of one shop into a JSON file.
    DATA_FILE = "./raw_data/data.json"
    # scrape_listing() appends its records under this top-level key, so the
    # file has to be created with the same key or the append fails.
    DATA_KEY = "Test_ShopData"

    # A single instance is enough: constructing Scrapper only dismisses the
    # cookie banner, so building it twice does nothing useful.
    bot = Scrapper()
    bot.nav_by_shop("robinrebecca")
    print("Current Page Title is : %s" %driver.title)
    bot.create_json_file(DATA_FILE, [DATA_KEY])
    bot.scrape_listing(30)

Current Page Title is : 🍭Robin Rebecca🍭's Shop - Depop


In [360]:
# Scratch check of how a boolean renders as text; print() applies str() itself.
print(True)



True


In [365]:
# Open a fresh browser directly on the shop page and print its header fields.
# NOTE(review): this rebinds `driver` without quitting the browser started by
# the earlier setup cell, so that window stays open.
driver = webdriver.Chrome(service = Service('./chromedriver'))
driver.get("https://www.depop.com/robinrebecca")

bot = Scrapper()
print("Current Page Title is : %s" %driver.title)
# Markup of the username element, for reference:
# <p data-testid="username" type="caption1" class="sc-jrsJWt styles__UserName-sc-__r941b9-4 eoZhdh dYxTjP">@robinrebecca</p>
# The full name sits in an element with class "styles__FullName-sc-__r941b9-3".

# get_shop_data() performs the same six lookups (username, items sold, last
# activity, followers, following, bio) in this order, and falls back to "None"
# instead of raising when the shop has no bio.
for value in bot.get_shop_data().values():
    print(value)





# //*[@id="main"]/div[1]/div[1]/div/div[2]/div[2]/p

Current Page Title is : 🍭Robin Rebecca🍭's Shop - Depop
@robinrebecca
9461 sold
Active today
32K
157
✅ Instant buy is on ❌ No returns/refunds 🇬🇧 Delivery 1-4 working days 🌎 Worldwide shipping available 💌 Message for 1st class


In [384]:
# Scratch cell: load the scraped JSON file and display it.
# Named `sample_record` rather than `dict` so the builtin is not shadowed for
# the rest of the kernel session.
sample_record = {"name": "hi", "we": "no"}

# Nothing is written back here, so read-only mode is sufficient.
with open("./raw_data/data.json", "r") as file:
    file_data = json.load(file)

# NOTE(review): the original line was an incomplete subscript (`file_data[]`),
# which is a SyntaxError; the intended key is unknown, so the whole document
# is shown instead.
file_data

In [385]:
# Scratch check of item assignment on an empty dictionary. The variable is not
# called `dict`, which would shadow the builtin for every later cell.
counts = {}
counts["hi"] = 0
print(counts)

{'hi': 0}


{'hi': []}
{'hi': [], 'bye': []}


In [216]:
# Build and print the absolute XPath of the <title> node under svg[1] .. svg[4]
# of the button's <span>.
for i in range(1, 5):
    a = f"/html/body/div/div/div/div[1]/div[3]/div/div[1]/div[1]/div[1]/div/button/span/svg[{i}]/title"
    print(a)

/html/body/div/div/div/div[1]/div[3]/div/div[1]/div[1]/div[1]/div/button/span/svg[1]/title
/html/body/div/div/div/div[1]/div[3]/div/div[1]/div[1]/div[1]/div/button/span/svg[2]/title
/html/body/div/div/div/div[1]/div[3]/div/div[1]/div[1]/div[1]/div/button/span/svg[3]/title
/html/body/div/div/div/div[1]/div[3]/div/div[1]/div[1]/div[1]/div/button/span/svg[4]/title
