In [7]:
# Install packages

!pip install requests
!pip install beautifulsoup4
!pip install pandas
!pip install matplotlib
!pip install seaborn
!pip install plotly
!pip install selenium

Collecting selenium
  Downloading selenium-4.32.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting websocket-client~=1.8 (from selenium)
  Using cached websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Collecting attrs>=23.2.0 (from trio~=0.17->selenium)
  Using cached attrs-25.3.0-py3-none-any.whl.metadata (10 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting sniffio>=1.3.0 (from trio~=0.17->selenium)
  Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py

In [29]:
# Import packages

import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from bs4 import BeautifulSoup
import plotly.express as px
import plotly.graph_objects as go
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import re
from urllib.parse import urlencode

In [33]:
# Create filters dictionary

size_ids = {'XXS': 1226, 'XS': 2, 'S': 3, 'M': 4, 'L': 5, 'XL': 6, 'XXL': 7, 'XXXL': 310}

In [22]:

def parse_alt_text(alt_text):
    # Initialize a dictionary to hold the extracted details
    item_details = {}

    # Extract the title (text before the first comma)
    title_match = re.match(r'^(.*?)(?=,)', alt_text)
    item_details['title'] = title_match.group(1).strip() if title_match else None

    # Extract brand
    brand_match = re.search(r'brand:\s*([^,]+)', alt_text, re.IGNORECASE)
    item_details['brand'] = brand_match.group(1).strip() if brand_match else None

    # Extract condition
    condition_match = re.search(r'condition:\s*([^,]+)', alt_text, re.IGNORECASE)
    item_details['condition'] = condition_match.group(1).strip() if condition_match else None

    # Extract size
    size_match = re.search(r'size:\s*([^,]+)', alt_text, re.IGNORECASE)
    item_details['size'] = size_match.group(1).strip() if size_match else None

    # Extract item price
    price_match = re.search(r'£\d+\.\d{2}(?=,)', alt_text)
    item_details['price'] = price_match.group(0) if price_match else None

    # Extract total price including buyer protection
    total_price_match = re.search(r'£\d+\.\d{2}\s*includes Buyer Protection', alt_text)
    item_details['total_price'] = total_price_match.group(0) if total_price_match else None

    return item_details

In [70]:
def search_vinted_with_metadata(query, max_pages=1, filters=None, get_description=True):
    options = Options()
    options.add_argument("--headless=new")
    options.add_argument("--disable-gpu")
    options.add_argument("--window-size=1920,1080")
    driver = webdriver.Chrome(options=options)

    base_url = "https://www.vinted.co.uk/catalog"

    for page in range(1, max_pages + 1):
        params = {
            "search_text": query,
            "page": page
        }

        # Add additional filters to the parameters
        if filters:
            params.update(filters)

        # Construct the URL with parameters
        url = base_url + "?" + urlencode(params, doseq=True)
        driver.get(url)
        time.sleep(5)  # Wait for JS

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        items = soup.find_all("div", class_="feed-grid__item")

        # Initialize an empty list to hold the extracted details
        results = []

        if not items:
            print(f"No items found on page {page}")
            continue

        for item in items:
            # Extract the item details
            link_tag = item.find("a", href=True)
            img_tag = item.find("img")
            item_url = f"{link_tag['href']}" if link_tag else "N/A"
            image_url = img_tag["src"] if img_tag else "N/A"
            alt_text = img_tag["alt"] if img_tag and "alt" in img_tag.attrs else "N/A"
            parsed_details = parse_alt_text(alt_text)

            # Go to detail page for each item to extract seller description
            seller_description = "N/A"
            if get_description:
                if item_url != "N/A":
                    cleaned_url = item_url.strip()
                    try:
                        driver.get(cleaned_url)
                        time.sleep(2)
                        detail_soup = BeautifulSoup(driver.page_source, 'html.parser')
                        desc_span = detail_soup.find(
                            "span",
                            class_="web_ui__Text__text web_ui__Text__body web_ui__Text__left web_ui__Text__format"
                        )
                        if desc_span:
                            seller_description = desc_span.get_text(strip=True)
                    except Exception as e:
                        print(f"Error loading {cleaned_url}: {e}")
                        seller_description = "Error"
            parsed_details['seller_description'] = seller_description

            # Append the urls to the parsed details dictionary
            parsed_details['item_url'] = item_url
            parsed_details['image_url'] = image_url
            results.append(parsed_details)

            # Print all items from parsed details dictionary
            print(f"Title: {parsed_details.get('title')}")
            print(f"Brand: {parsed_details.get('brand')}")
            print(f"Condition: {parsed_details.get('condition')}")
            print(f"Size: {parsed_details.get('size')}")
            print(f"Item Price: {parsed_details.get('price')}")
            print(f"Total Price: {parsed_details.get('total_price')}")
            print(f"Image: {image_url}")
            print(f"Link: {item_url}")
            print(f"Seller Description: {seller_description}")
            print("-" * 60)

    driver.quit()

    return results


In [71]:
filters = {
    "size_ids[]": size_ids['XXS']
}
maxi_skirt_results = search_vinted_with_metadata("maxi skirt", max_pages=2, filters=filters, get_description=False)

Title: Gap Wrap Skirt
Brand: GAP
Condition: Very good
Size: XXXS / 2
Item Price: £4.00
Total Price: £4.90 includes Buyer Protection
Image: https://images1.vinted.net/t/03_01ce5_fqDV8XSr7m2m8tLWKzi2SoHC/310x430/1725804329.jpeg?s=f34a4e4ae0e60580121de276f28042a7bd70bf56
Link: https://www.vinted.co.uk/items/5006873918-gap-wrap-skirt?referrer=catalog
Seller Description: N/A
------------------------------------------------------------
Title: Ralph Lauren Floral Maxi Skirt
Brand: Ralph Lauren
Condition: Very good
Size: XXXS / 2
Item Price: £4.00
Total Price: £4.90 includes Buyer Protection
Image: https://images1.vinted.net/t/02_022d9_CpoAWQ2WnPP8DSNchNZxjY91/310x430/1741520746.jpeg?s=75c6db090a1e147d7740e61922f41694b498eb16
Link: https://www.vinted.co.uk/items/5937868845-ralph-lauren-floral-maxi-skirt?referrer=catalog
Seller Description: N/A
------------------------------------------------------------
Title: Neon pink coverup with underskirt h+m
Brand: H&M Divided
Condition: Very good
Size: 

In [72]:
# Create a DataFrame from the results
df = pd.DataFrame(maxi_skirt_results)
df

Unnamed: 0,title,brand,condition,size,price,total_price,seller_description,item_url,image_url
0,H&M skirt size 32 XXS XS S,H&M,New without tags,XXXS / 2,£2.00,£2.80 includes Buyer Protection,,https://www.vinted.co.uk/items/3365599756-hm-s...,https://images1.vinted.net/t/03_020b8_BEV6iz5G...
1,Next floral maxi skirt,Next,Very good,XXXS / 2,£6.00,£7.00 includes Buyer Protection,,https://www.vinted.co.uk/items/3480170608-next...,https://images1.vinted.net/t/01_016ff_B13TBgMF...
2,River island skirt girls,River Island,New without tags,XXXS / 2,£3.00,£3.85 includes Buyer Protection,,https://www.vinted.co.uk/items/3076684731-rive...,https://images1.vinted.net/t/03_0186f_AhfDd1Mg...
3,Monsoon Maxi occasion dress age 11,Monsoon,Good,XXXS / 2,£4.50,£5.43 includes Buyer Protection,,https://www.vinted.co.uk/items/4915214705-mons...,https://images1.vinted.net/t/01_00530_6eTjkhjr...
4,Mini skirt,New Look,Good,XXXS / 2,£3.00,£3.85 includes Buyer Protection,,https://www.vinted.co.uk/items/5897314580-mini...,https://images1.vinted.net/t/03_00503_NLqcifQX...
...,...,...,...,...,...,...,...,...,...
93,Roman stunning tweed look fkared skirt brand n...,ROMAN,New with tags,XXXS / 2,£10.00,£11.20 includes Buyer Protection,,https://www.vinted.co.uk/items/5905858088-roma...,https://images1.vinted.net/t/04_009f0_DYnu1m8d...
94,Long skirt with slit,Jeff Gallano,New without tags,XXXS / 2,£6.00,£7.00 includes Buyer Protection,,https://www.vinted.co.uk/items/1814106346-long...,https://images1.vinted.net/t/02_01ae8_sHbnXFU5...
95,Ted Baker Long Skirt 0,Ted Baker,Very good,XXXS / 2,£15.00,£16.45 includes Buyer Protection,,https://www.vinted.co.uk/items/4579704004-ted-...,https://images1.vinted.net/t/02_01144_E3mUGmd7...
96,Midi floral Skirt girls age 8-9,Marks & Spencer,New without tags,XXXS / 2,£6.50,£7.53 includes Buyer Protection,,https://www.vinted.co.uk/items/3016518674-midi...,https://images1.vinted.net/t/02_00766_6ubcmrNg...


In [73]:
# Data Cleaning

# Make all text lowercase
df['title'] = df['title'].str.lower()
df['brand'] = df['brand'].str.lower()
df['condition'] = df['condition'].str.lower()
df['size'] = df['size'].str.lower()
df['seller_description'] = df['seller_description'].str.lower()

# Convert numeric columns to float
df['price'] = df['price'].str.replace('£', '').astype(float)
df['total_price'] = df['total_price'].str.replace(' includes Buyer Protection', '')
df['total_price'] = df['total_price'].str.replace('£', '').astype(float)


In [None]:
# Display the cleaned DataFrame

df

Unnamed: 0,title,brand,condition,size,price,total_price,seller_description,item_url,image_url
0,h&m skirt size 32 xxs xs s,h&m,new without tags,xxxs / 2,2.0,2.80,,https://www.vinted.co.uk/items/3365599756-hm-s...,https://images1.vinted.net/t/03_020b8_BEV6iz5G...
1,next floral maxi skirt,next,very good,xxxs / 2,6.0,7.00,,https://www.vinted.co.uk/items/3480170608-next...,https://images1.vinted.net/t/01_016ff_B13TBgMF...
2,river island skirt girls,river island,new without tags,xxxs / 2,3.0,3.85,,https://www.vinted.co.uk/items/3076684731-rive...,https://images1.vinted.net/t/03_0186f_AhfDd1Mg...
3,monsoon maxi occasion dress age 11,monsoon,good,xxxs / 2,4.5,5.43,,https://www.vinted.co.uk/items/4915214705-mons...,https://images1.vinted.net/t/01_00530_6eTjkhjr...
4,mini skirt,new look,good,xxxs / 2,3.0,3.85,,https://www.vinted.co.uk/items/5897314580-mini...,https://images1.vinted.net/t/03_00503_NLqcifQX...
...,...,...,...,...,...,...,...,...,...
93,roman stunning tweed look fkared skirt brand n...,roman,new with tags,xxxs / 2,10.0,11.20,,https://www.vinted.co.uk/items/5905858088-roma...,https://images1.vinted.net/t/04_009f0_DYnu1m8d...
94,long skirt with slit,jeff gallano,new without tags,xxxs / 2,6.0,7.00,,https://www.vinted.co.uk/items/1814106346-long...,https://images1.vinted.net/t/02_01ae8_sHbnXFU5...
95,ted baker long skirt 0,ted baker,very good,xxxs / 2,15.0,16.45,,https://www.vinted.co.uk/items/4579704004-ted-...,https://images1.vinted.net/t/02_01144_E3mUGmd7...
96,midi floral skirt girls age 8-9,marks & spencer,new without tags,xxxs / 2,6.5,7.53,,https://www.vinted.co.uk/items/3016518674-midi...,https://images1.vinted.net/t/02_00766_6ubcmrNg...


In [82]:
# Export to CSV
df.to_csv('maxi_skirt_results.csv', index=False)