# Web scraping to gather data: Women's New Collection

> - Using the smallable website, men's and women's products will be scraped to prepare for creating a clothing and accessories recommendation.

In [1]:
# try to webscrap smallable
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from time import sleep
import random

In [2]:
# women's new collection
w_new_pages = range(1,7)

In [3]:
w_new = f"https://www.smallable.com/en/fashion/adult/women?_collection=new-collection&_page="

In [4]:
# dataframe where all results will be stored
womens_new = pd.DataFrame()

# iteration for webscrapping multiple pages
for page in w_new_pages: #change pages on the link
    r = requests.get(f"https://www.smallable.com/en/fashion/adult/women?_collection=new-collection&_page={page}")
    print(f'Response for page {page} :', r.status_code)
    soup = BeautifulSoup(r.content, 'html.parser')
    
    #empty lists
    link = []
    img = []
    tags = []
    brand = []
    product_color = []
    price_euro = []

    #scrapping infos from site
    for i in soup.find_all('a', attrs={"class": "ProductCard_content__fBfLV"}):
        link.append(i.get("href")) #link to product
        img.append(i.select('img')[1].get("src")) # photo link
        tags.append(i.find("div", attrs = {"class" : "ProductTags_container__3cc_M"}).get_text(strip = True, separator = "|"))

    for i in soup.find_all('ul', attrs={"class": "ProductCard_attr__2e2YT"}):
        brand.append(i.select("li")[0].get_text(strip = True)) #brand name
        product_color.append(i.select("li")[1].get_text(strip = True)) #product | color
        price_euro.append(i.select("li")[2].get_text(strip = True, separator = "|")) #price
    
    new_df = pd.DataFrame(list(zip(link, img, tags, brand, product_color, price_euro)),columns=("link", "img", "tags", "brand", "product_color", "price_euro"))
    # store everything in the previously prepared df and concatenate new results from new webpages
    womens_new = pd.concat([womens_new, new_df])
    
    wait_time = random.randint(2,7)
    print("I will sleep for " + str(wait_time) + " seconds.")
    sleep(wait_time) #Script will be stopped for a period of time ranging from 2 to 7 seconds

Response for page 1 : 200
I will sleep for 3 seconds.
Response for page 2 : 200
I will sleep for 7 seconds.
Response for page 3 : 200
I will sleep for 4 seconds.
Response for page 4 : 200
I will sleep for 4 seconds.
Response for page 5 : 200
I will sleep for 7 seconds.
Response for page 6 : 200
I will sleep for 7 seconds.


In [5]:
womens_new.to_csv('womens_new.csv', index=False)

In [6]:
womens_new.duplicated().sum()

0

In [7]:
womens_new

Unnamed: 0,link,img,tags,brand,product_color,price_euro
0,/en/product/racine-earrings-gold-elise-tsikis-...,https://static.smallable.com/1051694-648x648q8...,Promotion,Elise Tsikis,Racine Earrings | Gold,€164.50|€235.00|-30%
1,/en/product/bracelet-gold-5-octobre-228172,https://static.smallable.com/1258753-648x648q8...,New,5 Octobre,Bracelet | Gold,€110.00
2,/en/product/tencel-lite-t-shirt-dusty-pink-org...,https://static.smallable.com/1217107-648x648q8...,New|Greenable,Organic Basics,Tencel Lite T-Shirt | Dusty Pink,€55.00
3,/en/product/amma-earrings-green-5-octobre-228168,https://static.smallable.com/1258743-648x648q8...,New,5 Octobre,Amma Earrings | Green,€260.00
4,/en/product/olympe-ring-gold-alix-d-reynis-227923,https://static.smallable.com/1566300-648x648q8...,Promotion,Alix D. Reynis,Olympe Ring | Gold,€90.00|€150.00|-40%
...,...,...,...,...,...,...
32,/en/product/nina-long-dress-woman-collection-w...,https://static.smallable.com/997559-648x648q80...,Greenable,Numero 74,Nina Long Dress - Woman Collection | White S001,€85.00|7 colours
33,/en/product/nina-long-dress-woman-collection-d...,https://static.smallable.com/1000210-648x648q8...,Greenable,Numero 74,Nina Long Dress - Woman Collection | Dusty P...,€85.00|7 colours
34,/en/product/nina-long-dress-woman-collection-d...,https://static.smallable.com/997547-648x648q80...,Greenable,Numero 74,Nina Long Dress - Woman Collection | Dark Gr...,€85.00|7 colours
35,/en/product/nina-long-dress-woman-collection-p...,https://static.smallable.com/1000300-648x648q8...,Greenable,Numero 74,Nina Long Dress - Woman Collection | Powder ...,€85.00|7 colours


In [8]:
# code adapted from Cormac and Isi
def request_link(link):
    try:
        request = requests.get(link)
        request.raise_for_status()  # returns an HTTPError if the response is not OK
        print("Success! Response code", request.status_code) 
    except requests.exceptions.HTTPError as err:
        if request.status_code == 404:
            print("404: Oops, sorry we can't find that page!")
        else:
            print("The error code is", err.args[0]) # look up the 1st argument from HTTPError 
    return request

In [9]:
w = request_link(w_new)

Success! Response code 200


In [10]:
soup = BeautifulSoup(w.content, "html.parser")

In [11]:
for item in soup.find_all('a', attrs={"class": "ProductCard_content__fBfLV"}):
        print(item.get("href")) #link to product
        print(item.select('img')[1].get("src")) # photo link
        print(item.find("div", attrs = {"class" : "ProductTags_container__3cc_M"}).get_text(strip = True, separator = "|"))

/en/product/racine-earrings-gold-elise-tsikis-169683
https://static.smallable.com/1051694-648x648q80/racine-earrings.jpg
Promotion
/en/product/bracelet-gold-5-octobre-228172
https://static.smallable.com/1258753-648x648q80/bracelet.jpg
New
/en/product/tencel-lite-t-shirt-dusty-pink-organic-basics-220748
https://static.smallable.com/1217107-648x648q80/tencel-lite-t-shirt.jpg
New|Greenable
/en/product/amma-earrings-green-5-octobre-228168
https://static.smallable.com/1258743-648x648q80/amma-earrings.jpg
New
/en/product/olympe-ring-gold-alix-d-reynis-227923
https://static.smallable.com/1566300-648x648q80/olympe-ring.jpg
Promotion
/en/product/josephine-ring-blue-alix-d-reynis-227919
https://static.smallable.com/1302898-648x648q80/josephine-ring.jpg
Promotion
/en/product/tencel-lite-bra-light-blue-organic-basics-209482
https://static.smallable.com/1177538-648x648q80/tencel-lite-bra.jpg
Promotion
/en/product/shanga-necklace-white-soko-248333
https://static.smallable.com/1692469-648x648q80/.jpg

In [12]:
for i in soup.find_all('ul', attrs={"class": "ProductCard_attr__2e2YT"}):
        print(i.select("li")[0].get_text(strip = True)) #brand name
        print(i.select("li")[1].get_text(strip = True)) #product | color
        print(i.select("li")[2].get_text(strip = True, separator = "|")) #price

Elise Tsikis
Racine Earrings | Gold
€164.50|€235.00|-30%
5 Octobre
Bracelet | Gold
€110.00
Organic Basics
Tencel Lite T-Shirt  | Dusty Pink
€55.00
5 Octobre
Amma Earrings  | Green
€260.00
Alix D. Reynis
Olympe Ring  | Gold
€90.00|€150.00|-40%
Alix D. Reynis
Joséphine Ring  | Blue
€119.00|€170.00|-30%
Organic Basics
Tencel Lite Bra  | Light blue
€38.50|€55.00|-30%|5 colours
Soko
Shanga Necklace | White
€58.10|€83.00|-30%
Albertine
Jamie Bikini Top | Black
€76.00
Nonchalance
Silk Eye Mask | Mustard
€55.00|5 colours
Organic Basics
Active Leggings | Burgundy
€56.00|€80.00|-30%
Made in Tomboy
Felisia Jeans | Navy blue
€162.00|€324.00|-50%|3 colours
Poudre Organic
Pomelo Trousers - Women's Collection  | Brown
€75.00
Poudre Organic
Camisa Organic Cotton Shirt - Women’s Collection  | Black
€83.00
Longlivethequeen
Longlivethequeen x Smallable Collaboration  - Sweatshirt - Women's Collection  | Orange
€49.50|€99.00|-50%
Cotton Citizen
Brooklyn Shorts | Mauve
€94.80|€158.00|-40%|5 colours
Patagon