# Web scraping to gather data: Men's SS Collection

> - Using the smallable website, men's and women's products will be scraped to prepare for creating a clothing and accessories recommendation.

In [5]:
# try to webscrap smallable
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from time import sleep
import random

In [6]:
# men's ss collection
m_ss_pages = range(1,28)

In [7]:
m_ss = f"https://www.smallable.com/en/fashion/adult/man?_collection=spring-summer-collection&_page="

In [8]:
# dataframe where all results will be stored
mens_ss = pd.DataFrame()

# iteration for webscrapping multiple pages
for page in m_ss_pages: #change pages on the link
    r = requests.get(f"https://www.smallable.com/en/fashion/adult/man?_collection=spring-summer-collection&_page={page}")
    print(f'Response for page {page} :', r.status_code)
    soup = BeautifulSoup(r.content, 'html.parser')
    
    #empty lists
    link = []
    img = []
    tags = []
    brand = []
    product_color = []
    price_euro = []

    #scrapping infos from site
    for i in soup.find_all('a', attrs={"class": "ProductCard_content__fBfLV"}):
        link.append(i.get("href")) #link to product
        img.append(i.select('img')[1].get("src")) # photo link
        tags.append(i.find("div", attrs = {"class" : "ProductTags_container__3cc_M"}).get_text(strip = True, separator = "|"))

    for i in soup.find_all('ul', attrs={"class": "ProductCard_attr__2e2YT"}):
        brand.append(i.select("li")[0].get_text(strip = True)) #brand name
        product_color.append(i.select("li")[1].get_text(strip = True)) #product | color
        price_euro.append(i.select("li")[2].get_text(strip = True, separator = "|")) #price
    
    new_df = pd.DataFrame(list(zip(link, img, tags, brand, product_color, price_euro)),columns=("link", "img", "tags", "brand", "product_color", "price_euro"))
    # store everything in the previously prepared df and concatenate new results from new webpages
    mens_ss = pd.concat([mens_ss, new_df])
    
    wait_time = random.randint(2,7)
    print("I will sleep for " + str(wait_time) + " seconds.")
    sleep(wait_time) #Script will be stopped for a period of time ranging from 2 to 7 seconds

Response for page 1 : 200
I will sleep for 7 seconds.
Response for page 2 : 200
I will sleep for 3 seconds.
Response for page 3 : 200
I will sleep for 3 seconds.
Response for page 4 : 200
I will sleep for 7 seconds.
Response for page 5 : 200
I will sleep for 3 seconds.
Response for page 6 : 200
I will sleep for 4 seconds.
Response for page 7 : 200
I will sleep for 7 seconds.
Response for page 8 : 200
I will sleep for 2 seconds.
Response for page 9 : 200
I will sleep for 2 seconds.
Response for page 10 : 200
I will sleep for 6 seconds.
Response for page 11 : 200
I will sleep for 5 seconds.
Response for page 12 : 200
I will sleep for 5 seconds.
Response for page 13 : 200
I will sleep for 2 seconds.
Response for page 14 : 200
I will sleep for 2 seconds.
Response for page 15 : 200
I will sleep for 4 seconds.
Response for page 16 : 200
I will sleep for 2 seconds.
Response for page 17 : 200
I will sleep for 3 seconds.
Response for page 18 : 200
I will sleep for 2 seconds.
Response for page 1

In [9]:
mens_ss.to_csv('mens_ss.csv', index=False)

In [10]:
mens_ss.duplicated().sum()

0

In [11]:
mens_ss

Unnamed: 0,link,img,tags,brand,product_color,price_euro
0,/en/product/adam-oxford-shirt-white-wood-wood-...,https://static.smallable.com/1524716-648x648q8...,Sale|Greenable,Wood Wood,Adam Oxford Shirt | White,€75.00|€125.00|-40%
1,/en/product/shorts-red-bather-319655,https://static.smallable.com/1682764-648x648q8...,New|Greenable,Bather,Shorts | Red,€85.00|4 colours
2,/en/product/cap-light-blue-bather-319662,https://static.smallable.com/1682773-648x648q8...,New,Bather,Cap | Light blue,€60.00
3,/en/product/plain-recycled-swim-shorts-navy-bl...,https://static.smallable.com/1682733-648x648q8...,New|Greenable,Bather,Plain Recycled Swim Shorts | Navy blue,€90.00|6 colours
4,/en/product/casquette-sunflare-trucker-black-d...,https://static.smallable.com/1702026-648x648q8...,Promotion,Deus Ex Machina,Casquette Sunflare Trucker | Black,€23.40|€39.00|-40%|3 colours
...,...,...,...,...,...,...
9,/en/product/waterproof-cape-green-rains-197875,https://static.smallable.com/1125687-648x648q8...,,Rains,Waterproof Cape | Green,€95.00
10,/en/product/d-screen-glasses-red-izipizi-93477,https://static.smallable.com/746636-648x648q80...,,IZIPIZI,#D Screen Glasses | Red,€40.00|4 colours
11,/en/product/c-mirror-glass-sunglasses-brown-iz...,https://static.smallable.com/746569-648x648q80...,,IZIPIZI,#C Mirror Glass Sunglasses | Brown,€40.00
12,/en/product/c-sunglasses-red-izipizi-93427,https://static.smallable.com/746626-648x648q80...,,IZIPIZI,#C Sunglasses | Red,€40.00|6 colours
