In [26]:
import json
import logging as l
from pathlib import Path

import requests
from bs4 import BeautifulSoup

In [2]:
# Root of the shop being scraped; site-relative links found later are appended to it.
base_url = "https://foreignfortune.com"
# A timeout keeps a stalled connection from hanging the notebook forever, and
# raise_for_status() stops the run on a 4xx/5xx answer instead of silently
# parsing an error page in the following cells.
page = requests.get(base_url, timeout=30)
page.raise_for_status()

In [3]:
# Parse the downloaded home page using the lxml parser backend.
soup = BeautifulSoup(markup=page.content, features='lxml')

In [4]:
# Brand name and site description are read from the `content` attribute of the
# home page's <meta> tags, in that order.
brand, description = (
    soup.select_one(meta_selector)['content']
    for meta_selector in (
        "meta[property='og:site_name']",
        "meta[name='description']",
    )
)
# Every anchor nested in a list item of the <ul> whose id is "SiteNav".
links = soup.select("ul[id='SiteNav'] li a")

In [5]:
# Build one absolute URL per navigation anchor by prefixing its href with base_url.
urls = [base_url + anchor['href'] for anchor in links]

In [6]:
# Display the URLs built from the navigation links.
urls

['https://foreignfortune.com/collections/men-unisex',
 'https://foreignfortune.com/collections/women',
 'https://foreignfortune.com/collections/kids',
 'https://foreignfortune.com/collections/coats-hats',
 'https://foreignfortune.com/collections/small-logo-embroidery-t-shirts-1',
 'https://foreignfortune.com/collections/frontpage',
 'https://foreignfortune.com/collections/foreign-accesories']

In [7]:
def fetch_collection_page(page_url):
    """Download one collection page and extract the product links on it.

    Args:
        page_url: Absolute URL of the collection page to fetch.

    Returns:
        A ``(soup, found_urls)`` tuple: the parsed page, and a list with
        ``base_url`` prepended to the href of every anchor matched by the
        collection selector, in document order.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the server answers with 4xx/5xx.
    """
    response = requests.get(page_url, timeout=30)
    # Fail loudly rather than scrape an error page and silently collect nothing.
    response.raise_for_status()
    page_soup = BeautifulSoup(response.content, 'lxml')
    anchors = page_soup.select("div[id='Collection'] div div div a")
    return page_soup, [base_url + anchor['href'] for anchor in anchors]


product_urls = []
for url in urls:
    # First page of the collection.
    temp_soup, found_urls = fetch_collection_page(url)
    product_urls.extend(found_urls)

    pages = temp_soup.select_one("li.pagination__text")
    if pages is None:
        # No pagination element: the collection fits on a single page.
        continue

    # The last whitespace-separated token of the pagination text is used as the
    # total page count; fall back to a single page when it is absent or not a number.
    tokens = pages.get_text().split()
    try:
        page_len = int(tokens[-1])
    except (IndexError, ValueError):
        page_len = 1

    # Page 1 was handled above; walk the remaining pages.
    for i in range(2, page_len + 1):
        _, found_urls = fetch_collection_page(url + '?page={}'.format(i))
        product_urls.extend(found_urls)

In [8]:
# Number of product URLs collected from all collection pages.
len(product_urls)

57

In [9]:
# Display the collected product URLs.
product_urls

['https://foreignfortune.com/collections/men-unisex/products/foreign-fortune-collection-joggers-1',
 'https://foreignfortune.com/collections/men-unisex/products/foreign-rovalf-outfit',
 'https://foreignfortune.com/collections/men-unisex/products/forign-luxury-tracksuits',
 'https://foreignfortune.com/collections/men-unisex/products/ff-coats-w-hats',
 'https://foreignfortune.com/collections/men-unisex/products/foreign-language-hoodie',
 'https://foreignfortune.com/collections/men-unisex/products/detroit-foreign-everybody-tees',
 'https://foreignfortune.com/collections/men-unisex/products/foreign-fortune-collection-short-sets',
 'https://foreignfortune.com/collections/men-unisex/products/long-socks-muti-color',
 'https://foreignfortune.com/collections/men-unisex/products/foreign-pattern-logo-socks',
 'https://foreignfortune.com/collections/men-unisex/products/foreign-fortune-socks-1',
 'https://foreignfortune.com/collections/men-unisex/products/long-foreign-socks',
 'https://foreignfortu

In [10]:
def scrape_product(p_url, brand, description):
    """Fetch one product page and turn it into a record dict.

    Args:
        p_url: Absolute URL of the product page.
        brand: Brand name copied unchanged into the record.
        description: Site description copied unchanged into the record.

    Returns:
        A dict with the keys ``brand``, ``description``, ``models``
        (``color``, ``size``, ``variants``), ``price``, ``sale_price``,
        ``title``, ``url`` and ``product_id``, in that order.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the server answers with 4xx/5xx.
    """
    p_page = requests.get(p_url, timeout=30)
    # Stop on an HTTP error instead of parsing an error page as a product.
    p_page.raise_for_status()
    p_soup = BeautifulSoup(p_page.content, 'lxml')

    # Map each option label (lower-cased, newlines removed, stripped) to the
    # texts of its <option> elements.
    filter_dict = {}
    for con in p_soup.select("div[class='selector-wrapper js product-form__item']"):
        label_tag = con.select_one('label')
        if label_tag is None:
            # An option block without a label cannot be named; skip it.
            continue
        label = label_tag.get_text().lower().replace('\n', '').strip()
        filter_dict[label] = [value.get_text() for value in con.select("select option")]

    # Look the price up once; it is reused for the record and for every variant.
    price_tag = p_soup.select_one("span[id='ProductPrice-product-template']")
    if price_tag is None:
        l.warning("No price element found on %s", p_url)
        price = ''
    else:
        price = price_tag.get_text().replace('$', '').replace('\n', '').strip()

    # One entry per thumbnail link. The id is the anchor's data-thumbnail-id
    # attribute and every entry carries the page-level price.
    variant_containers = p_soup.select(
        "ul[class='grid grid--uniform product-single__thumbnails product-single__thumbnails-product-template'] li a"
    )
    variants = [
        {
            'id': v_con['data-thumbnail-id'],
            'image': "https:" + v_con['href'],
            'price': price,
        }
        for v_con in variant_containers
    ]

    title_tag = p_soup.select_one("h1.product-single__title")
    if title_tag is None:
        l.warning("No title element found on %s", p_url)

    return {
        'brand': brand,
        'description': description,
        'models': {
            'color': filter_dict.get('color', []),
            'size': filter_dict.get('size', []),
            'variants': variants,
        },
        'price': price,
        # NOTE(review): sale_price is read from the same element as price, so
        # the two are always equal. Confirm whether the page exposes a separate
        # regular-price element that should be used for one of them.
        'sale_price': price,
        'title': title_tag.get_text() if title_tag is not None else '',
        'url': p_url,
        # Last path segment of the product URL.
        'product_id': p_url.split('/')[-1],
    }


output = [scrape_product(p_url, brand, description) for p_url in product_urls]

In [11]:
# Number of product records scraped.
len(output)

57

In [12]:
# Display the scraped product records.
output

[{'brand': 'Foreign Fortune Clothing',
  'description': 'Foreign Fortune Clothing Is A Unisex Clothing Line That Provides Top Quality Products At Affordable Prices. We Also Do Customized Outfits And Wholesale Orders. We Take Pride In Great Customer Service! We Are Located In FairLane Mall ( Dearborn, Michigan ) On The 3rd Floor Next To Jimmy Jazz. Please Check Us Out :)',
  'models': {'color': ['Black', 'Green', 'Grey', 'Navy Blue', 'Red', 'Yellow'],
   'size': ['XS', 'M', 'L', 'XL', '2X', '3X', '4X', '5X'],
   'variants': [{'id': '32038042501313',
     'image': 'https://foreignfortune.com/cdn/shop/products/D30946DA-5D18-48D6-9890-63DC5DB36F77_1024x1024@2x.jpg?v=1647614117',
     'price': '180.00'},
    {'id': '31708542206145',
     'image': 'https://foreignfortune.com/cdn/shop/products/3EE2E126-721C-4DDE-A1EB-E3B355F2B674_1024x1024@2x.jpg?v=1647614117',
     'price': '180.00'},
    {'id': '31708542107841',
     'image': 'https://foreignfortune.com/cdn/shop/products/C8134772-8681-4EF4-

In [13]:
# Write the scraped records as indented JSON. The target directory is created
# first so a fresh checkout (without an existing "output" folder) does not fail
# with FileNotFoundError.
output_path = Path("output/foreign_fortune.json")
output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open("w", encoding="utf-8") as json_file:
    json.dump(output, json_file, indent=4)

In [49]:
class Validator:
    """Sanity checks for a single scraped product record.

    Each ``validate_*`` method prints a confirmation line for a check that
    passes and logs an error (through the ``l`` logging alias) for one that
    fails. The methods return nothing and never raise on missing keys or an
    empty variant list.

    Args:
        data: Product record dict, e.g. one element of the scraped ``output``.
    """

    def __init__(self, data):
        self.data = data

    @staticmethod
    def _to_number(value):
        """Parse a price such as ``'1,180.00'`` into a float; None if impossible."""
        try:
            return float(str(value).replace(',', '').strip())
        except (TypeError, ValueError):
            return None

    def _first_variant(self):
        """Return the first entry of ``models.variants``, or ``{}`` when absent."""
        models = self.data.get('models') or {}
        variants = models.get('variants') or []
        return variants[0] if variants else {}

    def validate_price(self):
        """Check that the sale price does not exceed the regular price.

        Prices are compared numerically: the scraped values are strings, and a
        string comparison would rank '90.00' above '180.00'.
        """
        sale_price = self._to_number(self.data.get('sale_price'))
        price = self._to_number(self.data.get('price'))
        if sale_price is not None and price is not None and sale_price <= price:
            print("Price is correct.")
        else:
            l.error("Price is not correct or unavailable.")

    def validate_mandatory_fields(self):
        """Check that title, product id and the first variant's id are present."""
        if self.data.get('title'):
            print("Title is available.")
        else:
            l.error("Title is mandatory. Please add title info in data.")

        if self.data.get('product_id'):
            print("Product id is available.")
        else:
            l.error("Product id is mandatory. Please add product id in data.")

        if self._first_variant().get('id'):
            print("Model id is available.")
        else:
            l.error("Model id is mandatory. Please add model id in data.")

    def validate_variants(self):
        """Check that the first variant has an image URL."""
        if self._first_variant().get('image'):
            print("Variant image is available.")
        else:
            l.error("Variant image is mandatory. Please add variant image url in data.")
            

In [50]:
# Take the first scraped product record as the sample to validate.
data = output[0]

In [51]:
# Run each validation in turn: price, mandatory fields, then variant image.
validator = Validator(data)
for run_check in (
    validator.validate_price,
    validator.validate_mandatory_fields,
    validator.validate_variants,
):
    run_check()

Price is correct.
Title is available.
Product id is available.
Model id is available.
Variant image is available.
