In [51]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import re
import json

## Fetch the WEAR coordinate page

In [2]:
# Download the WEAR coordinate listing page and parse it; the sections below all read from coordinate_soup.
coordinate_response = requests.get(
    'https://wear.tw/coordinate/',
    headers={'User-Agent': 'Mozilla/5.0'},
    timeout=30,  # requests applies no timeout by default, so a stalled server would hang this cell forever
)
# Stop here on a 4xx/5xx response instead of silently parsing an error page.
coordinate_response.raise_for_status()
coordinate_soup = BeautifulSoup(coordinate_response.text, 'html.parser')

## 1. All the different sub-categories on WEAR

- sex
- main category & sub-category
- color
- season
- height
- age
- hair style
- region

### 1.1 Get the Sex Category

In [3]:
# Locate the <section class="sex"> element, then gather every <li> inside it.
sex_category = coordinate_soup.find(name="section", class_="sex")
sex_result = sex_category.find_all(name="li")

print(sex_result)

[<li class="current"><a class="icon_font over" href="/coordinate/">ALL</a></li>, <li><a class="icon_font over" href="/men-coordinate/">MEN</a></li>, <li><a class="icon_font over" href="/women-coordinate/">WOMEN</a></li>, <li><a class="icon_font over" href="/kids-coordinate/">KIDS</a></li>]


In [50]:
## Map each sex label (the link text) to the slug derived from its href
sex_dict = {}
for sex_item in sex_result:
    anchor = sex_item.find("a")
    # strip the "-coordinate/" part and every remaining slash from the href
    slug = anchor.get('href').replace("-coordinate/", "").replace("/", "")
    sex_dict[anchor.getText()] = slug

print(sex_dict)

{'ALL': 'coordinate', 'MEN': 'men', 'WOMEN': 'women', 'KIDS': 'kids'}


In [52]:
## Write sex_dict to sex_dict.json (mode 'w' overwrites an existing file)
sex_dict_path = r'C:\Users\user\Documents\Data Science\Project\1_OOTD_Image_Generator\Dataset\category_dict\sex_dict.json'
with open(sex_dict_path, mode='w') as json_file:
    json.dump(sex_dict, fp=json_file)

### 1.2 Get the Category

In [5]:
# Locate the <section class="category"> element and collect its <a class="icon_font"> links.
clothes_category = coordinate_soup.find(name="section", class_="category")
clothes_result = clothes_category.find_all(name="a", class_="icon_font")

print(clothes_result)


[<a class="icon_font" href="/category/tops/">上衣</a>, <a class="icon_font" href="/category/jacket-outerwear/">夾克／外套</a>, <a class="icon_font" href="/category/pants/">褲子</a>, <a class="icon_font" href="/category/allinone-salopette/">連體褲/背帶褲</a>, <a class="icon_font" href="/category/skirt/">裙子</a>, <a class="icon_font" href="/category/onepiece/">連衣裙/禮服裙</a>, <a class="icon_font" href="/category/suit/">正裝西服/小物</a>, <a class="icon_font" href="/category/bag/">包</a>, <a class="icon_font" href="/category/shoes/">鞋子</a>, <a class="icon_font" href="/category/fashion-accessories/">時尚配件</a>, <a class="icon_font" href="/category/wallet-accessory/">錢包/小物</a>, <a class="icon_font" href="/category/wrist-watch/">手錶</a>, <a class="icon_font" href="/category/hair-accessory/">頭飾</a>, <a class="icon_font" href="/category/accessory/">首飾</a>, <a class="icon_font" href="/category/underwear/">內衣</a>, <a class="icon_font" href="/category/leg-wear/">襪子</a>, <a class="icon_font" href="/category/loungewear/">居家服</

In [15]:
## Build the list of main-category slugs: each link's href with "/category" and all slashes removed
main_category_list = [
    link.get("href").replace("/category", "").replace("/", "")
    for link in clothes_result
]

print(f"there are {len(main_category_list)} categories")
main_category_list[0:5]

there are 34 categories


['tops', 'jacket-outerwear', 'pants', 'allinone-salopette', 'skirt']

In [53]:
## Collect the sub-categories of every main category
## Dict format:
### - Key: main category
### - Value: list of sub-category

full_category_dict = {}

for category in main_category_list:
    # re.escape keeps any regex metacharacter in a slug from being read as pattern syntax
    category_href = re.compile(f"/category/{re.escape(category)}/")

    # NOTE(review): the first two matches are skipped - presumably they link to the
    # main category itself rather than a sub-category; confirm against the page markup
    subcategory_soup_list = coordinate_soup.find_all("a", href=category_href)[2:]

    # the sub-category slug is taken from index 3 of the href split on "/"
    full_category_dict[category] = [
        link.get("href").split("/")[3] for link in subcategory_soup_list
    ]

In [54]:
## Write full_category_dict to full_category_dict.json (mode 'w' overwrites an existing file)
full_category_dict_path = r'C:\Users\user\Documents\Data Science\Project\1_OOTD_Image_Generator\Dataset\category_dict\full_category_dict.json'
with open(full_category_dict_path, mode='w') as json_file:
    json.dump(full_category_dict, fp=json_file)

### 1.3 Color

In [96]:
## Map each colour entry's first CSS class name to the code taken from its first link
color_group_soup = coordinate_soup.find("section", class_="color")
color_group_soup = color_group_soup.find_all("li")

color_dict = {}
for color_item in color_group_soup:
    first_link = color_item.find_all("a")[0]
    # the code is the href with the "/coordinate/" part removed
    color_dict[color_item.get("class")[0]] = first_link.get("href").replace("/coordinate/", "")

In [98]:
## Write color_dict to color_dict.json (mode 'w' overwrites an existing file)
color_dict_path = r'C:\Users\user\Documents\Data Science\Project\1_OOTD_Image_Generator\Dataset\category_dict\color_dict.json'
with open(color_dict_path, mode='w') as json_file:
    json.dump(color_dict, fp=json_file)

### 1.4 Season

In [108]:
## Map the four season names to the codes taken from the season links
season_soup = coordinate_soup.find("section", class_="season")
# NOTE(review): the first two anchors are skipped - presumably not season options; confirm against the page markup
season_soup = season_soup.find_all("a")[2:]

season_dict = {}
# the names are paired with the links purely by position, in this order
for position, season_name in enumerate(("Spring", "Summer", "Autumn", "Winter")):
    season_dict[season_name] = season_soup[position].get("href").replace("/coordinate/", "")

In [110]:
## Write season_dict to season_dict.json (mode 'w' overwrites an existing file)
season_dict_path = r'C:\Users\user\Documents\Data Science\Project\1_OOTD_Image_Generator\Dataset\category_dict\season_dict.json'
with open(season_dict_path, mode='w') as json_file:
    json.dump(season_dict, fp=json_file)

### 1.5 Height

In [124]:
## Map each height label (the link text) to the code taken from its href
height_soup = coordinate_soup.find("section", class_="other").find("div", class_="height")
# NOTE(review): the first two anchors are skipped - presumably not height options; confirm against the page markup
height_soup = height_soup.find_all("a")[2:]

height_dict = {
    height_link.getText(): height_link.get("href").replace("/coordinate/", "")
    for height_link in height_soup
}

In [123]:
## Write height_dict to height_dict.json (mode 'w' overwrites an existing file)
height_dict_path = r'C:\Users\user\Documents\Data Science\Project\1_OOTD_Image_Generator\Dataset\category_dict\height_dict.json'
with open(height_dict_path, mode='w') as json_file:
    json.dump(height_dict, fp=json_file)

### 1.6 Age

In [127]:
## Map each age label (link text with every "歳" character removed) to the code taken from its href
age_soup = coordinate_soup.find("section", class_="other").find("div", class_="age")
# NOTE(review): the first two anchors are skipped - presumably not age options; confirm against the page markup
age_soup = age_soup.find_all("a")[2:]

age_dict = {}
for age_link in age_soup:
    age_label = age_link.getText().replace("歳", "")
    age_dict[age_label] = age_link.get("href").replace("/coordinate/", "")

In [129]:
## Write age_dict to age_dict.json (mode 'w' overwrites an existing file)
age_dict_path = r'C:\Users\user\Documents\Data Science\Project\1_OOTD_Image_Generator\Dataset\category_dict\age_dict.json'
with open(age_dict_path, mode='w') as json_file:
    json.dump(age_dict, fp=json_file)

### 1.7 Hair Style

In [130]:
## Map each hair-style label (the link text) to the code taken from its href
hair_soup = coordinate_soup.find("section", class_="other").find("div", class_="hair")
# NOTE(review): the first two anchors are skipped - presumably not hair-style options; confirm against the page markup
hair_soup = hair_soup.find_all("a")[2:]

hair_dict = {}
for hair_link in hair_soup:
    # NOTE(review): removing "歳" looks carried over from the age cell; kept so the keys stay unchanged
    hair_label = hair_link.getText().replace("歳", "")
    hair_dict[hair_label] = hair_link.get("href").replace("/coordinate/", "")

In [132]:
## Write hair_dict to hair_dict.json (mode 'w' overwrites an existing file)
hair_dict_path = r'C:\Users\user\Documents\Data Science\Project\1_OOTD_Image_Generator\Dataset\category_dict\hair_dict.json'
with open(hair_dict_path, mode='w') as json_file:
    json.dump(hair_dict, fp=json_file)

### 1.8 Region

In [133]:
## Map each region label (the link text) to the code taken from its href
region_soup = coordinate_soup.find("section", class_="other").find("div", class_="region")
# NOTE(review): the first two anchors are skipped - presumably not region options; confirm against the page markup
region_soup = region_soup.find_all("a")[2:]

region_dict = {}
for region_link in region_soup:
    # NOTE(review): removing "歳" looks carried over from the age cell; kept so the keys stay unchanged
    region_label = region_link.getText().replace("歳", "")
    region_dict[region_label] = region_link.get("href").replace("/coordinate/", "")

In [135]:
## Write region_dict to region_dict.json (mode 'w' overwrites an existing file)
region_dict_path = r'C:\Users\user\Documents\Data Science\Project\1_OOTD_Image_Generator\Dataset\category_dict\region_dict.json'
with open(region_dict_path, mode='w') as json_file:
    json.dump(region_dict, fp=json_file)

## 2. Scrape all the photos

**URL** <br>
https://wear.tw/men-category/tops/tshirt-cutsew/?color_group_id=1&from_age=20&to_age=24&from_height=171&to_height=180&hairstyle_id=5&from_month=3&to_month=5

Model
- sex
- height
- age
- hair style
- region


Items
- main category & sub-category
- color
- season
