In [45]:
import requests
import re
import unicodedata
    
    
def join_dict_item_in_list(l: list, key: str):
    ''' input:  l = [{"name": "Ian"}, {"name": "Wang"}]
                key = 'name'
        output: "Ian、Wang"
    '''
    dict_item_list = [d[key] for d in l]
    return '、'.join(dict_item_list)


def join_element_in_list(l: list) -> str:
    ''' input:  ["A", "B", "C"]
        output: "A、B、C"
    '''
    return '、'.join(l)



def convert_full_width_to_half_width(text: str) -> str:
    ''' convert full-width character to half-width one, such as "Ａ" to "A"
    '''
    text = unicodedata.normalize('NFKC', text)
    return text


def remove_unicode_text(text: str) -> str:
    text = text.replace('\xa0', '')
    text = text.replace('\u3000', '')
    text = text.replace('\r', '')
    return text


def remove_extra_newline(text: str) -> str:
    text = re.sub('\n+', '\n', text)
    return text

    
def clean_text(text: str) -> str:
    text = convert_full_width_to_half_width(text)
    text = remove_unicode_text(text)
    text = remove_extra_newline(text)
    return text
    

In [3]:
url = 'https://www.104.com.tw/jobs/search/list?ro=0&keyword=python' # search page

In [4]:
headers = {
    'Referer': url
}

In [5]:
response = requests.get(url, headers=headers)
response

<Response [200]>

In [46]:
import json
j = json.loads(clean_text(response.text))

In [47]:
item = {}
item['search_page'] = j['data']['list'][0]
item['search_page']

{'jobType': '1',
 'jobNo': '8420316',
 'jobName': '軟體開發工程師 (車載產品自動化測試平台)_林口',
 'jobNameSnippet': '軟體開發工程師 (車載產品自動化測試平台)_林口',
 'jobRole': '1',
 'jobRo': '1',
 'jobAddrNo': '6001005008',
 'jobAddrNoDesc': '桃園市龜山區',
 'jobAddress': '華亞二亞270號',
 'description': '【部門說明】\nAutomotive electronics has become a global trend in the technology field. Being one of the leaders in the field, Garmin aims to become a superior supplier for international Automotive manufacturers with various products and professional services. We welcome you to join the team.\n\n In this position, you will learn to-\n1. Gain an exclusive insight of the Automotive industry, including the market status, supply chain and future trend\n2. Become familiar with Automotive systems of in Europe, America, China, Japan and their requirements in product development, quality and service\n3. Understand cutting edge technology, product realization and manufacture process of Garmin’s Automotive electronics projects\n4. Grasp the latest m

In [42]:
parsed_item = {}

# 職缺ID
parsed_item['job_id'] = re.search('job\/(.*)\?', item['search_page']['link']['job']).group(1)

# 職缺編號
parsed_item['job_no'] = item['search_page']['jobNo']

# 職缺名稱
parsed_item['job_name'] = item['search_page']['jobName']

# 公司ID
parsed_item['company_id'] = re.search('company/(.*)\?', item['search_page']['link']['cust']).group(1)

# 公司名稱
parsed_item['company_name'] = item['search_page']['custName']

# 公司編號
parsed_item['company_no'] = item['search_page']['custNo']

# 應徵人數
parsed_item['apply_count'] = int(item['search_page']['applyCnt'])

# 最低薪資
parsed_item['salary_min'] = int(item['search_page']['salaryLow'])

# 最高薪資
parsed_item['salary_max'] = int(item['search_page']['salaryHigh'])

# 薪資描述
parsed_item['salary_desc'] = item['search_page']['salaryDesc']

# 薪資類型code
parsed_item['salary_type'] = item['search_page']['s10']

# 職缺說明
parsed_item['job_desc'] = item['search_page']['description']

# 職缺類別
parsed_item['job_type'] = item['search_page']['jobType']

# 工作類型code
parsed_item['job_role'] = item['search_page']['jobRole']

# 學歷要求
parsed_item['edu'] = item['search_page']['optionEdu']

# 經驗要求
parsed_item['work_exp'] = int(item['search_page']['period'])

# 工作縣市
parsed_item['job_addr_dist'] = item['search_page']['jobAddrNoDesc']

# 工作地點
parsed_item['job_addr'] = item['search_page']['jobAddrNoDesc'] + item['search_page']['jobAddress']

# 經度
parsed_item['lon'] = item['search_page']['lon']

# 緯度
parsed_item['lat'] = item['search_page']['lat']

# 出現日期
parsed_item['appear_date'] = int(item['search_page']['appearDate'])

parsed_item

{'job_id': '50h5o',
 'job_no': '8420316',
 'job_name': '軟體開發工程師 (車載產品自動化測試平台)_林口',
 'company_name': 'GARMIN_台灣國際航電股份有限公司',
 'company_no': '23527104000',
 'apply_count': 5,
 'salary_min': 0,
 'salary_max': 0,
 'salary_desc': '待遇面議',
 'salary_type': '10',
 'job_desc': '【部門說明】\nAutomotive electronics has become a global trend in the technology field. Being one of the leaders in the field, Garmin aims to become a superior supplier for international Automotive manufacturers with various products and professional services. We welcome you to join the team.\n\n In this position, you will learn to-\n1. Gain an exclusive insight of the Automotive industry, including the market status, supply chain and future trend\n2. Become familiar with Automotive systems of in Europe, America, China, Japan and their requirements in product development, quality and service\n3. Understand cutting edge technology, product realization and manufacture process of Garmin’s Automotive electronics projects\n4. Grasp t