-
Notifications
You must be signed in to change notification settings - Fork 5
/
pydict_cn_bing_com.py
51 lines (41 loc) · 1.33 KB
/
pydict_cn_bing_com.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# -*- coding: utf-8 -*-
# version: python 3.5
import urllib
from bs4 import BeautifulSoup
from file_util import *
from http_util import *
from beautiful_soup_util import *
def get_definition_cn_bing_com(word, resource_path):
    """Look up *word* on the Bing dictionary and return a trimmed result.

    The page ``http://cn.bing.com/dict/search?q=<word>`` is downloaded and
    parsed with the ``html5lib`` parser. The first element matching the
    ``.lf_area`` selector is taken as the content, handed to the ``bsu_*``
    helpers for cleanup, and then concatenated with the text of every
    ``html`` file collected from *resource_path*.

    Args:
        word: Keyword to look up; it is URL-quoted before being placed in
            the query string.
        resource_path: Location passed to ``file_util_get_files`` to collect
            the files whose HTML is appended to the result.

    Returns:
        A one-element list containing the UTF-8 encoded bytes of the cleaned
        content followed by the injected HTML.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded, in which case
    # ``urllib.parse.quote`` would raise AttributeError.
    import urllib.parse

    url = 'http://cn.bing.com/dict/search?q=%s' % urllib.parse.quote(word)
    html = http_util_get_html(url).decode('utf-8')
    soup = BeautifulSoup(html, 'html5lib')

    # Only the first '.lf_area' element is used; None when there is no match.
    # NOTE(review): a None content is still passed to the helpers below and
    # ends up as the text "None" in the output if they tolerate it -- confirm
    # whether a missing definition area should be handled explicitly.
    content = next(iter(soup.select('.lf_area')), None)

    # Selector -> attribute name; presumably the helper removes that attribute
    # from every element matching the selector -- verify against
    # beautiful_soup_util.
    bsu_del_attr_by_select_dict(content, {
        '[style]': 'style',
        'a[href*="/dict"]': 'href',
        'a[href*="/search"]': 'href',
        'a[onmousemove*="alignWords"]': 'onmousemove',
        'a[onclick*="BilingualAjax"]': 'onclick',
        'a[title="点击朗读"]': 'title',
    })
    # Selectors of elements to drop; presumably the helper deletes every
    # match -- verify against beautiful_soup_util.
    bsu_del_ele_by_select_list(content, [
        '#defid',
        '.wd_div',
        '.df_div',
        '.filter',
        '#filshow',
        '#filhide',
        '.bi_pag',
        '#loaddataid',
        '.hd_area',
    ])

    # file_util_get_files receives the list as an out-parameter; it is
    # assumed to append the discovered paths to it -- TODO confirm.
    injection = []
    file_util_get_files(resource_path, injection)
    injection_html = ''.join(
        file_util_read_text(p)
        for p in injection
        if file_util_is_ext(p, 'html')
    )

    return [bytes(str(content) + injection_html, encoding='utf-8')]