-
Notifications
You must be signed in to change notification settings - Fork 0
/
wallpaper.py
139 lines (130 loc) · 7.25 KB
/
wallpaper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""
故宫壁纸
版本: v1.8
作者: kingc2022
代码编辑器: Microsoft Visual Studio Code
文档&注释生成: Mintlify Doc Writer for Python, JavaScript, TypeScript, C++, PHP, Java, C#, Ruby & more
程序用到的库:
· requests
· re
· os
· datetime
· time
License: https://github.com/kingc2022/GuGongWallpaper/blob/main/LICENSE
Github: https://github.com/kingc2022
Github项目地址: https://github.com/kingc2022/GuGongWallpaper
--------------------
Wallpaper of the imperial palace
Version: v1.8
Author: kingc2022
Code editor: Microsoft Visual Studio Code
Documentation & comment generation: Mintlify Doc Writer for Python, JavaScript, TypeScript, C++, PHP, Java, C#, Ruby & more
Libraries used by the program:
· requests
· re
· os
· datetime
· time
License: https://github.com/kingc2022/GuGongWallpaper/blob/main/LICENSE
Github: https://github.com/kingc2022
GitHub project address: https://github.com/kingc2022/GuGongWallpaper
"""
# 导入程序运行所需的库。
import requests
import re
import os
from datetime import datetime
import time
def main(page, save_path="D:/Desktop/imgs", timeout=30):
    """
    Download wallpapers from the www.dpm.org.cn "lights/royal" listing.

    Listing pages ``1 .. page - 1`` are fetched (the loop is
    ``range(1, page)``), so a caller that wants N pages must pass ``N + 1``.
    Every picture id found on a listing page is resolved to its detail page,
    and each full-size image found there is written to ``save_path`` as
    ``1.jpg``, ``2.jpg``, ... in download order.

    :param page: exclusive upper bound of the listing page numbers to crawl
    :param save_path: existing directory the images are written into
    :param timeout: timeout in seconds passed to every ``requests.get`` call;
        ``None`` waits indefinitely
    :raises ValueError: if ``save_path`` is not an existing directory when a
        listing page is about to be crawled
    :return: None

    Exceptions raised by ``requests`` or by file writing are not caught here.
    """
    base_url = "https://www.dpm.org.cn"
    big_img_base_url = "https://img.dpm.org.cn"

    # Request headers for the listing pages. They never change between
    # iterations, so they are built once instead of once per page.
    # NOTE(review): the Cookie value is a captured browser session and is
    # presumably stale by now -- confirm whether the site needs it at all.
    list_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': 'secure; acw_tc=b65cfd3716542646761064945e6b2e3f38ecdf13260981a1a809d960fa1afd; PHPSESSID=66a068a5b68beeb567eeb602a805272e; saw_terminal=default; UM_distinctid=18129dbb8c821a-088ba05f77c4f8-9126f2c-15f900-18129dbb8c9157; CNZZDATA1261553859=1879722444-1654263813-%7C1654263813; _abfpc=927f5ff44ead9af71b6ad7e5cd811d19dc9cfbf7_2.0; secure; cna=fccf890f80c665a90a7d9ad66ed40b13; cn_1261553859_dplus=%7B%22distinct_id%22%3A%20%2218129dbb8c821a-088ba05f77c4f8-9126f2c-15f900-18129dbb8c9157%22%2C%22%24_sessionid%22%3A%200%2C%22%24_sessionTime%22%3A%201654264687%2C%22%24dp%22%3A%200%2C%22%24_sessionPVTime%22%3A%201654264687%2C%22initial_view_time%22%3A%20%221654263813%22%2C%22initial_referrer%22%3A%20%22%24direct%22%2C%22initial_referrer_domain%22%3A%20%22%24direct%22%2C%22%24recent_outside_referrer%22%3A%20%22%24direct%22%7D',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    # Request headers for the per-picture detail pages.
    detail_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Cookie': 'secure; acw_tc=b65cfd3716542646761064945e6b2e3f38ecdf13260981a1a809d960fa1afd; PHPSESSID=66a068a5b68beeb567eeb602a805272e; saw_terminal=default; UM_distinctid=18129dbb8c821a-088ba05f77c4f8-9126f2c-15f900-18129dbb8c9157; CNZZDATA1261553859=1879722444-1654263813-%7C1654263813; _abfpc=927f5ff44ead9af71b6ad7e5cd811d19dc9cfbf7_2.0; cna=fccf890f80c665a90a7d9ad66ed40b13; Secure; cn_1261553859_dplus=%7B%22distinct_id%22%3A%20%2218129dbb8c821a-088ba05f77c4f8-9126f2c-15f900-18129dbb8c9157%22%2C%22%24_sessionid%22%3A%200%2C%22%24_sessionTime%22%3A%201654265359%2C%22%24dp%22%3A%200%2C%22%24_sessionPVTime%22%3A%201654265359%2C%22initial_view_time%22%3A%20%221654263813%22%2C%22initial_referrer%22%3A%20%22%24direct%22%2C%22initial_referrer_domain%22%3A%20%22%24direct%22%2C%22%24recent_outside_referrer%22%3A%20%22%24direct%22%7D',
        'Pragma': 'no-cache',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    # Raw strings keep "\d" a regex escape rather than an (invalid) Python
    # string escape; both patterns are compiled once, outside the loops.
    listing_link_re = re.compile(
        r'<a target="_blank" href="/light/(\d+).html"><img alt=".*?" title=".*?" src="https://img.dpm.org.cn/Uploads/Picture/.*?"></a>',
        re.S,
    )
    big_img_re = re.compile(
        r'<img style="visibility: visible;width: 100%;" src="(.*?)">',
        re.S,
    )

    # Number of images written so far; also the name of the last saved file.
    saved_count = 0

    # Loop over the listing pages.
    for page_no in range(1, page):
        # Check the target directory before spending any network traffic on
        # this page instead of after the image has already been downloaded.
        if not os.path.isdir(save_path):
            raise ValueError("你输入的路径不存在!")

        url = f"https://www.dpm.org.cn/lights/royal/p/{page_no}.html"
        print(f"正在爬取第{page_no}页:\n")
        response = requests.get(url, headers=list_headers, timeout=timeout)
        response.encoding = response.apparent_encoding
        pic_ids = listing_link_re.findall(response.text)

        # Loop over the detail page of every picture on this listing page.
        for pic_no, pic_id in enumerate(pic_ids, start=1):
            pic_url = f"{base_url}/light/{pic_id}"
            print(f"\t正在爬取第{pic_no}张壁纸")
            res = requests.get(pic_url, headers=detail_headers, timeout=timeout)
            res.encoding = res.apparent_encoding

            # Loop over the full-size image paths found on the detail page.
            for img_path in big_img_re.findall(res.text):
                # Strip trailing markup that the lazy match can drag along.
                img_path = img_path.replace('" />\r\n <div class="hide_wall', '')
                r = requests.get(f"{big_img_base_url}{img_path}", timeout=timeout)
                print(f"\t正在保存第{pic_no}张壁纸\n")
                saved_count += 1
                with open(f"{save_path}/{saved_count}.jpg", "wb") as f:
                    f.write(r.content)
def format_elapsed(total_seconds):
    """
    Render a duration as the script's "hours / minutes / seconds" summary.

    The value is truncated to whole seconds and split with ``divmod`` so
    that minutes and seconds are each the remainder within the larger unit
    (3700 seconds is 1 hour, 1 minute, 40 seconds).

    :param total_seconds: elapsed time in seconds (int or float)
    :return: the summary line printed at the end of the run
    """
    minutes, seconds = divmod(int(total_seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return f"共{hours}小时{minutes}分钟{seconds}秒"


if __name__ == "__main__":
    page = int(input("共124页, 每页9张, 爬取几页?\n"))
    # main() iterates range(1, page), so pass one past the last wanted page.
    page = page + 1
    start = datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")
    # perf_counter() is unaffected by system clock adjustments during the run.
    start_time = time.perf_counter()
    main(page)
    end = datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")
    time_diff = time.perf_counter() - start_time
    print("爬取&保存成功!\n")
    print(f"开始时间: {start}\n结束时间: {end}")
    print(format_elapsed(time_diff))