Skip to content

Commit

Permalink
✨ feat: 随机获取一张图片 字节流
Browse files Browse the repository at this point in the history
  • Loading branch information
倪传方 committed May 12, 2023
1 parent a7c4c15 commit 0000afe
Show file tree
Hide file tree
Showing 8 changed files with 221 additions and 94 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ jobs:
- name: 启动服务
uses: cross-the-world/ssh-scp-ssh-pipelines@latest
continue-on-error: true
env:
WELCOME: 'ssh scp ssh pipelines'
LASTSSH: 'Doing something after copying'
Expand All @@ -72,9 +73,9 @@ jobs:
first_ssh: |
docker rm -f crawler
cd ~/docker/docker-compose
docker-compose pull
docker rm -f crawler
docker rmi -f registry.cn-hangzhou.aliyuncs.com/jayzhou/crawler:latest
docker-compose up -d
docker image prune -f -a
# scp: |
# './test/*' => /home/github/test/
# ./test/test1* => /home/github/test/test1/
Expand Down
230 changes: 140 additions & 90 deletions api/xray_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,115 +6,145 @@
from api.base_api import get_required
from api.base_api import get_not_required
import json
import random
from flask import request
import subprocess
import re
from my_selenium.my_selenium import logging

# 判断该站点是域名
domain_regex = r'^(?=^.{3,255}$)(http(s)?:\/\/)?(www\.)?[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+(:\d+)*(\/\w+\.\w+)*$'

# 判断域名前缀是否符合规则
prefix_regex = r''

# 添加xray客户端路由规则
def add_client_route_rule():
# 要添加的规则(域名/子域名) 逗号分隔
rules = get_required('rules')
# 是否是子域名 is_subdomain 匹配当前域名与所有子域名 反之就是完全匹配 默认是子域名
is_subdomain = get_not_required('is_subdomain',True)
# 处理后的规则集
handled_rules = []
# 处理前的规则集
unhandled_rules = rules.split(',')

# 策略 支持direct proxy block
strategy = get_required('strategy')
# 定位 标识该规则放在 header body 还是 footer
position = get_required('position')

domain_regex = r'^(?=^.{3,255}$)(http(s)?:\/\/)?(www\.)?[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+(:\d+)*(\/\w+\.\w+)*$'
# Generate a random rule id.
def generate_uuid():
    """Return a random 16-character lowercase alphanumeric id string.

    Fixes two defects of the original implementation:
    - ``random.sample(...)`` returns a *list* of characters, not a string,
      so the generated "uuid" was never a usable string id.
    - sampling without replacement needlessly forbids repeated characters;
      ``random.choices`` draws with replacement over the full 36-char space.
    """
    return ''.join(random.choices('0123456789abcdefghijklmnopqrstuvwxyz', k=16))

def format(unhandled_rule:str):
# 判断当前参数是否符合域名规则(正则匹配)
match = re.match(domain_regex,unhandled_rule.strip())
# 校验规则(基于域名)
def verify_rule(rule:str):
split_res = rule.strip().split(':')
if len(split_res) == 1:
# 不包含: 直接校验域名
match = re.match(domain_regex,split_res[0])
if match is None:
raise RuntimeError(f'规则列表中有非域名规则: {unhandled_rule.strip()}!')
if is_subdomain:
raise RuntimeError(f'规则{split_res[0]}不是一个合法的域名!')
pass


return ''

# 若xray-parser不存在则clone
print(subprocess.call('nsenter -m -u -i -n -p -t 1 sh -c "[ ! -d /root/code/xray-parser ] && cd /root/code && git clone https://github.com/nichuanfang/xray-parser.git"',shell=True))
# 若xray-parser存在则pull
print(subprocess.call('nsenter -m -u -i -n -p -t 1 sh -c "[ -d /root/code/xray-parser ] && cd /root/code/xray-parser && git checkout . && git checkout client && git pull"',shell=True))
# 获取routing/routing_header.json routing/routing_body.json routing/routing_footer.json
with open('/code/xray-parser/routing/routing_header.json') as header_file:
header:dict = json.load(header_file)
# 头部规则集
header_rules:list[str] = []
for rule_entity in header['rules']:
try:
header_rules.__add__(rule_entity['domain'])
except:
continue

with open('/code/xray-parser/routing/routing_body.json') as body_file:
body:dict = json.load(body_file)
# body规则集
body_rules:list[str] = []
for rule_entity in body['rules']:
try:
body_rules.__add__(rule_entity['domain'])
except:
continue

with open('/code/xray-parser/routing/routing_footer.json') as footer_file:
footer:dict = json.load(footer_file)
# 尾部规则集
footer_rules:list[str] = []
for rule_entity in footer['rules']:
try:
footer_rules.__add__(rule_entity['domain'])
except:
continue



def format(unhandled_rule:str):
judged_rule = unhandled_rule.strip()
# 判断当前参数是否符合域名规则(正则匹配)
domain_match = re.match(domain_regex,judged_rule)
prefix_match = re.match(prefix_regex,judged_rule)
if domain_match is None:
raise RuntimeError(f'规则列表中有非域名规则: {judged_rule}!')
# 当前域名和所有的子域名 可以自定义规则
return unhandled_rule.strip()
else:
# 完全匹配
return f'full:{unhandled_rule.strip()}'

def choose(handled_rules, unhandled_rules, special_rules):
    """Collect the rules from *unhandled_rules* not already configured.

    Each raw rule is normalized via the module-level ``format`` helper and
    appended to *handled_rules* (mutated in place) unless some entry of
    *special_rules* already lists it in its 'domain' list — a duplicate
    domain across rule bodies would shadow the later one.

    Args:
        handled_rules: output list of formatted rule strings, mutated in place.
        unhandled_rules: iterable of raw rule strings.
        special_rules: iterable of rule dicts; each is expected to carry a
            'domain' list (a missing key raises KeyError, as in the original).
    """
    for raw_rule in unhandled_rules:
        candidate = format(raw_rule)
        # Idiomatic membership test replaces the original's explicit
        # flag/break loop over special_rules and its `__contains__` call.
        if not any(candidate in special_rule['domain'] for special_rule in special_rules):
            handled_rules.append(candidate)


# 若xray-parser不存在则clone
print(subprocess.call('nsenter -m -u -i -n -p -t 1 sh -c "[ ! -d /root/code/xray-parser ] && cd /root/code && git clone https://github.com/nichuanfang/xray-parser.git"',shell=True))
# 若xray-parser存在则pull
print(subprocess.call('nsenter -m -u -i -n -p -t 1 sh -c "[ -d /root/code/xray-parser ] && cd /root/code/xray-parser && git checkout . && git checkout client && git pull"',shell=True))
# 获取routing/routing_header.json routing/routing_body.json routing/routing_footer.json
with open('/code/xray-parser/routing/routing_header.json') as header_file:
header = json.load(header_file)
return judged_rule

with open('/code/xray-parser/routing/routing_body.json') as body_file:
body = json.load(body_file)

with open('/code/xray-parser/routing/routing_footer.json') as footer_file:
footer = json.load(footer_file)


# 添加一条xray客户端路由规则 get请求添加
def add_client_route_rule():
# 要添加的规则(域名/子域名) 域名前缀需要正则校验!
rule = get_required('rule')
# 校验该规则是否合规 (基于domain的规则)
rule = verify_rule(rule)
# 规则id 非必填 如果存在 表示该条记录添加到当前规则下 如果不存在 新增一个规则体
rule_id = get_not_required('rule_id','')
# 策略 支持direct proxy block
strategy = get_required('strategy')
# 定位 标识该规则放在 header body 还是 footer
position = get_required('position')

# 当前规则所属的规则集 header | body | footer
position_data = {}
# 当前位置及之前的所有规则组成的超集
pre_rules = []
match position:
case 'header':
header_rules:list = header['rules']
# 判断当前域名是否已添加规则

# 最终要添加的规则集
handled_rules = []
choose(handled_rules,unhandled_rules,header_rules)
header_rules.append({
'type': 'field',
'outboundTag': strategy,
'domain': handled_rules
})
with open('/code/xray-parser/routing/routing_header.json','w+') as w_header_file:
json.dump(header,w_header_file)
position_data:dict = header
pre_rules:list = header_rules
case 'body':
body_rules:list = body['rules']
handled_rules = []
choose(handled_rules,unhandled_rules,body_rules)
body_rules.append({
'type': 'field',
'outboundTag': strategy,
'domain': handled_rules
})
with open('/code/xray-parser/routing/routing_body.json','w+') as w_body_file:
json.dump(body,w_body_file)
position_data:dict = body
pre_rules:list = header_rules.__add__(body_rules)
case 'footer':
footer_rules:list = footer['rules']
handled_rules = []
choose(handled_rules,unhandled_rules,footer_rules)
footer_rules.append({
'type': 'field',
'outboundTag': strategy,
'domain': handled_rules
})
with open('/code/xray-parser/routing/routing_footer.json','w+') as w_footer_file:
json.dump(footer,w_footer_file)
case _:
pass
position_data:dict = footer
pre_rules:list = header_rules.__add__(body_rules).__add__(footer_rules)
pass
position_rules:list = position_data['rules']

# 如果之前有配置过相同的规则 就会让这条规则失效! 所以应该不予添加 并返回友好提示
if pre_rules.__contains__(rule):
raise RuntimeError('已配置该规则!')

if rule_id == '':
# 新增规则体
position_rules.append({
'id': generate_uuid(),
'type': 'field',
'outboundTag': strategy,
'domain': ''
})
else:
# 修改规则体 往已存在的规则中添加域名/ip 仍需要判断之前没配置过

pass
# 最终要添加的规则集
# choose(handled_rules,unhandled_rules,position_rules)
# position_rules.append({
# 'type': 'field',
# 'outboundTag': strategy,
# 'domain': handled_rules
# })
with open(f'/code/xray-parser/routing/routing_{position}.json','w+') as w_file:
json.dump(position_data,w_file)
pass



if len(handled_rules) == 0:
return f'无需更新路由规则!'
return f'无需更新路由规则!'
user_name = 'github-actions[bot]'
email = 'github-actions[bot]@users.noreply.github.com'
# 指定用户和邮箱 git config user.name git config user.email
Expand All @@ -128,6 +158,26 @@ def choose(handled_rules,unhandled_rules,special_rules):
return f'xray客户端已更新{len(handled_rules)}条路由规则!'



# Add multiple routing rules in one request (POST endpoint).
def batch_add_client_route_rule():
    """Batch-add xray client routing rules. Not implemented yet (stub)."""
    pass

# Delete an xray client routing rule.
def del_client_route_rule():
    """Delete routing rules from the xray client config. Not implemented (stub)."""
    # Rules to delete (domains/subdomains), comma separated.
    pass

# Preview the xray client routing configuration.
def client_route_rule_preview():
    """Preview the xray client routing rules. Not implemented yet (stub)."""
    # Rules to look up (domains/subdomains), comma separated.
    # NOTE(review): the original comment said "rules to delete" — looks
    # copy-pasted from del_client_route_rule; confirm intended semantics.
    rules = get_required('rules')
    # is_subdomain: when truthy, match the domain and all its subdomains;
    # otherwise require an exact match. Defaults to subdomain matching.
    is_subdomain = get_not_required('is_subdomain',True)

    pass


if __name__ == '__main__':
add_client_route_rule()

12 changes: 12 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from api import tmdb_api
from api import xray_api
from cron.job.wallpaper_crawling import craw_wallpaper
from cron.job.wallpaper_crawling import random_wallpaper
from cron.job.tmm_movie_check import tmm_movie_check
from my_selenium.my_selenium import logging
from aliyundrive import ali_drive
Expand Down Expand Up @@ -49,6 +50,17 @@ def add_rule():
except Exception as e:
return e.__str__()

#=======================================图片=========================================================

@app.route(rule='/wallpaper/random',methods=['get'])
def wallpaper_random():
    """Fetch one random wallpaper and return its raw image bytes.

    Returns:
        bytes: image content produced by ``random_wallpaper`` (which
        delegates to the wallhaven.cc crawler).
    """
    return random_wallpaper()

# =====================================定时任务======================================================
@app.route(rule='/job/list', methods=['get'])
def list_job():
Expand Down
21 changes: 21 additions & 0 deletions crawling/wallpaper/wallpaper_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,27 @@ def craw_wallpaper():
print(subprocess.call('nsenter -m -u -i -n -p -t 1 sh -c "docker restart nginx"',shell=True))
logging.info('已重启nginx')

def craw_random_wallpaper(max_attempts=10):
    """Scrape one random wallpaper from wallhaven.cc and return its bytes.

    Picks a random results page, a random thumbnail on it, resolves the
    full-resolution image URL from the detail page and downloads it.
    Retries with a fresh random pick when the detail page carries no
    ``#wallpaper`` element.

    Args:
        max_attempts: safety cap on retries. The original recursed without
            a bound (RecursionError risk on repeated misses).

    Returns:
        bytes: the raw image content.

    Raises:
        RuntimeError: if no wallpaper could be resolved in *max_attempts* tries.
    """
    for _ in range(max_attempts):
        # Random page out of the first 200 result pages (site-dependent
        # upper bound — TODO confirm the catalogue still has >= 200 pages).
        soup = get_soup(f'https://wallhaven.cc/search?categories=110&purity=100&atleast=1280x720&sorting=date_added&order=desc&ai_art_filter=1&page={random.randint(1, 200)}')
        # Each search result thumbnail is a <figure> element.
        figures = soup.findAll('figure')
        if not figures:
            # Empty result page — retry with another random page.
            continue
        # randrange(len(...)) covers every thumbnail including index 0 and
        # cannot overrun a short result page (the original hard-coded 1..24,
        # which skipped index 0 and raised IndexError on pages with < 25 hits).
        figure = figures[random.randrange(len(figures))]
        # Resolve the wallpaper detail page linked by the thumbnail.
        link_soup = get_soup(figure.contents[1]['href'])
        img_ele = link_soup.find(id='wallpaper')
        if img_ele is None:
            # Detail page has no wallpaper element — try another random pick.
            continue
        url = img_ele['src']
        # Timeout keeps a stalled download from hanging the caller forever.
        r = requests.get(url, timeout=30)
        logging.info(f'已刮削图片:{url}')
        return r.content
    raise RuntimeError(f'failed to fetch a random wallpaper after {max_attempts} attempts')

if __name__ == '__main__':
craw_wallpaper()
pass
6 changes: 5 additions & 1 deletion cron/job/wallpaper_crawling.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# 爬取壁纸
from crawling.wallpaper.wallpaper_crawler import craw_wallpaper as cw
from crawling.wallpaper.wallpaper_crawler import craw_random_wallpaper

def craw_wallpaper():
cw()
cw()

def random_wallpaper():
    """Delegate to the crawler and return one random wallpaper as raw bytes."""
    return craw_random_wallpaper()
15 changes: 14 additions & 1 deletion openapi/apidoc-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ tags:
- name: xray
description: xray配置api

- name: image
description: 图片服务

- name: 定时任务
description: 定时任务api

Expand Down Expand Up @@ -50,7 +53,7 @@ paths:
description: 偏移量
required: false
# path query header cookie
in: integer
in: query
schema:
# 参数类型
type: string
Expand Down Expand Up @@ -182,6 +185,16 @@ paths:
200:
description: OK

/wallpaper/random:
get:
tags:
- image
summary: '随机获取一张图片'
description: '随机获取一张图片(字节流)'
responses:
'200':
description: OK

/job/list:
get:
tags:
Expand Down
Loading

0 comments on commit 0000afe

Please sign in to comment.