Skip to content

Commit

Permalink
perf: change requests to urllib
Browse files: browse the repository at this point in the history
  • Loading branch information
shengchenyang committed Apr 2, 2024
1 parent 8112f14 commit f014030
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 31 deletions.
6 changes: 0 additions & 6 deletions ayugespidertools/common/params.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import platform
import random

__all__ = [
"Param",
Expand All @@ -15,11 +14,6 @@ class Param:
# stop_max_delay 限制最长重试时间
stop_max_delay = 5000

requests_req_timeout = 3
requests_res_timeout = 5
requests_time_sleep_list = [x / 10 for x in range(5, 19)]
requests_time_sleep_random = random.choice(requests_time_sleep_list)

aiohttp_retry_times_default = 3

# 部署运行的平台为 win 或 linux
Expand Down
36 changes: 14 additions & 22 deletions ayugespidertools/common/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import json
import random
import urllib.request
from functools import lru_cache
from pathlib import Path
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from urllib.parse import urlparse

import requests

from ayugespidertools.common.encryption import EncryptOperation
from ayugespidertools.common.multiplexing import ReuseOperation
from ayugespidertools.common.params import Param
from ayugespidertools.config import logger
from ayugespidertools.extras.ext import AppConfManageMixin
from ayugespidertools.formatdata import DataHandle
Expand Down Expand Up @@ -60,29 +58,23 @@ def get_remote_kvs(
Returns:
1). 远程配置中 key_values 的详细信息
"""
headers = {"X-Consul-Token": token} if remote_type == "consul" else None
try:
r = requests.get(
url,
headers=headers,
verify=False,
timeout=(
Param.requests_req_timeout,
Param.requests_res_timeout,
),
)
except (
requests.exceptions.ConnectionError,
requests.exceptions.ConnectTimeout,
) as e:
raise ValueError(f"请求远程配置 {remote_type} api 超时!") from e
headers = (
{"X-Consul-Token": token}
if all([remote_type == "consul", token is not None])
else {}
)
req = urllib.request.Request(url=url, headers=headers)
r = urllib.request.urlopen(req)
data = r.read().decode(errors="ignore")

url_params = urlparse(url).query
if remote_type == "consul":
if "raw" in url_params:
return r.text
return EncryptOperation.base64_decode(decode_data=r.json()[0]["Value"])
return r.text
return data

json_data = json.loads(data)
return EncryptOperation.base64_decode(decode_data=json_data[0]["Value"])
return data

@classmethod
def fetch_remote_conf(
Expand Down
9 changes: 6 additions & 3 deletions ayugespidertools/scraper/middlewares/proxy/exclusive.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import base64
import json
import urllib.request
from typing import TYPE_CHECKING

import requests
from scrapy import signals

__all__ = [
Expand Down Expand Up @@ -36,8 +37,10 @@ def from_crawler(cls, crawler: "Crawler") -> "Self":
def get_proxy_ip(self, proxy_url: str, index: int) -> str:
"""获取独享代理接口的索引为 proxy_index 的代理信息"""
try:
r = requests.get(proxy_url)
proxy_list = r.json().get("data").get("proxy_list")
req = urllib.request.Request(url=proxy_url)
r = urllib.request.urlopen(req)
content = r.read().decode(errors="ignore")
proxy_list = json.loads(content).get("data").get("proxy_list")
proxy_list.sort()
if index < len(proxy_list):
return proxy_list[index]
Expand Down

0 comments on commit f014030

Please sign in to comment.