In [2]:
import requests
# Ticker identifier used to build every URL in this cell; change it here only.
stock_code = "HK2018"

# Both addresses are derived from stock_code so the page that is fetched and the
# Referer that is sent always describe the same stock.
url = f"https://stockpage.10jqka.com.cn/{stock_code}/"
referer = f"https://stockpage.10jqka.com.cn/{stock_code}/news/"

# Request headers sent with the GET below (Referer, a desktop Chrome
# User-Agent string, and the X-Requested-With marker).
headers = {
    'Referer': referer,
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx so later cells never work on an error page.
response.raise_for_status()

In [None]:
from IPython.display import display, HTML
# Render the downloaded markup as rich HTML in the cell output.
display(HTML(data=response.text))

In [None]:
# Show only the beginning of the raw markup; echoing the whole page floods the
# cell output and bloats the saved notebook.
response.text[:2000]

In [14]:
import requests
from bs4 import BeautifulSoup
import json
import re

def get_stock_info(url):
    """
    Download the page at `url` and return the extracted stock fields as JSON.

    Args:
        url: Address of the stock page to request.

    Returns:
        A JSON string (indent=2, non-ASCII characters kept verbatim). On
        success it serialises the dict built by extract_stock_data. On failure
        it serialises a dict with a single "error" key whose message tells
        whether the HTTP request or the later processing step failed.
    """
    # Browser-like request headers. Accept-Encoding is intentionally NOT set:
    # requests then advertises only the encodings it can actually decode.
    # Forcing "br" makes the server free to answer with Brotli, which comes
    # back undecoded (and unparseable) when no Brotli package is installed.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.10jqka.com.cn/'
    }

    try:
        # Send the GET request; HTTP error statuses are turned into exceptions.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        # Decode the body as UTF-8 regardless of what the response headers say.
        response.encoding = 'utf-8'

        # Parse the markup and hand both the tree and the raw text to the
        # extractor (the raw text is needed for the regex-based lookups).
        soup = BeautifulSoup(response.text, 'html.parser')
        stock_data = extract_stock_data(soup, response.text)

        return json.dumps(stock_data, ensure_ascii=False, indent=2)

    except requests.RequestException as e:
        # Network problems, timeouts and HTTP error statuses.
        return json.dumps({"error": f"请求失败: {str(e)}"}, ensure_ascii=False, indent=2)
    except Exception as e:
        # Anything raised while parsing, extracting or serialising.
        return json.dumps({"error": f"数据解析失败: {str(e)}"}, ensure_ascii=False, indent=2)

def extract_stock_data(soup, html_content):
    """
    Gather all stock-related fields from a parsed page into one flat dict.

    Sources are merged in this order, later ones overwriting earlier keys:
    values from extract_global_data(html_content), values from
    extract_hidden_data(soup), the <title> text ('page_title'), the
    'keywords' and 'description' meta contents, and the ta_sid / ta_fid
    variables found in inline scripts.

    Any exception raised along the way is recorded under 'extraction_error'
    and whatever was collected so far is still returned.
    """
    collected = {}
    meta_names = ('keywords', 'description')
    tracker_patterns = (
        ('ta_sid', r'var ta_sid = "([^"]+)"'),
        ('ta_fid', r'var ta_fid = "([^"]+)"'),
    )

    try:
        # Variables assigned in inline JavaScript.
        from_scripts = extract_global_data(html_content)
        if from_scripts:
            collected.update(from_scripts)

        # Values carried by hidden div / input elements.
        from_hidden = extract_hidden_data(soup)
        if from_hidden:
            collected.update(from_hidden)

        # Page title.
        title_tag = soup.find('title')
        if title_tag:
            collected['page_title'] = title_tag.get_text().strip()

        # Meta tags; the output key equals the meta name.
        for meta_name in meta_names:
            meta_tag = soup.find('meta', attrs={'name': meta_name})
            if meta_tag:
                collected[meta_name] = meta_tag.get('content', '')

        # Statistics identifiers declared in inline scripts.
        for script_tag in soup.find_all('script', string=re.compile(r'ta_sid|ta_fid')):
            source = script_tag.string
            if not source:
                continue
            for field, pattern in tracker_patterns:
                hit = re.search(pattern, source)
                if hit:
                    collected[field] = hit.group(1)

    except Exception as exc:
        collected['extraction_error'] = str(exc)

    return collected

def extract_global_data(html_content):
    """
    Pull JavaScript variable assignments out of raw HTML text.

    Collects every quoted `global_data.<name> = '<value>'` assignment as
    name -> value (a repeated name keeps its last value) and, when present,
    the quoted `var STOCK_SKIN = ...` value under 'stock_skin'.

    Returns the resulting dict, or {'global_data_error': <message>} if the
    search raises.
    """
    try:
        # findall yields (name, value) tuples; dict() keeps the last duplicate.
        assignments = re.findall(
            r'global_data\.(\w+)\s*=\s*[\'"]([^\'"]+)[\'"]', html_content
        )
        found = dict(assignments)

        # Skin name declared as a standalone variable.
        skin = re.search(r'var STOCK_SKIN = [\'"]([^\'"]+)[\'"]', html_content)
        if skin is not None:
            found['stock_skin'] = skin.group(1)
    except Exception as exc:
        return {'global_data_error': str(exc)}

    return found

def extract_hidden_data(soup):
    """
    Read stock fields stored in elements looked up by id.

    For each known element id, the stripped text of a matching <div> is
    taken first; a matching <input>'s stripped `value` attribute, when
    non-empty, then overrides it. Empty values are skipped.

    Returns a dict keyed by the snake_case output names, or
    {'hidden_data_error': <message>} if a lookup raises.
    """
    # element id -> key used in the returned dict (insertion order matters
    # for the order of the output keys).
    id_to_key = {
        'pageStockCode': 'page_stock_code',
        'stockCode': 'stock_code',
        'pageStockName': 'page_stock_name',
        'pageStockMarketName': 'page_stock_market_name',
        'pageStockMarketCode': 'page_stock_market_code',
        'pageStockMarketId': 'page_stock_market_id',
        'pageFieldCode': 'page_field_code',
        'basicrating': 'basic_rating',
        'indexdataflash': 'index_data_flash',
        'indexBasicData': 'index_basic_data',
        'stockbbsinfo': 'stock_bbs_info',
    }
    collected = {}

    try:
        for dom_id, out_key in id_to_key.items():
            # Text content of a div carrying this id.
            div_node = soup.find('div', id=dom_id)
            if div_node:
                div_text = div_node.get_text().strip()
                if div_text:
                    collected[out_key] = div_text

            # An input with the same id wins over the div when it has a value.
            input_node = soup.find('input', id=dom_id)
            if input_node:
                input_value = input_node.get('value', '').strip()
                if input_value:
                    collected[out_key] = input_value
    except Exception as exc:
        return {'hidden_data_error': str(exc)}

    return collected

def format_stock_json(stock_data):
    """
    Regroup a flat stock dict into three named sections.

    The result has the keys 'stock_info', 'page_info' and 'technical_data';
    each section maps an output field to the value stored under the
    corresponding source key of `stock_data`, or to '' when that key is
    missing. Keys of `stock_data` that are not listed are ignored.
    """
    # (section, ((output field, source key), ...)) in output order.
    layout = (
        ("stock_info", (
            ("code", 'code'),
            ("name", 'codename'),
            ("thscode", 'thscode'),
            ("market", 'page_stock_market_name'),
            ("market_code", 'page_stock_market_code'),
            ("market_id", 'page_stock_market_id'),
            ("category", 'cate'),
            ("category_name", 'catename'),
            ("hycode", 'hycode'),
            ("hyname", 'hyname'),
        )),
        ("page_info", (
            ("title", 'page_title'),
            ("keywords", 'keywords'),
            ("description", 'description'),
            ("stock_skin", 'stock_skin'),
            ("ta_sid", 'ta_sid'),
            ("ta_fid", 'ta_fid'),
        )),
        ("technical_data", (
            ("field_code", 'page_field_code'),
            ("basic_rating", 'basic_rating'),
            ("index_data_flash", 'index_data_flash'),
            ("index_basic_data", 'index_basic_data'),
            ("stock_bbs_info", 'stock_bbs_info'),
        )),
    )

    return {
        section: {field: stock_data.get(source, '') for field, source in fields}
        for section, fields in layout
    }

# Entry point
def main(url="https://stockpage.10jqka.com.cn/HK2018/"):
    """
    Fetch a stock page, print its data as JSON and return that JSON string.

    Args:
        url: Page to fetch; defaults to the HK2018 stock page.

    Returns:
        The JSON string that was printed. When get_stock_info succeeds this is
        the sectioned layout produced by format_stock_json; when it reports an
        error (or the regrouping fails) it is get_stock_info's output as-is.
    """
    raw_json = get_stock_info(url)
    result = raw_json

    # Regroup the flat payload into sections; error payloads are left untouched.
    try:
        data = json.loads(raw_json)
        if 'error' not in data:
            formatted_data = format_stock_json(data)
            result = json.dumps(formatted_data, ensure_ascii=False, indent=2)
    except Exception:
        # Best effort: fall back to the unformatted payload. Catching Exception
        # (not a bare except) lets KeyboardInterrupt / SystemExit propagate.
        result = raw_json

    print(result)
    return result

# Run the entry point
if __name__ == "__main__":
    stock_json = main()

{
  "stock_info": {
    "code": "02018",
    "name": "瑞声科技",
    "thscode": "HK2018",
    "market": "恒生指数",
    "market_code": "HSZS",
    "market_id": "hk",
    "category": "index",
    "category_name": "首页概览",
    "hycode": "HS101025",
    "hyname": "工业 "
  },
  "page_info": {
    "title": "瑞声科技(02018)首页概览_港股行情_同花顺金融网",
    "keywords": "瑞声科技(02018)港股点评,瑞声科技(02018)港股查询,瑞声科技(02018)港股行情,瑞声科技(02018)港股资讯,瑞声科技(02018)港股数据,瑞声科技(02018)港股实时行情",
    "description": "提供瑞声科技(02018)实时行情数据；汇总瑞声科技(02018)基本资料及重大新闻、研究报告及行业资讯，解读瑞声科技(02018)主力资金动向、财务数据，为投资瑞声科技(02018)提供参考决策",
    "stock_skin": "white",
    "ta_sid": "sp_hk_index",
    "ta_fid": "29,info_gather,stockpage_hk"
  },
  "technical_data": {
    "field_code": "1155",
    "basic_rating": "",
    "index_data_flash": "",
    "index_basic_data": "",
    "stock_bbs_info": ""
  }
}
