In [1]:
import pandas as pd
import json

def xls_to_json(file_path, output_path):
    # 读取Excel文件
    df = pd.read_excel(file_path)
    
    # 将每一行的数据转换为JSON对象，并存储在一个列表中
    data = df.to_dict(orient='records')
    
    # 将数据写入到JSON文件中
    with open(output_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=4)

# 使用示例
file_path = '/Users/hugo/Projects/NLBGP/BGPAgent/raw_data/bgpleak.xls'
output_path = '/Users/hugo/Projects/NLBGP/BGPAgent/raw_data/bgpleak.json'
xls_to_json(file_path, output_path)

In [4]:
import json
from collections import defaultdict

def process_as_path(as_path):
    # 将as_path字符串按空格分隔
    as_numbers = as_path.strip().split()
    
    # 去除连续重复的对象
    processed_as_numbers = []
    prev_number = None
    for number in as_numbers:
        if number != prev_number:
            processed_as_numbers.append(number)
        prev_number = number
    
    # 将分隔符从空格改为"-"
    return '-'.join(processed_as_numbers)

def process_and_separate_json(input_path, output_directory):
    # 读取JSON文件
    with open(input_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)
    
    length_to_data = defaultdict(list)

    # 处理每个对象的as_path字段
    for obj in data:
        if 'as_path' in obj:
            obj['as_path'] = process_as_path(obj['as_path'])
            as_path_length = len(obj['as_path'].split('-'))
            length_to_data[as_path_length].append(obj)
    
    # 将按长度分类后的数据写回到不同的JSON文件
    for length, objects in length_to_data.items():
        output_path = f"{output_directory}/as_path_length_{length}.json"
        with open(output_path, 'w', encoding='utf-8') as json_file:
            json.dump(objects, json_file, ensure_ascii=False, indent=4)

    # 打印每个长度对应的对象个数
    for length, objects in length_to_data.items():
        print(f"Number of objects with as_path length {length}: {len(objects)}")

    # 打印总共有多少个不同长度的as_path
    print(f"Number of different as_path lengths: {len(length_to_data)}")

    # 打印总共有多少个对象
    print(f"Number of objects: {len(data)}")

# 使用示例
input_path = '/Users/hugo/Projects/NLBGP/BGPAgent/raw_data/bgpleak.json'            # 替换为你的输入JSON文件路径
output_directory = '/Users/hugo/Projects/NLBGP/BGPAgent/filtered_data/bgpleak_different_length'               # 替换为你希望输出的目录路径
process_and_separate_json(input_path, output_directory)

Number of objects with as_path length 11: 157
Number of objects with as_path length 12: 138
Number of objects with as_path length 9: 130
Number of objects with as_path length 10: 163
Number of objects with as_path length 14: 51
Number of objects with as_path length 16: 14
Number of objects with as_path length 7: 84
Number of objects with as_path length 8: 106
Number of objects with as_path length 13: 90
Number of objects with as_path length 15: 21
Number of objects with as_path length 19: 2
Number of objects with as_path length 6: 54
Number of objects with as_path length 17: 2
Number of objects with as_path length 18: 2
Number of different as_path lengths: 14
Number of objects: 1014


In [5]:
import asyncio
import json
from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport
from tqdm.asyncio import tqdm  # 使用tqdm的asyncio支持版本

# 定义GraphQL查询模板
query_template = """
{
  asn(asn:"{asn}") {
      asnDegree {
         transit
      }
   }
}
"""

# 创建一个GraphQL客户端
transport = AIOHTTPTransport(url="https://api.asrank.caida.org/v2/graphql")
client = Client(transport=transport, fetch_schema_from_transport=True)

# 获取AS的transit degree
async def get_as_transit_degree(asn):
    query = gql(query_template.format(asn=asn))
    try:
        response = await client.execute_async(query)
        return response["asn"]["asnDegree"]["transit"]
    except Exception as e:
        print(f"Failed to get transit degree for ASN {asn}: {e}")
        return None

# 读取JSON文件并提取AS列表
def read_as_numbers(input_path):
    with open(input_path, "r") as file:
        data = json.load(file)
    as_numbers = set()
    for entry in data:
        as_path = entry.get("as_path", "")
        as_numbers.update(as_path.split("-"))
    return as_numbers

# 处理主函数
async def main(input_path):
    as_numbers = read_as_numbers(input_path)
    as_transit_degrees = {}

    for asn in tqdm(as_numbers):
        transit_degree = await get_as_transit_degree(asn)
        if transit_degree is not None:
            as_transit_degrees[asn] = transit_degree

    return as_transit_degrees

# 运行主函数
if __name__ == "__main__":
    input_path = "/Users/hugo/Projects/NLBGP/BGPAgent/filtered_data/bgpleak_filtered.json"
    as_transit_degrees = asyncio.run(main(input_path))
    print(as_transit_degrees)

RuntimeError: asyncio.run() cannot be called from a running event loop