In [1]:
import pandas as pd
from geopy.distance import geodesic

# 读取站点数据
sites = pd.read_csv('./edge-servers/site-optus-melbCBD.csv')

# 读取用户数据
users = pd.read_csv('./users/users-melbcbd-generated.csv')

# 创建一个字典来记录每个站点服务的用户数
user_counts = {site['SITE_ID']: 0 for i, site in sites.iterrows()}

# 找到每个用户最近的站点，并增加该站点的用户计数器
for i, user in users.iterrows():
    min_dist = float('inf')
    min_site_id = ''
    for j, site in sites.iterrows():
        dist = geodesic((user['Latitude'], user['Longitude']), (site['LATITUDE'], site['LONGITUDE'])).km
        if dist < min_dist:
            min_dist = dist
            min_site_id = site['SITE_ID']
    user_counts[min_site_id] += 1

print(user_counts)
# 创建一个新的 DataFrame，用于存储站点和其对应的用户数量
site_counts = pd.DataFrame({'SITE_ID': list(user_counts.keys()), 'USER_COUNT': list(user_counts.values())})

# 根据用户数量对站点进行排序
sorted_sites = sites.merge(site_counts, on='SITE_ID').sort_values(by=['USER_COUNT'], ascending=False)

# 选取20个均匀分布的站点
num_sites = len(sorted_sites)
interval = int(num_sites / 20)
selected_sites = sorted_sites.iloc[::interval]

# 输出每个站点服务的用户数
for site_id, count in user_counts.items():
    print(f"Site {site_id} serves {count} users")

# 输出选取的20个站点
print(selected_sites[['SITE_ID', 'USER_COUNT']])

# 创建一个空列表来保存选中的站点的 SITE_ID
selected_site_ids = []

# 按照用户数排序并选取20个站点
for i in range(20):
    site = sorted_sites.iloc[i]
    selected_site_ids.append(site['SITE_ID'])

{10003026: 0, 10003027: 9, 10003238: 3, 10004167: 10, 10004576: 2, 101373: 9, 101381: 24, 101385: 4, 101636: 6, 11571: 5, 11579: 5, 11581: 9, 11590: 0, 11591: 11, 11593: 9, 11599: 2, 11600: 5, 11601: 3, 130005: 17, 130439: 14, 134245: 8, 134317: 5, 134329: 2, 134360: 6, 134386: 2, 134403: 3, 134449: 5, 134453: 5, 134454: 6, 134547: 7, 134554: 6, 134565: 9, 134574: 5, 134680: 8, 134733: 5, 134754: 24, 134822: 14, 134857: 1, 134872: 10, 134901: 3, 134906: 5, 134923: 5, 134941: 2, 134980: 3, 134990: 1, 135009: 14, 135011: 4, 135045: 3, 135073: 1, 135143: 17, 135213: 5, 135231: 4, 135237: 8, 135253: 7, 135306: 10, 135330: 13, 135390: 23, 206082: 4, 301205: 7, 301208: 4, 301240: 6, 301361: 9, 301382: 6, 301383: 13, 301386: 10, 301388: 8, 301393: 6, 301645: 13, 301658: 2, 301895: 2, 301896: 15, 302516: 4, 302517: 5, 302571: 12, 302854: 4, 302923: 4, 303255: 6, 303652: 3, 303676: 9, 303710: 3, 303712: 20, 304060: 6, 304363: 8, 304364: 4, 304365: 1, 304366: 4, 304368: 8, 304369: 6, 304370: 1, 

In [15]:
import itertools
# 从 selected_sites 中获取 SITE_ID、LATITUDE 和 LONGITUDE 列
site_ids = selected_sites['SITE_ID'].tolist()
latitudes = selected_sites['LATITUDE'].tolist()
longitudes = selected_sites['LONGITUDE'].tolist()

# 计算站点两两之间的距离，并将结果保存在 distances 字典中
distances = {}
for site1, site2 in itertools.combinations(range(len(site_ids)), 2):
    distance = geodesic((latitudes[site1], longitudes[site1]), (latitudes[site2], longitudes[site2])).km
    distances[(site_ids[site1], site_ids[site2])] = distance

# 输出 distances 字典
print(distances)

{(101381, 301896): 0.4291278408974207, (101381, 301645): 0.8162588393622291, (101381, 9014611): 1.157876221659297, (101381, 305394): 0.7878391420528229, (101381, 303676): 0.699994645182666, (101381, 9002262): 0.8113502598450336, (101381, 304363): 1.7542250128787402, (101381, 304369): 0.6354464707373141, (101381, 304060): 0.25523563501983515, (101381, 134923): 1.412857232921497, (101381, 134733): 0.6156812861890808, (101381, 135213): 0.24824015229749105, (101381, 50226): 0.41799074956300003, (101381, 302854): 0.1904045326697398, (101381, 11601): 1.2819504114705667, (101381, 34603): 1.18461715137242, (101381, 134941): 1.0849714260719745, (101381, 47316): 0.43264835979140986, (101381, 51590): 1.0356439635680994, (101381, 134857): 1.7874531062632382, (301896, 301645): 0.7732344159237403, (301896, 9014611): 0.8138273893527105, (301896, 305394): 1.0223116343388046, (301896, 303676): 0.771033561338839, (301896, 9002262): 0.9549879779526691, (301896, 304363): 1.4564466960603584, (301896, 30436

In [16]:
selected_site_ids

[101381,
 134754,
 135390,
 303712,
 135143,
 130005,
 301896,
 134822,
 135009,
 9013096,
 9009845,
 130439,
 301645,
 301383,
 135330,
 9001289,
 461423,
 302571,
 9014611,
 11591]

In [19]:
import pickle

# 保存 selected_site_ids
with open('../../exp/服务发现/selected_site_ids.pickle', 'wb') as f:
    pickle.dump(selected_site_ids, f)

# 保存 distances
with open('../../exp/服务发现/distances.pickle', 'wb') as f:
    pickle.dump(distances, f)


In [1]:
user_counts

NameError: name 'user_counts' is not defined