# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# === 1. Base data ===
# Sampling parameters: the sampled group is 2% of the population, and the
# sampling error is assumed to be within +/-5%.
sampling_rate = 0.02  # sampling rate (2%)
sampling_error_range = 0.05  # assumed sampling error range (+/-5%)

# Sampled foot-traffic counts collected at two comparable locations.
sampled_traffic_location1 = np.array([20, 25, 22, 18, 30])  # location 1
sampled_traffic_location2 = np.array([15, 18, 20, 17, 22])  # location 2

# === 2. Estimate total traffic ===
# Scale the sampled counts up to the full population, accounting for sampling error
def estimate_total_traffic(sampled_traffic, sampling_rate, error_range, rng=None):
    """Scale sampled traffic counts up to full-population estimates.

    Each count is divided by the sampling rate after that rate has been
    perturbed by an independent relative error drawn uniformly from
    ``[-error_range, error_range)``.

    Args:
        sampled_traffic: Array-like of sampled counts.
        sampling_rate: Fraction of the population that was sampled; must be
            greater than zero.
        error_range: Maximum relative error applied to the sampling rate;
            must satisfy ``0 <= error_range < 1`` so the perturbed rate
            stays strictly positive.
        rng: Optional random source exposing ``uniform(low, high, size=...)``
            (for example ``numpy.random.default_rng(seed)``). When omitted,
            NumPy's global random state is used.

    Returns:
        A float ``numpy.ndarray`` holding one estimate per input count.

    Raises:
        ValueError: If ``sampling_rate`` is not positive or ``error_range``
            is outside ``[0, 1)``.
    """
    # "not (x > 0)" also rejects NaN, which a plain "x <= 0" check would miss.
    if not sampling_rate > 0:
        raise ValueError(f"sampling_rate must be positive, got {sampling_rate!r}")
    if not 0 <= error_range < 1:
        raise ValueError(f"error_range must be in [0, 1), got {error_range!r}")

    counts = np.asarray(sampled_traffic, dtype=float)
    draw_uniform = np.random.uniform if rng is None else rng.uniform
    # One independent error per count, drawn in a single vectorized call.
    errors = draw_uniform(-error_range, error_range, size=counts.shape)
    adjusted_rates = sampling_rate * (1 + errors)
    return counts / adjusted_rates

# Estimate the full-population traffic for location 1, then location 2.
# The call order is kept so the random error draws happen in the same sequence.
total_traffic_location1 = estimate_total_traffic(
    sampled_traffic=sampled_traffic_location1,
    sampling_rate=sampling_rate,
    error_range=sampling_error_range,
)
total_traffic_location2 = estimate_total_traffic(
    sampled_traffic=sampled_traffic_location2,
    sampling_rate=sampling_rate,
    error_range=sampling_error_range,
)

# === 3. Summary of the estimated traffic ===
# Pool the estimates of both locations into a single 1-D array.
total_traffic = np.concatenate((total_traffic_location1, total_traffic_location2))

# Print the per-location estimates followed by descriptive statistics
# (count, mean, std, min, quartiles, max) of the pooled estimates.
print("地点1的推断总人流量:", total_traffic_location1)
print("地点2的推断总人流量:", total_traffic_location2)
print("整体统计描述:")
print(pd.DataFrame({"Estimated Total Traffic": total_traffic}).describe())

# === 4. Visualization ===
# Histogram of the pooled estimates with a dashed vertical line at their mean.
plt.hist(total_traffic, bins=20, alpha=0.7, label="Estimated Total Traffic")
plt.axvline(
    total_traffic.mean(),
    color="red",
    linestyle="dashed",
    linewidth=1,
    label="Mean Traffic",
)
plt.title("Distribution of Estimated Total Traffic")
plt.xlabel("Estimated Total Traffic")
plt.ylabel("Frequency")
plt.legend()
plt.show()
