In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# === 1. Base data ===
# Assumed relative uncertainty of the sampling rate (+/-5%).
sampling_error_range = 0.05

# The sampled group is 2% of the population.
sampling_rate = 0.02

# Sampled head counts from two comparable locations.
sampled_traffic_location2 = np.array([15, 18, 20, 17, 22])  # location 2
sampled_traffic_location1 = np.array([20, 25, 22, 18, 30])  # location 1

# === 2. Estimate total traffic ===
def estimate_total_traffic(sampled_traffic, sampling_rate, error_range, rng=None):
    """Scale sampled counts up to totals under a noisy sampling rate.

    Every element gets its own relative error drawn uniformly between
    ``-error_range`` and ``error_range``; the element is then divided by
    ``sampling_rate * (1 + error)``.

    Args:
        sampled_traffic: Array-like of sampled counts.
        sampling_rate: Nominal fraction of the traffic that was sampled.
        error_range: Half-width of the relative error applied to the
            sampling rate.
        rng: Optional object exposing a NumPy-style
            ``uniform(low, high, size=...)`` method (for example
            ``np.random.default_rng(seed)``) so draws can be reproduced.
            When omitted, NumPy's global random state is used.

    Returns:
        Float ``np.ndarray`` of estimated totals with the input's shape.
    """
    sampled = np.asarray(sampled_traffic, dtype=float)
    source = np.random if rng is None else rng
    # A single vectorised draw replaces the per-element Python loop; the
    # draws are consumed in element order, as the loop did.
    errors = source.uniform(-error_range, error_range, size=sampled.shape)
    return sampled / (sampling_rate * (1 + errors))

# Estimate total traffic for location 1, then location 2 (same call order,
# so random draws are consumed in the same sequence).
total_traffic_location1, total_traffic_location2 = (
    estimate_total_traffic(samples, sampling_rate, sampling_error_range)
    for samples in (sampled_traffic_location1, sampled_traffic_location2)
)

# === 3. Aggregate traffic statistics ===
# Pool the estimates of both locations into one array.
total_traffic = np.concatenate((total_traffic_location1, total_traffic_location2))

# Report the per-location estimates and overall descriptive statistics.
print("地点1的推断总人流量:", total_traffic_location1)
print("地点2的推断总人流量:", total_traffic_location2)
print("整体统计描述:")
print(pd.DataFrame({"Estimated Total Traffic": total_traffic}).describe())

# === 4. Visualisation ===
# Histogram of the pooled estimates with the mean marked by a dashed line.
plt.hist(total_traffic, bins=20, alpha=0.7, label="Estimated Total Traffic")
plt.axvline(
    total_traffic.mean(),
    color='red',
    linestyle='--',
    linewidth=1,
    label="Mean Traffic",
)
plt.title('Distribution of Estimated Total Traffic')
plt.xlabel('Estimated Total Traffic')
plt.ylabel('Frequency')
plt.legend()
plt.show()


In [None]:
# Scenario: baseline traffic plus an esports activity adjustment

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# === 1. Input data ===
# Number of simulation draws.
n_simulations = 1000

# Fraction of traffic captured by sampling.
sampling_rate = 0.02

# Sampled data from similar locations (general traffic).
sampled_traffic_location1 = np.array([20, 25, 22, 18, 30])
sampled_traffic_location2 = np.array([15, 18, 20, 17, 22])

# Bounds of the activity-interest factor.
activity_interest_max = 0.05  # highest activity interest
activity_interest_min = 0.04  # lowest activity interest

# Relevance weights for the predicted location.
general_weight = 0.3
location_specific_weight = 0.7

# === 2. Estimate total traffic at the similar locations ===
def estimate_total_traffic(sampled_traffic, sampling_rate):
    """Scale sampled traffic counts up to estimated totals.

    Args:
        sampled_traffic: Scalar or array-like of sampled counts.
        sampling_rate: Fraction of the traffic that was sampled; must be
            strictly positive.

    Returns:
        ``sampled_traffic / sampling_rate`` as floating-point NumPy data
        with the same shape as the input.

    Raises:
        ValueError: If ``sampling_rate`` is not strictly positive (this
            also rejects NaN), since dividing by it would otherwise yield
            inf, NaN or negative totals without an error.
    """
    if not sampling_rate > 0:
        raise ValueError(f"sampling_rate must be > 0, got {sampling_rate!r}")
    # asarray lets plain lists/tuples be passed as well as ndarrays.
    return np.asarray(sampled_traffic, dtype=float) / sampling_rate

# Estimate total traffic for each similar location.
total_traffic_location1 = estimate_total_traffic(sampled_traffic_location1, sampling_rate)
total_traffic_location2 = estimate_total_traffic(sampled_traffic_location2, sampling_rate)

# Combined baseline across the similar locations (weighted average).
# Each location's mean is weighted by general_weight and np.average divides
# by the sum of the weights. Summing the two weighted means without that
# normalisation would give a scaled sum (total weight 2 * general_weight),
# not an average.
general_baseline_traffic = np.average(
    [np.mean(total_traffic_location1), np.mean(total_traffic_location2)],
    weights=[general_weight, general_weight],
)

# === 3. Simulate traffic at the predicted location ===
# Baseline traffic: the general baseline scaled by the share that is not
# location-specific.
baseline_traffic = (1 - location_specific_weight) * general_baseline_traffic

# Activity adjustment factor, one uniform draw per simulation.
# NOTE(review): draws come from NumPy's global random state and no seed is
# set in the visible code, so results differ between runs -- confirm whether
# a fixed seed is wanted.
activity_adjustment = np.random.uniform(
    activity_interest_min, activity_interest_max, n_simulations
)

# Total traffic at the predicted location for every simulation.
predicted_traffic = (1 + activity_adjustment) * baseline_traffic

# === 4. Result analysis ===
# One row per simulation; the baseline is constant and repeated on each row.
results = pd.DataFrame(
    {
        'Baseline Traffic': np.full(shape=n_simulations, fill_value=baseline_traffic),
        'Activity Adjustment': activity_adjustment,
        'Predicted Traffic': predicted_traffic,
    }
)

# Print descriptive statistics of all columns.
print("预测地点流量统计描述:")
print(results.describe())

# === 5. Visualisation ===
# Histogram of predicted traffic with its mean marked by a dashed line.
plt.hist(results['Predicted Traffic'], bins=30, alpha=0.7, label='Predicted Traffic')
plt.axvline(
    results['Predicted Traffic'].mean(),
    color='red',
    linestyle='--',
    linewidth=1,
    label='Mean Traffic',
)
plt.title('Predicted Traffic Distribution')
plt.xlabel('Predicted Traffic')
plt.ylabel('Frequency')
plt.legend()
plt.show()


In [None]:
# Scenario: one location's record is full (not sampled) traffic data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# === 1. Input data ===
# Number of days to simulate.
n_days = 30

# Sampled location: sampled counts (general traffic) and the sampling rate.
sampling_rate = 0.02
sampled_traffic_location = np.array([20, 25, 22, 18, 30])

# Non-sampled location: full traffic counts (known actual traffic).
non_sampled_traffic_location = np.array([1000, 1200, 1100, 1050, 1300])

# Bounds of the activity-interest factor.
activity_interest_max = 0.05  # highest activity interest
activity_interest_min = 0.04  # lowest activity interest

# Relevance weights for the predicted location.
general_weight = 0.3  # baseline weight of the similar locations
location_specific_weight = 0.7  # activity-relevance weight

# === 2. Estimate total traffic at the sampled location ===
def estimate_total_traffic(sampled_traffic, sampling_rate):
    """Scale sampled traffic counts up to estimated totals.

    Args:
        sampled_traffic: Scalar or array-like of sampled counts.
        sampling_rate: Fraction of the traffic that was sampled; must be
            strictly positive.

    Returns:
        ``sampled_traffic / sampling_rate`` as floating-point NumPy data
        with the same shape as the input.

    Raises:
        ValueError: If ``sampling_rate`` is not strictly positive (this
            also rejects NaN), since dividing by it would otherwise yield
            inf, NaN or negative totals without an error.
    """
    if not sampling_rate > 0:
        raise ValueError(f"sampling_rate must be > 0, got {sampling_rate!r}")
    # asarray lets plain lists/tuples be passed as well as ndarrays.
    return np.asarray(sampled_traffic, dtype=float) / sampling_rate

# Estimate total traffic at the sampled location.
total_traffic_sampled_location = estimate_total_traffic(sampled_traffic_location, sampling_rate)

# The non-sampled location already holds full counts, so use them directly.
total_traffic_non_sampled_location = non_sampled_traffic_location

# Combined baseline traffic (weighted average).
# Each location's mean is weighted by general_weight and np.average divides
# by the sum of the weights. Summing the two weighted means without that
# normalisation would give a scaled sum (total weight 2 * general_weight),
# not an average.
general_baseline_traffic = np.average(
    [
        np.mean(total_traffic_sampled_location),
        np.mean(total_traffic_non_sampled_location),
    ],
    weights=[general_weight, general_weight],
)

# === 3. Simulate daily traffic ===
# Baseline traffic at the predicted location: the general baseline scaled by
# the share that is not location-specific.
baseline_traffic = (1 - location_specific_weight) * general_baseline_traffic

# Activity adjustment factor, one uniform draw per simulated day.
activity_adjustment = np.random.uniform(
    activity_interest_min, activity_interest_max, n_days
)

# Daily traffic = baseline lifted by that day's adjustment.
daily_traffic = (1 + activity_adjustment) * baseline_traffic

# === 4. Result analysis ===
# Collect the per-day values into a DataFrame; days are numbered from 1.
results = pd.DataFrame(
    {
        'Day': np.arange(n_days) + 1,
        'Baseline Traffic': np.full(shape=n_days, fill_value=baseline_traffic),
        'Activity Adjustment': activity_adjustment,
        'Daily Traffic': daily_traffic,
    }
)

# Print the full per-day table.
print("每日流量统计:")
print(results)

# === 5. Visualisation ===
# Daily traffic over time, with the constant baseline as a dashed reference.
plt.plot(
    results['Day'],
    results['Daily Traffic'],
    marker='o',
    label='Daily Traffic',
)
plt.axhline(y=baseline_traffic, color='red', linestyle='--', label='Baseline Traffic')
plt.title('Daily Traffic with Adjustments')
plt.xlabel('Day')
plt.ylabel('Traffic')
plt.legend()
plt.show()


In [None]:
# Case: estimate only the possible range of single-day total traffic

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# === 1. Input data ===
# Number of simulation draws.
n_simulations = 1000

# Sampled location: sampled counts and the sampling rate.
sampling_rate = 0.02
sampled_traffic_location = np.array([20, 25, 22, 18, 30])

# Non-sampled location: full traffic counts (known actual traffic).
non_sampled_traffic_location = np.array([1000, 1200, 1100, 1050, 1300])

# Bounds of the activity-interest factor.
activity_interest_max = 0.05  # highest activity interest
activity_interest_min = 0.04  # lowest activity interest

# Notification impact (additional effect when the activity takes place).
notification_impact_max = 0.15
notification_impact_min = 0.1

# === 2. Baseline traffic ===
# Weight given to each of the two locations (adjustable).
general_weight = 0.5

# Sampled location: scale the sampled counts up by the sampling rate.
total_traffic_sampled_location = sampled_traffic_location / sampling_rate

# Non-sampled location: the recorded values are already totals.
total_traffic_non_sampled_location = non_sampled_traffic_location

# Baseline = weighted combination of the two locations' mean traffic.
baseline_traffic = (
    general_weight * np.mean(total_traffic_sampled_location)
    + general_weight * np.mean(total_traffic_non_sampled_location)
)

# === 3. Simulate activity and notification effects ===
# Activity adjustment factor, one uniform draw per simulation. It is drawn
# first so the random stream is consumed in the same order as before.
activity_adjustment = np.random.uniform(
    activity_interest_min, activity_interest_max, n_simulations
)

# Notification impact factor, one uniform draw per simulation.
notification_impact = np.random.uniform(
    notification_impact_min, notification_impact_max, n_simulations
)

# === 4. Single-day total traffic ===
# Baseline lifted by the sum of both relative effects.
total_traffic = (1 + activity_adjustment + notification_impact) * baseline_traffic

# === 5. Result analysis ===
# One row per simulation: both drawn factors and the resulting total.
results = pd.DataFrame(
    {
        'Activity Adjustment': activity_adjustment,
        'Notification Impact': notification_impact,
        'Total Traffic': total_traffic,
    }
)

# Print descriptive statistics of all columns.
print("单日总流量统计描述:")
print(results.describe())

# === 6. Visualisation ===
# Histogram of simulated single-day totals with the mean as a dashed line.
plt.hist(results['Total Traffic'], bins=50, alpha=0.7, label='Total Traffic')
plt.axvline(
    results['Total Traffic'].mean(),
    color='red',
    linestyle='--',
    linewidth=1,
    label='Mean Traffic',
)
plt.title('Single-Day Total Traffic Distribution')
plt.xlabel('Total Traffic')
plt.ylabel('Frequency')
plt.legend()
plt.show()
