In [8]:
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

plt.rcParams['figure.dpi'] = 300

def calculate_latency_mean_std_from(region1, region2, latency_stats):
    """
    Estimate the mean and standard deviation of latency between two regions.

    No sampling happens here; the function only derives the two parameters
    that a caller could later feed into a normal distribution.

    Parameters:
        region1 (str): Row label looked up in each percentile table
        region2 (str): Column label looked up in each percentile table
        latency_stats (dict): Percentile-keyed tables supporting
            ``.loc[row, column]``; the '25th', '50th' and '75th' entries are read

    Returns:
        dict: ``{"mean": <50th percentile>, "std": <(75th - 25th) / 2>}``
    """
    # The median stands in for the mean.
    median = latency_stats['50th'].loc[region1, region2]

    # Half of the interquartile range stands in for the standard deviation.
    # NOTE(review): for an exactly normal distribution sigma is about IQR / 1.349,
    # so this proxy is somewhat smaller -- confirm that is acceptable.
    upper_quartile = latency_stats['75th'].loc[region1, region2]
    lower_quartile = latency_stats['25th'].loc[region1, region2]
    half_iqr = (upper_quartile - lower_quartile) / 2

    estimate = {}
    estimate["mean"] = median
    estimate["std"] = half_iqr
    return estimate


def extract_region_name(full_name):
    """
    Return the text inside the first pair of parentheses in a region label.

    Parameters:
        full_name (str): Region label, e.g. one containing "(Seoul)"

    Returns:
        str: The parenthesized text, or ``full_name`` unchanged when the
        label contains no "(...)" group
    """
    parenthesized = re.search(r'\((.*?)\)', full_name)
    if parenthesized is None:
        # No parentheses found: fall back to the label as given.
        return full_name
    # Non-greedy capture, so only the first "(...)" group is used.
    return parenthesized.group(1)

# Excel workbook holding the cross-region latency tables (one sheet per statistic).
file_path = 'data/AWS Cross Region Latency 240419.xlsx'

# Region names to keep; the order here is the order used when pairing regions.
selected_regions = [
    'Seoul',
    'Sydney',
    'Hong Kong',
    'Tokyo',
    'Frankfurt',
    'N. California',
    'London',
    'Osaka',
    'Singapore',
    'N. Virginia',
    "Oregon",
    'Ohio',
    'Stockholm',
    'Paris',
    'Ireland',
    'Mumbai',
    'Central',
    'São Paulo',
    'Bahrain',
    'Milan',
    'Cape Town',
]

# Sheet names to load from the workbook.
percentiles = [
    '10th',
    '25th',
    '50th',
    '75th',
    '90th',
    '99th',
    'Avg',
]

# Sheet name -> latency table restricted to the selected regions, with
# row/column labels shortened to the name inside the parentheses.
latency_data = {}


def _mentions_selected_region(label):
    """Return True when *label* contains any selected region name as a substring."""
    return any(region in label for region in selected_regions)


# Passing the full list of sheet names makes pandas open the workbook once and
# return a dict of DataFrames keyed by sheet name, instead of re-opening and
# re-parsing the same file for every sheet.
sheets_by_percentile = pd.read_excel(file_path, sheet_name=percentiles, index_col=0)

for percentile in percentiles:
    df = sheets_by_percentile[percentile]
    # Keep only the rows/columns whose label mentions a selected region.
    filtered_index = [idx for idx in df.index if _mentions_selected_region(idx)]
    filtered_columns = [col for col in df.columns if _mentions_selected_region(col)]
    filtered_df = df.loc[filtered_index, filtered_columns]
    # Shorten the labels so the tables can be indexed by the short names
    # listed in selected_regions.
    filtered_df.index = [extract_region_name(idx) for idx in filtered_df.index]
    filtered_df.columns = [extract_region_name(col) for col in filtered_df.columns]
    latency_data[percentile] = filtered_df
# Estimate (mean, std) for every ordered pair of selected regions.
# Self-pairs (src == dst) are deliberately included.
simulated_parameters = []

for src in selected_regions:
    for dst in selected_regions:
        param = calculate_latency_mean_std_from(src, dst, latency_data)
        simulated_parameters.append({
            "src": src,
            "dst": dst,
            "mean": param["mean"],
            "std": param["std"],
        })

csv_file_path = "data/AWSCrossRegionLatencyMatrixParams_240419.csv"
df_simulated = pd.DataFrame(simulated_parameters)

# Store std as a fixed 4-decimal string so floating-point noise from the
# subtraction (e.g. 2.680000000000007) does not end up in the CSV.
df_simulated['std'] = df_simulated['std'].map(lambda x: f"{x:.4f}")

df_simulated.to_csv(csv_file_path, index=False)

# Quick structural check of the resulting table.
print(df_simulated)





           src        dst    mean     std
0        Seoul      Seoul    3.31  1.0400
1        Seoul     Sydney  146.99  2.6800
2        Seoul  Hong Kong   38.16  0.9800
3        Seoul      Tokyo   36.02  1.5950
4        Seoul  Frankfurt  235.02  2.8800
..         ...        ...     ...     ...
436  Cape Town    Central  226.29  1.7500
437  Cape Town  São Paulo  336.42  1.5200
438  Cape Town    Bahrain  146.53  1.2600
439  Cape Town      Milan  145.61  0.4750
440  Cape Town  Cape Town    8.03  1.9750

[441 rows x 4 columns]
