In [1]:
import random
import pandas as pd
import numpy as np
import cvxpy as cp
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
from tqdm import tqdm
import os
matplotlib.rc('font', family='BIZ UDGothic')

(CVXPY) Sep 10 04:05:22 AM: Encountered unexpected exception importing solver OSQP:
ImportError('DLL load failed while importing qdldl: 指定されたモジュールが見つかりません。')


In [2]:
"""pathの定義"""
DIR_HOURLY = '../data/2020_jikoku_danjonenso.csv'
DIR_FULL_DAY = '../data/2020_4shihyo_danjonenso.csv'
DIR_GENRE = '../data/nhk_genre.csv'

In [3]:
def calc_list_diff(list1:list, list2:list):
    # リストの長さが異なる場合のエラーチェック
    if len(list1) != len(list2):
        raise ValueError("リストの長さが異なります。")
    
    # 各要素の差分を計算して新しいリストを生成
    difference = [a - b for a, b in zip(list1, list2)]
    return difference

# not P_iの算出による P_iの算出
def get_P_i(hourly_table:pd.DataFrame,Transition_rates:list,activity_ordering:list):
    t_0_table = sort_df_by_list(hourly_table[(hourly_table["Time"].dt.hour == 0) & (hourly_table["Time"].dt.minute == 0)],"Activity",activity_ordering).copy()
    initial_rate = np.array(t_0_table["Rate"])

    # 正規化
    initial_rate = initial_rate / np.sum(initial_rate)

    P_i_list = []
    for i in range(len(activity_ordering)):

        # 行列の加工
        n = len(initial_rate)
        one_vector = np.ones(n)
        modified_transition_rates = []

        for A in Transition_rates:
            A_i = A.copy()
            A_i[i, :] = 0  # 行 i を全て 0 に置き換える
            modified_transition_rates.append(A_i)

        # 初期行為者率ベクトルの加工
        modified_initial_rate = initial_rate.copy()
        modified_initial_rate[i] = 0

        # 最終的な行為者率ベクトルの計算
        final_rate = modified_initial_rate

        for A_i in modified_transition_rates:
            final_rate = A_i @ final_rate

        # 行為 i を一度も行わない確率の計算
        P_not_i = one_vector @ final_rate

        P_i = 1-P_not_i

        # 独自定義のラベルを適用しているため、重複集計の関係上どうしても例外が発生する。
        if P_i < 0:
            P_i = 0

        P_i_list.append(P_i)

    return P_i_list

# データフレームを参照リストに基づいてソートするヘルパー関数
def sort_df_by_list(df, column, reference_list):
    return df.set_index(column).reindex(reference_list).reset_index()

# 遷移率を計算する関数
def calc_transition_rates(beta_i: np.ndarray, time_list: list, time_table: pd.DataFrame, activity_list: list):
    transition_rates = []
    middle = activity_list  # 活動のリストを基準として使用
    
    for i in range(len(time_list)):
        time_t = time_list[i]
        time_t1 = time_list[(i+1) % len(time_list)]  # 次の時間帯
        
        # 両方の時間帯の行動率を取得
        y_t = np.array(sort_df_by_list(time_table[(time_table["Time"].dt.hour == time_t.hour) & (time_table["Time"].dt.minute == time_t.minute) ], "Activity", middle)["Rate"])
        y_t1 = np.array(sort_df_by_list(time_table[(time_table["Time"].dt.hour == time_t1.hour) & (time_table["Time"].dt.minute == time_t1.minute) ], "Activity", middle)["Rate"])
        
        # 変数の定義
        no_of_elem = len(middle)
        a = cp.Variable((no_of_elem, no_of_elem), nonneg=True)  # 非負の要素を持つ遷移行列

        # 目的関数と制約条件の定義
        delta = np.eye(no_of_elem)  # クロネッカーのデルタ
        # 目的関数: (1 - beta_i * delta) で修正された、二乗差の合計を最小化
        objective = cp.Minimize(cp.sum(cp.multiply(1 - beta_i * delta, cp.square(a))))

        # 行の和が1になる制約に変更
        constraints = [
            cp.sum(a, axis=1) == 1,  # 遷移行列の各行の和が1になる制約
            a >= 0,  # 非負制約
        ]

        # 行動率を正規化
        y_t = y_t / np.sum(y_t)
        y_t1 = y_t1 / np.sum(y_t1)

        # 次の時間帯の行動率と遷移行列の適用が一致する制約
        constraints.append(y_t1 - a.T @ y_t == 0)

        # 問題を定義する
        problem = cp.Problem(objective, constraints)

        # ソルバーを使用して問題を解く（別のソルバーを指定して解く）
        problem.solve(solver=cp.SCS, verbose=False)

        # 得られた遷移行列を格納
        transition_rates.append(a.value)

    return np.array(transition_rates)

def roulette_wheel_selection(weights:list,labels:list):
    """
    Performs roulette wheel selection on a list of weights.
    
    Args:
    weights (list of float): The weights or probabilities for each item.
    
    Returns:
    int: The index of the selected item.
    """
    if (len(weights) != len(labels)):
        print("weights and labels has different length @roulette_wheel_selection")
        os._exit(1)
    # Calculate the cumulative sum of weights
    cumulative_sum = [sum(weights[:i+1]) for i in range(len(weights))]
    total_sum = cumulative_sum[-1]
    
    # Generate a random number in the range [0, total_sum)
    random_num = random.uniform(0, total_sum)
    
    # Find the index where the random number would fit in the cumulative sum
    for i, cum_sum in enumerate(cumulative_sum):
        if random_num < cum_sum:
            return i, labels[i]

In [4]:
# 独自定義のラベルを適用するためにまず辞書を作る
small = [
    "睡眠", "食事", "身のまわりの用事", "療養・静養", "仕事", "仕事のつきあい", 
    "授業・学内の活動", "学校外の学習", "炊事・掃除・洗濯", "買い物", 
    "子どもの世話", "家庭雑事", "通勤", "通学", "社会参加", "会話・交際", 
    "スポーツ", "行楽・散策", "趣味・娯楽・教養(インターネット除く)", 
    "趣味・娯楽・教養のインターネット(動画除く)", "インターネット動画", 
    "テレビ", "録画番組・DVD", "ラジオ", "新聞", "雑誌・マンガ・本", 
    "音楽", "休息", "その他", "不明"
]
label_master = pd.read_csv(DIR_GENRE)
label_dict = {}
for k,v in zip(label_master["小分類"],label_master["モデル用定義"]):
    label_dict[k]=v

Activity_Ordering = list(set(label_dict.values()))

In [5]:
# 時間帯ごとの行為者率テーブルの処理
data_hourly = pd.read_csv(DIR_HOURLY)
data_hourly = data_hourly.set_axis(["Day","Group","Activity","Time","Rate"],axis=1)
data_hourly["Time"] = pd.to_datetime(data_hourly["Time"], format="%H:%M")
data_hourly = data_hourly.query("Group == '男１０代' & Activity in @small & Day == '平日'")
data_hourly["Activity"] = data_hourly["Activity"].map(label_dict) # 独自定義のラベル適用
data_hourly_cleaned = data_hourly[["Activity","Time","Rate"]].groupby(["Time","Activity"],as_index=False).sum().copy()
data_hourly_cleaned = data_hourly_cleaned.sort_values(by="Time")
# 一日の行為者率のテーブルの処理
data_full_day = pd.read_csv(DIR_FULL_DAY)
data_full_day = data_full_day.set_axis(["Day","Group","Activity","DailyRate","NetAverageTime","GrossAverageTime","Gross_SD"],axis=1)
data_full_day = data_full_day.query("Group == '男１０代' & Activity in @small & Day == '平日'")
data_full_day["Activity"] = data_full_day["Activity"].map(label_dict) # 独自定義のラベル適用
data_full_day_cleaned = data_full_day[["Activity","DailyRate"]].groupby(["Activity"],as_index=False).sum().copy()
data_full_day_cleaned = sort_df_by_list(data_full_day_cleaned,"Activity",Activity_Ordering)
Orig_P_i = (data_full_day_cleaned["DailyRate"]/100).to_list()
# アクティビティと時間のリストを取得
time_list_hourly = data_hourly_cleaned['Time'].unique()

In [6]:
# 遷移確率を計算
transition_rates_hourly = calc_transition_rates(np.ones(len(Activity_Ordering)), time_list_hourly, data_hourly_cleaned, Activity_Ordering)
# 原因は不明だが、一部1超過する確率が含まれる 故に、1を超過したものは1に修正する。
transition_rates_hourly[transition_rates_hourly > 1] = 1
# 遷移確率を表示
print(transition_rates_hourly)



[[[9.59225217e-01 5.61496592e-08 0.00000000e+00 ... 5.61496592e-08
   5.61496592e-08 5.61496592e-08]
  [7.82674829e-08 9.99998435e-01 7.82674829e-08 ... 7.82674829e-08
   7.82674829e-08 7.82674829e-08]
  [3.99597136e-03 4.54675799e-08 9.66653580e-01 ... 4.54675799e-08
   4.54675799e-08 4.54675799e-08]
  ...
  [7.82674830e-08 7.82674830e-08 7.82674830e-08 ... 9.99998435e-01
   7.82674830e-08 7.82674830e-08]
  [7.82674830e-08 7.82674830e-08 7.82674830e-08 ... 7.82674830e-08
   9.99998435e-01 7.82674830e-08]
  [7.82674830e-08 7.82674830e-08 7.82674830e-08 ... 7.82674830e-08
   7.82674830e-08 9.99998435e-01]]

 [[8.82112775e-01 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]
  [8.85070583e-09 9.99999823e-01 8.85070582e-09 ... 8.85070582e-09
   8.85070582e-09 8.85070582e-09]
  [1.05580881e-01 9.63989425e-08 4.05410311e-01 ... 9.63989425e-08
   9.63989425e-08 9.63989425e-08]
  ...
  [8.85070837e-09 8.85070836e-09 8.85070836e-09 ... 9.99999823e-01
   8.85070

In [None]:
# """SA法によるβのパラメータフィッティング(事前知識加味)"""
# def objective(beta_i:np.ndarray, time_list:list, time_table:pd.DataFrame, activity_list:list,orig_P_i:list):
#     transition_rates = calc_transition_rates(beta_i, time_list, time_table, activity_list)
#     calclated_P_i = get_P_i(time_table,transition_rates,activity_list) # 一日のユニークな行為者率の算出
#     diff_1 = [e for e in calc_list_diff(orig_P_i,calclated_P_i)]
#     diff_2 = sum([e**2 for e in diff_1])
#     return diff_2,diff_1

# def simulated_annealing(objective, bounds, n_iterations, step_size, temp, time_list, time_table, activity_ordering, orig_P_i):
#     # 初期解を生成
#     initial_beta = np.random.uniform(bounds[:, 0], bounds[:, 1])
#     # 初期評価値を計算
#     best_eval, best_eval_raw = objective(initial_beta,time_list,time_table,activity_ordering,orig_P_i)
#     curr, curr_eval, curr_eval_raw = initial_beta, best_eval, best_eval_raw
#     print(f"Starting Simulated Annealing >>> Initial Evaluation : {best_eval}")
    
#     for i in tqdm(range(n_iterations)):
#         # 新しい候補解を生成
#         candidate_R = np.random.uniform(0, step_size, len(activity_ordering))
#         """候補を事前知識から補正(βは大きいほど同行為の継続確率が大きくなる)"""
#         candidate_updated = []
#         for curr_,candidate_r,eval_raw in zip(list(curr),list(candidate_R),curr_eval_raw):
#             if (eval_raw > 0): # 計算値が実データを上回る場合、βiを大きく設定する
#                 if (curr_ == 0):
#                     additional = np.random.uniform(0,1)
#                 else:
#                     inverse = 1/curr_
#                     additional = np.random.uniform(1,inverse) * curr_
#                 new_candidate = curr_ + (additional*candidate_r)
#             elif (eval_raw < 0): # 計算値が実データを下回る場合、βiを小さく設定する。
#                 new_candidate = np.random.uniform(0,1)*candidate_r*curr_
#             else:
#                 new_candidate = curr_ # 現状値を維持
#             candidate_updated.append(new_candidate)
#         candidate = candidate_updated
#         # 境界チェック
#         candidate = np.clip(candidate, bounds[:, 0], bounds[:, 1])
#         # 新しい評価値を計算
#         candidate_eval, candidate_eval_raw = objective(candidate,time_list,time_table,activity_ordering,orig_P_i)
#         # 温度の減衰
#         t = temp * (0.99 ** i)  # 指数的に減衰
#         # 新しい解を受け入れるかの確率を計算
#         if candidate_eval < curr_eval or np.exp((curr_eval - candidate_eval) / t) > np.random.rand():
#             curr, curr_eval = candidate, candidate_eval
#         # ベスト解を更新
#         if candidate_eval < best_eval:
#             best, best_eval, best_eval_raw = candidate, candidate_eval, candidate_eval_raw
#             print(f"Best Evaluation is Updated [BestEvaluation] : {best_eval}")

#     return best, best_eval


# # パラメータの設定


# bounds = np.array([[0.0,1.0] for i in range(len(Activity_Ordering))])
# n_iterations = 100
# step_size = 0.9
# temp = 1000.0

# # 最適化の実行
# best_solution, best_evaluation = simulated_annealing(objective=objective, bounds=bounds, n_iterations=n_iterations, step_size=step_size, temp=temp, time_list=time_list_hourly, time_table=data_hourly_cleaned, activity_ordering=Activity_Ordering, orig_P_i=Orig_P_i)

# print(f'Best Solution: {best_solution}')
# print(f'Best Evaluation: {best_evaluation}')

In [None]:
# 目的関数
def objective(beta_i:np.ndarray, time_list:list, time_table:pd.DataFrame, activity_list:list,orig_P_i:list):
    transition_rates = calc_transition_rates(beta_i, time_list, time_table, activity_list)
    calclated_P_i = get_P_i(time_table,transition_rates,activity_list) # 一日のユニークな行為者率の算出
    diff_1 = [e for e in calc_list_diff(orig_P_i,calclated_P_i)]
    diff_2 = sum([e**2 for e in diff_1])
    return diff_2,diff_1
# PSOのパラメータ
num_particles = 30  # 粒子の数
n_iterations = 10  # 反復回数
w = 0.5  # 慣性項
c1 = 1.5  # 粒子の最良位置への係数
c2 = 1.5  # 全体の最良位置への係数

def pso(objective, bounds, n_iterations, num_particles, w, c1, c2, time_list, time_table, activity_ordering, orig_P_i):
    # 粒子の位置と速度を初期化
    num_dimensions = len(activity_ordering)
    particles_position = np.random.uniform(bounds[:, 0], bounds[:, 1], (num_particles, num_dimensions))
    particles_velocity = np.random.uniform(-1, 1, (num_particles, num_dimensions))

    # 粒子の最良位置と全体の最良位置を初期化
    personal_best_position = particles_position.copy()
    personal_best_eval = np.array([objective(p, time_list, time_table, activity_ordering, orig_P_i)[0] for p in particles_position])
    global_best_position = personal_best_position[np.argmin(personal_best_eval)]
    global_best_eval = np.min(personal_best_eval)

    print(f"Starting PSO >>> Initial Global Best Evaluation : {global_best_eval}")

    for i in tqdm(range(n_iterations)):
        for j in range(num_particles):
            # 粒子の速度を更新
            r1 = np.random.rand(num_dimensions)
            r2 = np.random.rand(num_dimensions)
            particles_velocity[j] = (w * particles_velocity[j] +
                                     c1 * r1 * (personal_best_position[j] - particles_position[j]) +
                                     c2 * r2 * (global_best_position - particles_position[j]))
            
            # 粒子の位置を更新
            particles_position[j] += particles_velocity[j]
            particles_position[j] = np.clip(particles_position[j], bounds[:, 0], bounds[:, 1])  # 境界のチェック
            
            # 新しい評価値を計算
            current_eval = objective(particles_position[j], time_list, time_table, activity_ordering, orig_P_i)[0]
            
            # 粒子の最良位置を更新
            if current_eval < personal_best_eval[j]:
                personal_best_position[j] = particles_position[j].copy()
                personal_best_eval[j] = current_eval
            
            # 全体の最良位置を更新
            if current_eval < global_best_eval:
                global_best_position = particles_position[j].copy()
                global_best_eval = current_eval
                print(f"Global Best Evaluation is Updated [Iteration {i+1}] : {global_best_eval}")

    return global_best_position, global_best_eval


# パラメータの設定
bounds = np.array([[0.0, 1.0] for _ in range(len(Activity_Ordering))])

# 最適化の実行
best_solution, best_evaluation = pso(objective=objective, bounds=bounds, n_iterations=n_iterations, 
                                     num_particles=num_particles, w=w, c1=c1, c2=c2, 
                                     time_list=time_list_hourly, time_table=data_hourly_cleaned, 
                                     activity_ordering=Activity_Ordering, orig_P_i=Orig_P_i)

print(f'Best Solution: {best_solution}')
print(f'Best Evaluation: {best_evaluation}')

In [9]:
from concurrent.futures import ThreadPoolExecutor

In [10]:
# 目的関数
def objective(beta_i:np.ndarray, time_list:list, time_table:pd.DataFrame, activity_list:list,orig_P_i:list):
    transition_rates = calc_transition_rates(beta_i, time_list, time_table, activity_list)
    calclated_P_i = get_P_i(time_table,transition_rates,activity_list) # 一日のユニークな行為者率の算出
    diff_1 = [e for e in calc_list_diff(orig_P_i,calclated_P_i)]
    diff_2 = sum([e**2 for e in diff_1])
    return diff_2,diff_1
# PSOのパラメータ
num_particles = 30  # 粒子の数
n_iterations = 10  # 反復回数
w = 0.5  # 慣性項
c1 = 1.5  # 粒子の最良位置への係数
c2 = 1.5  # 全体の最良位置への係数

def evaluate_particle(objective, particle_position, time_list, time_table, activity_ordering, orig_P_i):
    """各粒子の評価を並列で行う関数"""
    return objective(particle_position, time_list, time_table, activity_ordering, orig_P_i)[0]

def pso(objective, bounds, n_iterations, num_particles, w, c1, c2, time_list, time_table, activity_ordering, orig_P_i):
    # 粒子の位置と速度を初期化
    num_dimensions = len(activity_ordering)
    particles_position = np.random.uniform(bounds[:, 0], bounds[:, 1], (num_particles, num_dimensions))
    particles_velocity = np.random.uniform(-1, 1, (num_particles, num_dimensions))

    # 粒子の最良位置と全体の最良位置を初期化
    personal_best_position = particles_position.copy()
    
    # 並列で初期評価を計算
    with ThreadPoolExecutor() as executor:
        personal_best_eval = list(executor.map(lambda p: evaluate_particle(objective, p, time_list, time_table, activity_ordering, orig_P_i), particles_position))
    
    global_best_position = personal_best_position[np.argmin(personal_best_eval)]
    global_best_eval = np.min(personal_best_eval)

    print(f"Starting PSO >>> Initial Global Best Evaluation : {global_best_eval}")

    for i in tqdm(range(n_iterations)):
        for j in range(num_particles):
            # 粒子の速度を更新
            r1 = np.random.rand(num_dimensions)
            r2 = np.random.rand(num_dimensions)
            particles_velocity[j] = (w * particles_velocity[j] +
                                     c1 * r1 * (personal_best_position[j] - particles_position[j]) +
                                     c2 * r2 * (global_best_position - particles_position[j]))
            
            # 粒子の位置を更新
            particles_position[j] += particles_velocity[j]
            particles_position[j] = np.clip(particles_position[j], bounds[:, 0], bounds[:, 1])  # 境界のチェック
        
        # 並列で各粒子の評価を計算
        with ThreadPoolExecutor() as executor:
            current_evals = list(executor.map(lambda p: evaluate_particle(objective, p, time_list, time_table, activity_ordering, orig_P_i), particles_position))
        
        # 粒子の最良位置を更新
        for j in range(num_particles):
            if current_evals[j] < personal_best_eval[j]:
                personal_best_position[j] = particles_position[j].copy()
                personal_best_eval[j] = current_evals[j]
            
            # 全体の最良位置を更新
            if current_evals[j] < global_best_eval:
                global_best_position = particles_position[j].copy()
                global_best_eval = current_evals[j]
                print(f"Global Best Evaluation is Updated [Iteration {i+1}] : {global_best_eval}")

    return global_best_position, global_best_eval


# パラメータの設定
bounds = np.array([[0.0, 1.0] for _ in range(len(Activity_Ordering))])

# 最適化の実行
best_solution, best_evaluation = pso(objective=objective, bounds=bounds, n_iterations=n_iterations, 
                                     num_particles=num_particles, w=w, c1=c1, c2=c2, 
                                     time_list=time_list_hourly, time_table=data_hourly_cleaned, 
                                     activity_ordering=Activity_Ordering, orig_P_i=Orig_P_i)

print(f'Best Solution: {best_solution}')
print(f'Best Evaluation: {best_evaluation}')

Starting PSO >>> Initial Global Best Evaluation : 0.32791009783595065


 10%|█         | 1/10 [01:32<13:51, 92.34s/it]

Global Best Evaluation is Updated [Iteration 1] : 0.32512509530134287
Global Best Evaluation is Updated [Iteration 1] : 0.3091767450944461


 20%|██        | 2/10 [02:51<11:16, 84.59s/it]

Global Best Evaluation is Updated [Iteration 2] : 0.2670165684857261
Global Best Evaluation is Updated [Iteration 2] : 0.24268153953856375


 30%|███       | 3/10 [04:06<09:21, 80.28s/it]

Global Best Evaluation is Updated [Iteration 3] : 0.22412690084125542


 40%|████      | 4/10 [05:21<07:48, 78.08s/it]

Global Best Evaluation is Updated [Iteration 4] : 0.1506997284653801
Global Best Evaluation is Updated [Iteration 4] : 0.11451367158477152


 50%|█████     | 5/10 [06:39<06:29, 77.99s/it]

Global Best Evaluation is Updated [Iteration 5] : 0.10509885402245993


 60%|██████    | 6/10 [07:58<05:13, 78.46s/it]

Global Best Evaluation is Updated [Iteration 6] : 0.08851326663693111


 70%|███████   | 7/10 [09:17<03:55, 78.58s/it]

Global Best Evaluation is Updated [Iteration 7] : 0.08840114705454577
Global Best Evaluation is Updated [Iteration 7] : 0.08332611414725662


 80%|████████  | 8/10 [10:36<02:37, 78.88s/it]

Global Best Evaluation is Updated [Iteration 8] : 0.08292259491121289
Global Best Evaluation is Updated [Iteration 8] : 0.08241267386576565
Global Best Evaluation is Updated [Iteration 8] : 0.07771106977596517


 90%|█████████ | 9/10 [12:13<01:24, 84.48s/it]

Global Best Evaluation is Updated [Iteration 9] : 0.07752898899976293
Global Best Evaluation is Updated [Iteration 9] : 0.0772219811351483


100%|██████████| 10/10 [14:00<00:00, 84.03s/it]

Global Best Evaluation is Updated [Iteration 10] : 0.0770199987987852
Best Solution: [0.9978305  0.43388824 0.         0.         0.01591624 1.
 0.         1.         1.         1.         0.         1.
 1.         0.         0.98295211 0.         0.         1.
 0.81986692 1.         0.        ]
Best Evaluation: 0.0770199987987852





以下可視化

In [217]:
# calc_P_i = [round(p_i*100,1) for p_i in get_P_i(data_hourly_cleaned,transition_rates_hourly,Activity_Ordering)]
# pd.DataFrame({
#     "Activity":Activity_Ordering,
#     "calc_P_i":calc_P_i,
#     "original_P_i":data_full_day_cleaned["DailyRate"]
#     })

# 遷移確率を計算
transition_rates_hourly = calc_transition_rates(best_solution, time_list_hourly, data_hourly_cleaned, Activity_Ordering)
# 原因は不明だが、一部1超過する確率が含まれる 故に、1を超過したものは1に修正する。
transition_rates_hourly[transition_rates_hourly > 1] = 1

forVisualizetion = pd.DataFrame({
    "Activity":Activity_Ordering,
    "calc_P_i":get_P_i(data_hourly_cleaned,transition_rates_hourly,Activity_Ordering),
    "original_P_i":np.array(data_full_day_cleaned["DailyRate"])/100
    })
forVisualizetion["diff"] = forVisualizetion["calc_P_i"] - forVisualizetion["original_P_i"]
forVisualizetion = forVisualizetion.sort_values("diff", ascending=False)

# 可視化
import plotly.express as px
import plotly.graph_objects as go
# 折れ線グラフを作成

# Figureを作成
fig = go.Figure()

# calc_P_iの折れ線グラフ
fig.add_trace(go.Scatter(x=forVisualizetion['Activity'], y=forVisualizetion['calc_P_i'], mode='lines', name='calc_P_i'))

# original_P_iの折れ線グラフ
fig.add_trace(go.Scatter(x=forVisualizetion['Activity'], y=forVisualizetion['original_P_i'], mode='lines', name='original_P_i'))

# diffの点線グラフ（右側のy軸）
fig.add_trace(go.Scatter(x=forVisualizetion['Activity'], y=forVisualizetion['diff'], mode='lines', name='diff(calc-orig)',
                         line=dict(dash='dot'), yaxis='y2'))

# レイアウトの設定
fig.update_layout(
    title="Activity vs. calc_P_i, original_P_i and diff",
    xaxis_title="Activity",
    yaxis_title="P_i",
    yaxis2=dict(
        title="diff(calc-orig)",
        overlaying='y',  # 同じグラフ内で表示
        side='right'     # 右側に配置
    ),
    legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
    )
)

# グラフを表示
fig.show()

In [111]:
# calc_P_i = [round(p_i*100,1) for p_i in get_P_i(data_hourly_cleaned,transition_rates_hourly,Activity_Ordering)]
# pd.DataFrame({
#     "Activity":Activity_Ordering,
#     "calc_P_i":calc_P_i,
#     "original_P_i":data_full_day_cleaned["DailyRate"]
#     })

forVisualizetion = pd.DataFrame({
    "Activity":Activity_Ordering,
    "calc_P_i":get_P_i(data_hourly_cleaned,transition_rates_hourly,Activity_Ordering),
    "original_P_i":np.array(data_full_day_cleaned["DailyRate"])/100
    })
forVisualizetion["diff"] = forVisualizetion["calc_P_i"] - forVisualizetion["original_P_i"]
forVisualizetion = forVisualizetion.sort_values("diff", ascending=False)

# 可視化
import plotly.express as px
import plotly.graph_objects as go
# 折れ線グラフを作成

# Figureを作成
fig = go.Figure()

# calc_P_iの折れ線グラフ
fig.add_trace(go.Scatter(x=forVisualizetion['Activity'], y=forVisualizetion['calc_P_i'], mode='lines', name='calc_P_i'))

# original_P_iの折れ線グラフ
fig.add_trace(go.Scatter(x=forVisualizetion['Activity'], y=forVisualizetion['original_P_i'], mode='lines', name='original_P_i'))

# diffの点線グラフ（右側のy軸）
fig.add_trace(go.Scatter(x=forVisualizetion['Activity'], y=forVisualizetion['diff'], mode='lines', name='diff(calc-orig)',
                         line=dict(dash='dot'), yaxis='y2'))

# レイアウトの設定
fig.update_layout(
    title="Activity vs. calc_P_i, original_P_i and diff",
    xaxis_title="Activity",
    yaxis_title="P_i",
    yaxis2=dict(
        title="diff(calc-orig)",
        overlaying='y',  # 同じグラフ内で表示
        side='right'     # 右側に配置
    ),
    legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
    )
)

# グラフを表示
fig.show()

In [105]:
forVisualizetion

Unnamed: 0,Activity,calc_P_i,original_P_i,diff
16,休息,0.485489,0.23,0.255489
3,スポーツ,0.263406,0.112,0.151406
14,会話・交際,0.283988,0.134,0.149988
11,仕事のつきあい,0.147032,0.021,0.126032
18,仕事,0.128416,0.016,0.112416
12,趣味・娯楽・教養,0.571864,0.481,0.090864
19,行楽・散策,0.079652,0.037,0.042652
4,通勤,0.073172,0.043,0.030172
0,睡眠,0.996647,0.989,0.007647
2,子どもの世話,0.0,0.0,0.0


In [79]:

transition_rates_hourly[transition_rates_hourly > 1]

array([], dtype=float64)

個票生成

In [218]:
# 遷移確率を計算
transition_rates_hourly = calc_transition_rates(best_solution, time_list_hourly, data_hourly_cleaned, Activity_Ordering)
# 原因は不明だが、一部1超過する確率が含まれる 故に、1を超過したものは1に修正する。
transition_rates_hourly[transition_rates_hourly > 1] = 1

In [12]:
# 推定したβをもとに遷移確率を算出
no_of_pattern = 10000

patterns =[]

for i in tqdm(range(no_of_pattern)):
    # 初期行動の規定
    t_0_table = sort_df_by_list(data_hourly_cleaned[(data_hourly_cleaned["Time"].dt.hour == 0)&(data_hourly_cleaned["Time"].dt.minute == 0)],"Activity",Activity_Ordering) # 0:00における行為者率を抽出
    idx, action = roulette_wheel_selection(t_0_table["Rate"].to_list(),t_0_table["Activity"].to_list()) # その時刻における行為者率を重みとしてルーレット選択
    Actions =[] # 選択した行為種別の記録
    Actions.append(action)
    for t in range(4*24-1): # 15min間隔で24時間分繰り返す。
        transition_r_t = transition_rates_hourly[t] # 時刻tの遷移確率行列を抽出
        idx, action = roulette_wheel_selection(list(transition_r_t[idx,:]),Activity_Ordering) # 遷移確率をもとにt+1における行動種別を決定
        Actions.append(action) # 行動種別履歴に追加
    patterns.append(pd.DataFrame({"Time":time_list_hourly,"Id":[str(i+1)]*len(time_list_hourly),"Activity":Actions}))
result = pd.concat(patterns)

100%|██████████| 10000/10000 [00:46<00:00, 216.58it/s]


In [26]:
result.head(100*4*24).pivot(index="Id",columns="Time",values="Activity").to_csv("C:/Users/tora2/downloads/result_100.csv")

In [24]:
import plotly.express as px
import plotly.graph_objects as go
# result tableから集計
df_grouped = result.groupby(['Time', 'Activity']).size().unstack(fill_value=0)
df_percentage = df_grouped.div(df_grouped.sum(axis=1), axis=0)
result_cleaned = df_percentage.reset_index().melt(id_vars=["Time"],value_name="Rate")

# 各アクティビティに対して色を指定
color_map = {
    '食事': '#5DAE8B',  # 濃いめの柔らかいグリーン
    '授業・学内の活動': '#E59866',  # 濃いめの柔らかいオレンジ
    '買い物': '#F7DC6F',  # 濃いめのイエロー
    '会話・交際': '#5499C7',  # 濃いめの柔らかいブルー
    'マスメディア接触': '#AF7AC5',  # 濃いめのパープル
    '仕事': '#F4D03F',  # 濃いライトゴールド
    '療養・静養': '#EC7063',  # 濃いめのピンク
    '子どもの世話': '#5DADE2',  # 濃いライトブルー
    '行楽・散策': '#DC7633',  # 濃いめのオレンジ
    '休息': '#58D68D',  # 濃いめのグリーン
    '睡眠': '#28B463',  # 濃いめのグリーン
    '通勤': '#85C1E9',  # 濃いめのブルー
    'スポーツ': '#F1C40F',  # 濃いめのライトイエロー
    '身のまわりの用事': '#E74C3C',  # 濃いめのレッド
    '家庭雑事': '#48C9B0',  # 濃いめのアクア
    '通学': '#A569BD',  # 濃いラベンダー
    '炊事・掃除・洗濯': '#F1948A',  # 濃いめのピンクベージュ
    '趣味・娯楽・教養': '#F5B041',  # 濃いゴールド
    '仕事のつきあい': '#85929E',  # 濃いめのグレー
    '社会参加': '#F8C471',  # 濃いめのライトオレンジ
    '学校外の学習': '#76D7C4'  # 濃いめのアクア
}



for activity in tqdm(Activity_Ordering):
    # Plotlyで折れ線グラフを作成
    fig = go.Figure()
    
    # 元データの点線グラフ  
    filtered_df = data_hourly_cleaned[data_hourly_cleaned['Activity'] == activity]
    fig.add_trace(go.Scatter(
        x=filtered_df['Time'], 
        y=filtered_df['Rate']/100, 
        mode='lines', 
        name=f'{activity} (sim)', 
        line=dict(dash='dash', color=color_map.get(activity))
    ))

# 算出データの実線グラフ
# for activity in Activity_Ordering:
    filtered_df = result_cleaned[result_cleaned['Activity'] == activity]
    fig.add_trace(go.Scatter(
        x=filtered_df['Time'], 
        y=filtered_df['Rate'], 
        mode='lines', 
        name=f'{activity} (統計値)', 
        line=dict(dash='solid', color=color_map.get(activity))
    ))

    fig.update_layout(
        title=f"平日 10代男性 {activity} における推定遷移確率によるマイクロシミュレーションの結果と統計値の比較",
        xaxis_title="時間",
        yaxis_title="割合",
        legend_title="活動",
        template="plotly",
    )

    fig.write_html(f"C:/Users/tora2/downloads/マルコフ連鎖モデル_{activity}.html")

100%|██████████| 21/21 [00:01<00:00, 18.49it/s]


In [302]:
fig.write_html("C:/Users/tora2/downloads/マルコフ連鎖モデル.html")