In [18]:
import plotly.io as pio
pio.templates.default = 'plotly'
pio.templates
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import geopandas as gpd
import statsmodels.api as sm
from tqdm import tqdm

In [2]:
def latlonTable2GDF(table: pd.DataFrame, colName_lat: str, colName_lon: str):
    return gpd.GeoDataFrame(table, geometry=gpd.points_from_xy(table[colName_lon], table[colName_lat])).set_crs(6668)
NTT_TIME_LIST = ['0000', '0100', '0200', '0300', '0400', '0500', '0600', '0700', '0800', '0900', '1000', '1100', '1200', '1300', '1400', '1500', '1600', '1700', '1800', '1900', '2000', '2100', '2200', '2300']

In [20]:
DIR_LOGS = "Z:\\lab\\output\\logs\\"
DIR_MOBILE = "Z:\\lab\\SSI2024_検証\\モバイル空間統計_WEEKDAY\\"
seed = 20000
noOfPeople = 40000
FILE_NAME_HEAD = f"seed_{seed}_no_{noOfPeople}_"

In [21]:
spot = pd.read_csv(DIR_LOGS + FILE_NAME_HEAD + "spot_log.csv")
location = pd.read_csv(DIR_LOGS + FILE_NAME_HEAD + "location_log.csv",dtype={"NttTime":str})

In [78]:
# spot.filter(items=["CurrentTime","CurrentDay","Person2"]).query("CurrentTime.str.contains(':00:00')", engine='python')

In [19]:
mesh_poly = gpd.read_file("Z:\\lab\\SSI2024_検証\\47207_mesh_poly\\47207_mesh_poly.shp").drop(columns=["JCODE"])

In [49]:
## 5×5の可視化
# 全データのx, y最大値を計算するためのリスト
all_sim_pop = []
all_population = []

# 相関係数を保存するリスト
corr_list = []

# すべてのデータを読み込み、最大値を算出
for HHMM in tqdm(NTT_TIME_LIST):
    df = pd.read_csv(DIR_MOBILE + f"{HHMM}.csv", dtype={"KEY_CODE": str, "time": str, "population": int})

    location_count = gpd.sjoin(latlonTable2GDF(location, "Latitude", "Longitude"), mesh_poly, how="left", predicate="intersects")
    location_count = location_count.filter(items=["KEY_CODE", "NttTime", "PersonId"]).groupby(["KEY_CODE", "NttTime"], as_index=False).count()
    location_count = location_count.set_axis(["mesh_code", "ntt_time", "sim_pop"], axis=1).query("ntt_time == @HHMM")

    result = pd.merge(df, location_count, left_on="KEY_CODE", right_on="mesh_code", how="left").filter(["KEY_CODE", "population", "sim_pop"])
    result["sim_pop"] = result["sim_pop"].fillna(0.0)

    # 全データのx, y値を集める
    all_sim_pop.extend(result['sim_pop'].values)
    all_population.extend(result['population'].values)

    # 相関係数を計算しリストに保存
    corr_value = result['population'].corr(result['sim_pop'])
    corr_list.append(corr_value)

# x, y の最大値を取得
max_value = max(max(all_sim_pop), max(all_population))

# 相関係数を含めたサブプロットタイトルを生成
subplot_titles = [f"{str(i).zfill(2)}:00 (Corr: {corr_list[i]:.3f})" for i in range(len(NTT_TIME_LIST))]

100%|██████████| 24/24 [01:35<00:00,  3.99s/it]


In [50]:
# 7x3のサブプロットを作成
fig = make_subplots(rows=7, cols=3, subplot_titles=subplot_titles,
                    vertical_spacing=0.05, horizontal_spacing=0.05)  # 間隔を調整

# 行列位置を追跡するためのカウンタ
row, col = 1, 1

# 各時刻のデータに対して散布図をプロット
for idx, HHMM in enumerate(tqdm(NTT_TIME_LIST)):
    df = pd.read_csv(DIR_MOBILE + f"{HHMM}.csv", dtype={"KEY_CODE": str, "time": str, "population": int})

    location_count = gpd.sjoin(latlonTable2GDF(location, "Latitude", "Longitude"), mesh_poly, how="left", predicate="intersects")
    location_count = location_count.filter(items=["KEY_CODE", "NttTime", "PersonId"]).groupby(["KEY_CODE", "NttTime"], as_index=False).count()
    location_count = location_count.set_axis(["mesh_code", "ntt_time", "sim_pop"], axis=1).query("ntt_time == @HHMM")

    result = pd.merge(df, location_count, left_on="KEY_CODE", right_on="mesh_code", how="left").filter(["KEY_CODE", "population", "sim_pop"])
    result["sim_pop"] = result["sim_pop"].fillna(0.0)

    # 列と行の更新（5x5グリッド）
    row = (idx // 3) + 1
    col = (idx % 3) + 1
    
    # 各時刻の散布図を追加
    scatter_trace = go.Scatter(x=result['sim_pop'], y=result['population'], mode='markers', name=f"{HHMM[:2]}:{HHMM[2:]}")
    fig.add_trace(scatter_trace, row=row, col=col)

    # x, y 軸の範囲を固定
    fig.update_xaxes(range=[0, max_value], row=row, col=col)
    fig.update_yaxes(range=[0, max_value], row=row, col=col)

# グラフ全体のタイトル設定
fig.update_layout(
    annotations=[
        dict(
            x=0.5,  # 横方向の中央
            y=-0.15,  # 縦方向、少し下に配置
            xref='paper',
            yref='paper',
            showarrow=False,
            text="シミュレーション値（人）",
            font=dict(size=16)
        ),
        dict(
            x=-0.15,
            y=0.5,
            xref='paper',
            yref='paper',
            showarrow=False,
            text="モバイル空間統計（人）",
            font=dict(size=16),
            textangle=-90  # 縦向きに回転
        )
    ],
    height=1000, width=1000,  # グラフのサイズも調整して有効活用
    showlegend=False
)

# グラフを保存
fig.write_html(f"Z:/lab/SSI2024_検証/chart/相関図/seed_{seed}-no_{noOfPeople}-correlation_plots_7×3_add_label.html")
fig

 88%|████████▊ | 21/24 [01:27<00:12,  4.14s/it]


Exception: The (row, col) pair sent is out of range. Use Figure.print_grid to view the subplot grid. 

In [44]:
# 各時刻のデータに対して個別の相関図を出力
for HHMM in tqdm(NTT_TIME_LIST):
    df = pd.read_csv(DIR_MOBILE + f"{HHMM}.csv", dtype={"KEY_CODE": str, "time": str, "population": int})

    location_count = gpd.sjoin(latlonTable2GDF(location, "Latitude", "Longitude"), mesh_poly, how="left", predicate="intersects")
    location_count = location_count.filter(items=["KEY_CODE", "NttTime", "PersonId"]).groupby(["KEY_CODE", "NttTime"], as_index=False).count()
    location_count = location_count.set_axis(["mesh_code", "ntt_time", "sim_pop"], axis=1).query("ntt_time == @HHMM")

    result = pd.merge(df, location_count, left_on="KEY_CODE", right_on="mesh_code", how="left").filter(["KEY_CODE", "population", "sim_pop"])
    result["sim_pop"] = result["sim_pop"].fillna(0.0)

    # 各時刻のデータのx, yの最大値を計算
    max_value = max(result["sim_pop"].max(), result["population"].max())

    # 相関係数の計算
    corr_value = result['population'].corr(result['sim_pop'])

    # 各時刻の散布図を作成
    fig = go.Figure()

    scatter_trace = go.Scatter(x=result['sim_pop'], y=result['population'], mode='markers', name=f"{HHMM[:2]}:{HHMM[2:]}")
    fig.add_trace(scatter_trace)

    # x, y 軸の範囲を固定 (0から各時刻の最大値まで)
    fig.update_xaxes(range=[0, max_value])
    fig.update_yaxes(range=[0, max_value])

    # グラフのタイトルを相関係数付きで設定
    fig.update_layout(
        title_text=f"相関図 {HHMM[:2]}:{HHMM[2:]} (Corr: {corr_value:.3f})",
        xaxis_title="シミュレーション値",
        yaxis_title="モバイル空間統計",
        height=600,
        width=600,
        showlegend=False
    )
    

    # グラフを保存
    fig.write_html(f"Z:/lab/SSI2024_検証/chart/相関図/seed_{seed}-no_{noOfPeople}-correlation_plots_{HHMM}.html")


SyntaxError: closing parenthesis '}' does not match opening parenthesis '(' on line 39 (3602306343.py, line 44)

In [41]:
## 相関係数の折れ線グラフ
corr_s = []
for HHMM in tqdm(NTT_TIME_LIST):
    df = pd.read_csv(DIR_MOBILE + f"{HHMM}.csv", dtype={"KEY_CODE": str, "time": str, "population": int})

    location_count = gpd.sjoin(latlonTable2GDF(location, "Latitude", "Longitude"), mesh_poly, how="left", predicate="intersects")
    location_count = location_count.filter(items=["KEY_CODE", "NttTime", "PersonId"]).groupby(["KEY_CODE", "NttTime"], as_index=False).count()
    location_count = location_count.set_axis(["mesh_code", "ntt_time", "sim_pop"], axis=1).query("ntt_time == @HHMM")

    result = pd.merge(df, location_count, left_on="KEY_CODE", right_on="mesh_code", how="left").filter(["KEY_CODE", "population", "sim_pop"])
    result["sim_pop"] = result["sim_pop"].fillna(0.0)

    # 相関係数の計算
    corr_value = result['population'].corr(result['sim_pop'])
    corr_s.append((HHMM,corr_value))

100%|██████████| 24/24 [01:32<00:00,  3.85s/it]


In [30]:
print(corr_s)

[('0000', 0.9325290195771836), ('0100', 0.9312377745093523), ('0200', 0.9337450710754305), ('0300', 0.9357898991287035), ('0400', 0.9342556388200544), ('0500', 0.9504296911732172), ('0600', 0.8305619480064528), ('0700', 0.7255960556662129), ('0800', 0.4760842139524341), ('0900', 0.6398410174939843), ('1000', 0.5456440552129626), ('1100', 0.4274700153427039), ('1200', 0.25198443990838776), ('1300', 0.4122400739515708), ('1400', 0.35839101491483677), ('1500', 0.44032590547276573), ('1600', 0.24364017006897196), ('1700', 0.38880243297149514), ('1800', 0.3865434397025216), ('1900', 0.43566082825464714), ('2000', 0.55762608923348), ('2100', 0.6581296856854868), ('2200', 0.7921972796445244), ('2300', 0.837413727426291)]


In [43]:
corr = pd.DataFrame(corr_s).set_axis(["時刻","相関係数"],axis=1)
corr["時刻"] = corr["時刻"].apply(lambda x:x[:2]+":"+x[2:])
fig = px.line(corr,x="時刻",y="相関係数",range_y=[0,1])
fig.write_html(f"Z:/lab/SSI2024_検証/chart/相関図/seed_{seed}-no_{noOfPeople}-Corr_line_chart.html")