生物季節観測
多種データを横持ちで保存

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

def add_project_root(marker="project_paths.py") -> Path:
    """Find the nearest ancestor directory containing `marker` and put it on sys.path.

    The search starts at the resolved current working directory and walks
    upwards through its parents; the first directory that contains `marker`
    wins.

    Parameters
    ----------
    marker : str
        File (or directory) name whose presence identifies the project root.

    Returns
    -------
    Path
        The directory that contains `marker`. It is also inserted at the
        front of ``sys.path``.

    Raises
    ------
    FileNotFoundError
        If neither the working directory nor any of its parents contains
        `marker`.
    """
    start = Path.cwd().resolve()
    candidates = (start, *start.parents)
    root = next((c for c in candidates if (c / marker).exists()), None)
    if root is None:
        raise FileNotFoundError(f"Could not find {marker} from cwd={start}")
    # Front of sys.path so modules in the project root are found first.
    sys.path.insert(0, str(root))
    return root

# Locate the project root; add_project_root() also inserts it at the front of
# sys.path, presumably so that the project-local imports below resolve.
PROJECT_ROOT = add_project_root()
print("Project root:", PROJECT_ROOT)

from project_paths import ROOT as PROJECT, JMA_DATA, OUT, FIG, TAB, REP, CFG
# NOTE(review): OUT is already imported on the previous line; this repeat is redundant.
from project_paths import OUT
from meteo_lib.core import getBlock, get_HourlyData, get_10minData
# NOTE(review): this rebinds JMA_DATA, replacing the one imported from
# project_paths above — confirm which definition is intended.
from meteo_lib.paths import JMA_DATA, JMA_GPV_DATA, ERA5_DATA, CACHE_DIR

print("Data root:",JMA_DATA)


Project root: /Users/takumi/Projects/meteo-analysis/JMA
Data root: /Users/takumi/Projects/MetData/JMA


In [2]:
import numpy as np
import pandas as pd
import math
from datetime import datetime, timedelta, date, time
import calendar
import os
from pathlib import Path

In [3]:
def datetime_value(x, column_value, base_year=2000):
    """Convert a numeric date code into a Timestamp placed in a fixed year.

    Only the month and day are taken from the code. Any year contained in a
    yyyymmdd code is discarded and replaced by `base_year`, so that dates
    from different years share one calendar.

    Parameters
    ----------
    x : number or NaN
        Date code, either ``mmdd`` (e.g. 314) or ``yyyymmdd`` (e.g. 20230314).
        Missing values (anything ``pd.isna`` accepts) are passed through.
    column_value : any
        Not used by this function; accepted so the function can be called
        with an extra positional argument (e.g. ``Series.apply(..., args=(col,))``).
    base_year : int, default 2000
        Year assigned to every returned date. The default is a leap year, so
        a code of 229 (Feb 29) is representable.

    Returns
    -------
    pd.Timestamp or pd.NaT
        ``pd.NaT`` when `x` is missing, otherwise the month/day in `base_year`.

    Raises
    ------
    ValueError
        Raised by ``pd.Timestamp`` when the decoded month/day is not a valid
        date in `base_year`.
    """
    if pd.isna(x):
        return pd.NaT

    x = int(x)

    # yyyymmdd form
    if x >= 10000000:
        # Fixed: the original formatted an undefined name `v` here, which
        # raised NameError for every 8-digit input.
        s = f"{x:08d}"
        mm = int(s[4:6])
        dd = int(s[6:8])
    # mmdd form
    else:
        mm = x // 100
        dd = x % 100

    # Place the month/day in the fixed reference year
    return pd.Timestamp(base_year, mm, dd)

In [4]:
# Read one phenological-observation (生物季節観測) CSV file
def read_data(fn, end_year=2024):
    """Load one phenological-observation CSV into a frame of month/day timestamps.

    The file is read as Shift-JIS. The second cell of the first row is
    returned as the title; the second row is used as the header and the
    '地点名' column as the index.

    Parameters
    ----------
    fn : str or path-like
        Path of the CSV file.
    end_year : int or str, default 2024
        Columns whose label compares greater than ``str(end_year)`` are
        dropped. The comparison is a string comparison, so this assumes the
        remaining column labels are year strings — TODO confirm against the
        file layout.

    Returns
    -------
    (pd.DataFrame, object)
        The transposed frame (index named '年', one column per '地点名'
        entry) whose cells were converted by ``datetime_value``, and the
        title cell read from the first row.

    Raises
    ------
    KeyError
        From ``DataFrame.drop`` if any of the summary columns is missing.
    """
    # Columns whose name contains one of these substrings are not loaded.
    skip_substrings = ['rm', '番号']
    # Summary columns that are removed after loading.
    summary_columns = ['平年値', '最早値', '最晩値', '最早年', '最晩年']

    title_row = pd.read_csv(fn, encoding='shift-jis', nrows=1, header=None).iloc[0,1]
    df = pd.read_csv(fn, header=1, encoding='shift-JIS',
                 usecols=lambda column: all(substring not in column for substring in skip_substrings),
                 index_col='地点名')

    df = df.drop(columns=summary_columns)
    df = df.drop(df.columns[df.columns>str(end_year)], axis=1)
    df.index = df.index.str.strip()  # remove surrounding whitespace from the index labels
    # Values below 100 are treated as missing
    # (presumably because a valid mmdd code is at least 101 — verify).
    df = df.where(df>=100, other=np.nan)

    # Convert every date code to a Timestamp, column by column.
    for col in df.columns:
        df[col] = df[col].apply(datetime_value, args=(col,))

    df = df.transpose()  # swap rows and columns
    df.index.name = '年'

    return df, title_row

In [6]:
fp = JMA_DATA / 'raw/生物季節観測'
station = "仙台"

# Event name -> number that forms the zero-padded (3-digit) file-name prefix
subs = {'うめ開花':1, 'さくら開花':4, 'さくら満開':5, 'あじさい開花':9, 'すすき開花':11,
        'いちょう黄葉':13, 'いちょう落葉':14,'かえで紅葉':15,'かえで落葉':16}

series_list = []

for sub_name, sub_id in subs.items():
    sub_no = f"{sub_id:03d}"
    fn = fp / f"{sub_no}{sub_name}.csv"

    df_part, title_row = read_data(fn)

    # Keep only the selected station's column, labelled with the event name
    s = df_part[station].rename(sub_name)
    series_list.append(s)

# Concatenate side by side: one column per event
df = pd.concat(series_list, axis=1)
df_md = df.copy()
print(df_md)

# Feb 29 becomes NaN; every other date gets its day-of-year in a non-leap year
def md_to_doy_A(ts):
    """Return the non-leap-year day-of-year for the month/day of `ts`.

    Parameters
    ----------
    ts : pd.Timestamp or missing value
        Only ``ts.month`` and ``ts.day`` are used; the year is ignored.

    Returns
    -------
    int or float
        Day of year counted in 2001 (a non-leap year), or ``np.nan`` when
        `ts` is missing or falls on Feb 29.
    """
    if pd.isna(ts) or (ts.month, ts.day) == (2, 29):
        return np.nan
    non_leap = pd.Timestamp(year=2001, month=ts.month, day=ts.day)
    return non_leap.dayofyear

# Month/day timestamps -> day-of-year values (NaN where md_to_doy_A returns NaN)
df_doy = df_md.map(md_to_doy_A)

# Write a nullable-integer copy to CSV; df_doy itself keeps its original values
out_fn = OUT / f"生物季節観測_{station}_DOY.csv"
df_doy_int = df_doy.round().astype("Int64")
df_doy_int.to_csv(out_fn)

print(df_doy)

           うめ開花      さくら開花      さくら満開     あじさい開花      すすき開花     いちょう黄葉  \
年                                                                        
1953 2000-03-14 2000-04-11 2000-04-17        NaT        NaT        NaT   
1954 2000-03-03 2000-04-06 2000-04-12        NaT        NaT        NaT   
1955 2000-03-10 2000-04-11 2000-04-16        NaT        NaT        NaT   
1956 2000-03-14 2000-04-16 2000-04-19 2000-06-05        NaT 2000-11-16   
1957 2000-04-07 2000-04-16 2000-04-21 2000-06-27        NaT 2000-11-06   
...         ...        ...        ...        ...        ...        ...   
2020 2000-03-01 2000-03-28 2000-04-03 2000-06-24 2000-07-27 2000-12-01   
2021 2000-03-10 2000-03-28 2000-03-31 2000-06-21 2000-07-18 2000-11-30   
2022 2000-03-15 2000-04-08 2000-04-11 2000-06-27 2000-08-22 2000-11-28   
2023 2000-03-04 2000-03-26 2000-03-31 2000-06-17 2000-08-14 2000-12-05   
2024 2000-02-19 2000-04-02 2000-04-09 2000-06-16 2000-08-05 2000-11-28   

         いちょう落葉      かえで紅葉      かえで落葉

In [8]:
import requests
from bs4 import BeautifulSoup
import lxml

# Monthly table fetched live from the JMA website.
# NOTE(review): prec_no / block_no are hard-coded in the URL and are not derived
# from `station` — confirm they refer to the same site.
url = "https://www.data.jma.go.jp/stats/etrn/view/monthly_s3.php?prec_no=34&block_no=47590&year=2023&month=1&day=&view="

tables = pd.read_html(url)   # every table on the page, one DataFrame each
len(tables), [t.shape for t in tables]
for i, t in enumerate(tables):
    print(i, t.columns)

# Same data as df_doy, but with the index cast to int (string -> integer)
# so it can be matched against the '年' index of the monthly table.
df_doy2 = df_doy.set_axis(df_doy.index.astype(int), axis=0)

# The first table on the page is used, indexed by its '年' column.
df_temp = tables[0].set_index('年')
print(df_temp)
print(df_doy)

# Keep only the index values present in both frames
df = df_doy2.merge(df_temp, left_index=True, right_index=True, how="inner")

print(df)
out_fn_df = OUT / f"生物季節観測_{station}_DOY_Temp.csv"
df.to_csv(out_fn_df)


0 Index(['年', '1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月',
       '12月', '年の値'],
      dtype='object')
1 Index([0, 1, 2, 3], dtype='int64')
         1月   2月   3月    4月    5月    6月    7月    8月    9月   10月   11月  12月  \
年                                                                            
1926    NaN  NaN  NaN   NaN   NaN   NaN   NaN   NaN   NaN  11.8   6.8  1.7   
1927   -0.6 -1.5  2.5   9.0  13.6  17.6  23.4  24.2  18.5  13.9   8.3  2.1   
1928    0.4  0.1  2.9   8.8  13.7  16.9  21.1  22.4  21.6  14.2   8.8  1.4   
1929   -1.3 -0.7  2.9   8.7  12.9  16.7  23.3  24.9  18.7  14.3   8.2  5.0   
1930   -0.6  1.9  5.6   9.9  14.4  18.4  22.4  24.5  19.2  13.9   6.8  2.3   
...     ...  ...  ...   ...   ...   ...   ...   ...   ...   ...   ...  ...   
2022    1.7  1.9  6.4  11.8  16.5  20.2  24.9  25.1  22.2  15.5  11.9  4.2   
2023    2.1  3.0  9.3  13.3  16.6  21.6  26.6  28.6  25.1  16.7  11.4  5.7   
2024    4.2  4.7  6.0  14.8  17.8  21.5  26.1  27.4  23.5

In [None]:
# Compare the index dtypes and the first few labels of the two frames
print(df_temp.index.dtype, df_doy.index.dtype)
for _frame in (df_temp, df_doy):
    print(_frame.index[:5])
