# オープンデータ・経済統計・公的統計

## 経済統計と公的統計とは

## 世界の公的機関とオープンデータ

## 日本の公的機関とオープンデータ

## 日本の公的統計とe-Stat

### e-Statとは

### e-Stat API機能

### e-Stat API登録とアプリケーションID取得

### e-Stat APIをPythonで利用する


In [None]:
from urllib.parse import urljoin
import requests
import pandas as pd

In [None]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()
# Application ID sent as the "appId" parameter of every e-Stat API request below.
appId = os.getenv("ESTAT_APP_ID")
if not appId:
    # Fail fast with a clear message instead of sending requests without an ID
    # and failing later on an unexpected response structure.
    raise RuntimeError(
        "ESTAT_APP_ID is not set; define it in the environment or in a .env file"
    )
# API version segment and the base URL every endpoint is joined onto.
version = "3.0"
base_url = f"https://api.e-stat.go.jp/rest/{version}/"

#### 統計表情報を取得する

In [None]:
# JSON endpoint that returns the list of statistical tables.
statslist_endpoint = "app/json/getStatsList"
statslist_url = urljoin(base_url, statslist_endpoint)
# Query with surveyYears=2020 and at most 100 entries.
statslist_params = {"appId": appId, "surveyYears": 2020, "limit": 100}
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
statslist_res = requests.get(statslist_url, params=statslist_params, timeout=30)
# Surface HTTP-level failures here rather than as a JSON decoding error below.
statslist_res.raise_for_status()
statslist_out = statslist_res.json()

In [None]:
# Flatten the TABLE_INF records of the getStatsList response into a DataFrame;
# nested keys are joined with "_" to build the column names.
table_inf = pd.json_normalize(
    data=statslist_out,
    sep="_",
    record_path=["GET_STATS_LIST", "DATALIST_INF", "TABLE_INF"],
)
# Display the resulting column names.
table_inf.columns

In [None]:
# Preview the first five rows of a few selected columns.
table_inf.loc[
    :, ["@id", "TITLE_SPEC_TABLE_NAME", "TITLE_$", "OVERALL_TOTAL_NUMBER"]
].head(5)

#### メタ情報を取得する

In [None]:
# ID of the statistical table whose meta information is requested.
statsDataId = "0002070010"
meta_endpoint = "app/json/getMetaInfo"
meta_url = urljoin(base_url, meta_endpoint)
meta_params = {"appId": appId, "statsDataId": statsDataId}
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
meta_res = requests.get(meta_url, params=meta_params, timeout=30)
# Surface HTTP-level failures here rather than as a JSON decoding error below.
meta_res.raise_for_status()
meta = meta_res.json()
# Display the keys available under TABLE_INF.
meta["GET_META_INFO"]["METADATA_INF"]["TABLE_INF"].keys()

In [None]:
# Keep the METADATA_INF section of the getMetaInfo response for later cells.
metadata = meta["GET_META_INFO"]["METADATA_INF"]
# OVERALL_TOTAL_NUMBER as reported under TABLE_INF
# (presumably the table's total record count; confirm against the API spec).
overall_total_number = metadata["TABLE_INF"]["OVERALL_TOTAL_NUMBER"]
overall_total_number

In [None]:
# List [position, @id, @name] for every CLASS_OBJ entry of the metadata.
[
    [position, class_item["@id"], class_item["@name"]]
    for position, class_item in enumerate(metadata["CLASS_INF"]["CLASS_OBJ"])
]

In [None]:
# Display the raw CLASS value of the CLASS_OBJ entry at position 0.
metadata["CLASS_INF"]["CLASS_OBJ"][0]["CLASS"]

In [None]:
# Show rows 15-24 of the CLASS entries of the CLASS_OBJ at position 1.
pd.DataFrame(metadata["CLASS_INF"]["CLASS_OBJ"][1]["CLASS"]).iloc[15:25]

In [None]:
# Display all CLASS entries of the CLASS_OBJ at position 2 as a table.
pd.DataFrame(metadata["CLASS_INF"]["CLASS_OBJ"][2]["CLASS"])

In [None]:
# Preview the first five CLASS entries of the CLASS_OBJ at position 3.
pd.DataFrame(metadata["CLASS_INF"]["CLASS_OBJ"][3]["CLASS"]).head(5)

In [None]:
# Display the raw CLASS value of the CLASS_OBJ entry at position 4.
metadata["CLASS_INF"]["CLASS_OBJ"][4]["CLASS"]

In [None]:
# Preview the first five CLASS entries of the CLASS_OBJ at position 5.
pd.DataFrame(metadata["CLASS_INF"]["CLASS_OBJ"][5]["CLASS"]).head(5)

#### 統計データを取得する


In [None]:
statsDataId = "0002070010"
# Query parameters reused by the CSV and JSON data requests in this notebook.
data_params = dict(
    appId=appId,
    statsDataId=statsDataId,
    # Restrict the expenditure category (cat01) to hierarchy level 4.
    lvCat01="4",
    # Restrict the household type to workers' households among
    # two-or-more-person households (2000 onward).
    cdCat02="04",
    # Restrict the age group of the household head to the average.
    cdCat03="A00",
    # Restrict to January 2020 and later.
    cdTimeFrom="2020000101",
    # Restrict to December 2022 and earlier.
    cdTimeTo="2022001212",
)

##### CSVで取得する


In [None]:
from io import StringIO
# CSV flavour of the statistical-data endpoint.
csv_data_endpoint = "app/getSimpleStatsData"
csv_data_url = urljoin(base_url, csv_data_endpoint)
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
csv_data_res = requests.get(csv_data_url, params=data_params, timeout=60)
# Surface HTTP-level failures before the body is used as CSV text.
csv_data_res.raise_for_status()
# Print the first 1000 characters of the response to inspect its layout.
print(csv_data_res.text[:1000])

In [None]:
# Parse the response as CSV, skipping a fixed number of leading lines (28),
# and preview the first five rows.
pd.read_csv(
    StringIO(csv_data_res.text),
    skiprows=28,
).head(5)

In [None]:
# Split the text on the "VALUE" marker and keep the second piece (index [1]);
# the trailing [1:] slice drops the newline that starts that piece.
pd.read_csv(
    StringIO(
        csv_data_res.text.split('"VALUE"')[1][1:]
    )
).head(5)

##### JSONで取得する


In [None]:
# JSON flavour of the statistical-data endpoint.
data_endpoint = "app/json/getStatsData"
data_url = urljoin(base_url, data_endpoint)
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
data_res = requests.get(data_url, params=data_params, timeout=60)
# Surface HTTP-level failures here rather than as a JSON decoding error below.
data_res.raise_for_status()
data = data_res.json()

In [None]:
# Flatten the VALUE records of the getStatsData response into a DataFrame.
value_df = pd.json_normalize(
    data=data,
    record_path=["GET_STATS_DATA", "STATISTICAL_DATA", "DATA_INF", "VALUE"],
)
value_df.head(5)

In [None]:
# Strip the leading "@" from column names and rename the "$" column to "value".
value_df = value_df.rename(
    columns={
        name: name.lstrip("@").replace("$", "value")
        for name in value_df.columns
    }
)

In [None]:
# NOTE section of DATA_INF; its "@char" entries are the markers that the
# following cells replace with NaN in the "value" column.
note = data["GET_STATS_DATA"]["STATISTICAL_DATA"]["DATA_INF"]["NOTE"]
note

In [None]:
import numpy as np
# Collect the marker characters listed in NOTE (this cell assumes NOTE is a list).
note_char = [entry["@char"] for entry in note]
# Replace those markers with NaN so the "value" column can be cast to float.
value_df = value_df.assign(
    value=value_df["value"].replace(note_char, np.nan).astype(float)
)

In [None]:
from typing import List, Dict, Union
def missing_to_nan(
        value: pd.DataFrame, 
        note: Union[Dict[str, str], List[Dict[str, str]]]
    ) -> pd.DataFrame:
    """Return a copy of *value* whose "value" column is numeric.

    The "@char" markers found in *note* are replaced with NaN and the column
    is cast to float. *note* may be a list of dicts or a single dict; for any
    other type *value* is returned untouched.
    """
    if isinstance(note, list):
        markers = [entry["@char"] for entry in note]
    elif isinstance(note, dict):
        markers = note["@char"]
    else:
        # Nothing to interpret: hand the input back as-is.
        return value
    numeric = value["value"].replace(markers, np.nan).astype(float)
    return value.assign(value=numeric)

In [None]:
class_obj = data["GET_STATS_DATA"]["STATISTICAL_DATA"]["CLASS_INF"]["CLASS_OBJ"]
for co in class_obj:
    entries = co["CLASS"]
    # "CLASS" can be either a list or a dict.
    if isinstance(entries, list):
        cls_df = pd.DataFrame(entries)
    elif isinstance(entries, dict):
        # A single dict becomes a one-row frame.
        cls_df = pd.Series(entries).to_frame().T
    else:
        print("Unexpected CLASS type:", type(entries))
        continue
    label = co["@name"]
    # Index the lookup table by code and prefix its columns with the class name.
    cls_df = cls_df.set_index("@code")
    cls_df = cls_df.rename(columns=lambda col: f"{label}{col.lstrip('@')}")
    # Attach the class attributes to the values, then rename the code column.
    value_df = pd.merge(
        value_df, cls_df, how="left", left_on=co["@id"], right_index=True
    )
    value_df = value_df.rename(columns={co["@id"]: f"{label}code"})
value_df.columns

In [None]:
# English attribute fragments found in column names and their Japanese labels.
attr_map = {
    "value": "値",
    "code": "コード",
    "name": "",
    "level": "階層レベル",
    "unit": "単位",
    "parentCode": "親コード",
    "addInf": "追加情報",
    "tab": "表章項目",
    "cat": "分類",
    "area": "地域",
    "time": "時間軸",
    "annotation": "注釈記号",
}
def _convert(c):
    """Return column name *c* with the first matching ``attr_map`` key replaced.

    Keys are tried in the dictionary's insertion order and only the first key
    that occurs in *c* is substituted; a name containing no key is returned
    unchanged.
    """
    matched = next((key for key in attr_map if key in c), None)
    if matched is None:
        return c
    return c.replace(matched, attr_map[matched])
value_df = value_df.rename(columns=_convert)

In [None]:
# Show the first row transposed so every column name is readable.
value_df.iloc[:1].T

#### 関数を用意する


In [None]:
from estat import (
    get_metainfo, 
    get_statsdata, 
    cleansing_statsdata, 
    colname_to_japanese, 
    create_hierarchy_dataframe
)

In [None]:
statsDataId = "0002070010"
# Fetch the meta information through the helper imported from the estat module.
meta = get_metainfo(appId, statsDataId)
# NOTE(review): "medatdata" looks like a misspelling of "metadata"; the name is
# kept as-is here in case other cells refer to it.
medatdata = meta["GET_META_INFO"]["METADATA_INF"]
# OVERALL_TOTAL_NUMBER as reported under TABLE_INF.
total_num = medatdata["TABLE_INF"]["OVERALL_TOTAL_NUMBER"]
total_num

In [None]:
# First request, without a start position, via the helper from the estat module.
data = get_statsdata(appId, statsDataId)
# Display NEXT_KEY from RESULT_INF; the download loop passes this value as
# "startPosition" when requesting the next chunk.
data["GET_STATS_DATA"]["STATISTICAL_DATA"]["RESULT_INF"]["NEXT_KEY"]

In [None]:
%%time
dfs = []
dfs.append(colname_to_japanese(cleansing_statsdata(data)))
max_position = 500000
while "NEXT_KEY" in data["GET_STATS_DATA"]["STATISTICAL_DATA"]["RESULT_INF"]:
    # 10万件を超える場合、次のデータを取得するための開始位置を取得
    start_position = data["GET_STATS_DATA"]["STATISTICAL_DATA"]["RESULT_INF"].get("NEXT_KEY")
    print("NEXT_KEY: ", start_position)
    # 取得データが大きすぎる(max_iterationsを超える)場合は、取得の繰り返しを終了
    if start_position > max_position:
        break
    # 次のデータを取得し、DataFrameのリストに追加
    data = get_statsdata(appId, statsDataId, params={"startPosition": start_position})
    dfs.append(colname_to_japanese(cleansing_statsdata(data)))
    
# 取得したDataFrameを結合
df = pd.concat(dfs)
df.shape

### 統計Dashboard API


In [None]:
# Statistics Dashboard endpoint that returns indicator information.
indicator_url = "https://dashboard.e-stat.go.jp/api/1.0/Json/getIndicatorInfo"
indicator_params = {"StatName": "家計調査"}
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
indicator_res = requests.get(indicator_url, params=indicator_params, timeout=30)
# Surface HTTP-level failures here rather than as a JSON decoding error below.
indicator_res.raise_for_status()
indicator = indicator_res.json()
indicator_metadata = indicator["GET_META_INDICATOR_INF"]["METADATA_INF"]
# Look at the first CLASS_OBJ entry only and print its name and code.
indicator_classobj = indicator_metadata["CLASS_INF"]["CLASS_OBJ"][0]
print(indicator_classobj["@name"], indicator_classobj["@code"])

In [None]:
# Statistics Dashboard endpoint that returns the data of one indicator.
# Note: data_url, data_params and data are rebound here.
data_url = "https://dashboard.e-stat.go.jp/api/1.0/Json/getData"
data_params = {"IndicatorCode": "0704010101000010000", "TimeFrom": "20200100"}
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
data_res = requests.get(data_url, params=data_params, timeout=30)
# Surface HTTP-level failures here rather than as a JSON decoding error below.
data_res.raise_for_status()
data = data_res.json()
# Display the first two DATA_OBJ records.
data["GET_STATS"]["STATISTICAL_DATA"]["DATA_INF"]["DATA_OBJ"][:2]

In [None]:
# Build a DataFrame from the VALUE dict of every DATA_OBJ record, then strip
# the leading "@" from column names and rename the "$" column to "value".
data_df = pd.DataFrame(
    [
        record["VALUE"]
        for record in data["GET_STATS"]["STATISTICAL_DATA"]["DATA_INF"]["DATA_OBJ"]
    ]
).rename(columns=lambda name: name.lstrip("@").replace("$", "value"))
data_df.head(5)

In [None]:
# Statistics Dashboard endpoint that returns region information.
region_url = "https://dashboard.e-stat.go.jp/api/1.0/Json/getRegionInfo"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
region_res = requests.get(region_url, timeout=30)
# Surface HTTP-level failures here rather than as a JSON decoding error below.
region_res.raise_for_status()
region = region_res.json()
region_co = region["GET_META_REGION_INF"]["METADATA_INF"]["CLASS_INF"]["CLASS_OBJ"]
# One DataFrame per CLASS_OBJ entry, stacked into a single table.
region_dfs = [pd.DataFrame(c["CLASS"]) for c in region_co]
region_df = pd.concat(region_dfs)
# Strip the leading "@" from column names and replace "name" with "地域".
region_df = region_df.rename(
    columns=lambda col: col.lstrip("@").replace("name", "地域")
)
region_df.tail()

In [None]:
# Attach the region name to every row by left-joining on regionCode.
data_df = pd.merge(
    data_df,
    region_df[["regionCode", "地域"]],
    how="left",
    on="regionCode",
)

In [None]:
# Derive year, month and a first-of-month timestamp from the "time" column.
data_df = (
    data_df
    # Year: the integer value of "time" without its last four digits.
    .assign(**{"年": lambda df: df["time"].astype(int) // 10000})
    # Month: characters 4-5 of the "time" string.
    .assign(**{"月": lambda df: df["time"].str[4:6].astype(int)})
    # Timestamp built from "<year>-<month>-01".
    .assign(**{
        "年月": lambda df: pd.to_datetime(
            df["年"].astype(str) + "-" + df["月"].astype(str) + "-01"
        )
    })
)
data_df.head(5)

In [None]:
# Statistics Dashboard endpoint that returns social-event information.
socialevent_url = "https://dashboard.e-stat.go.jp/api/1.0/Json/getSocialEventInfo"
socialevent_params = {"TimeFrom": "20200100"}
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
socialevent_res = requests.get(
    socialevent_url, params=socialevent_params, timeout=30
)
# Surface HTTP-level failures here rather than as a JSON decoding error below.
socialevent_res.raise_for_status()
socialevent = socialevent_res.json()
# Flatten the CLASS_OBJ records into a DataFrame.
socialevent_df = pd.json_normalize(socialevent, 
    record_path=["GET_META_SOCIAL_INFO", "METADATA_INF", "CLASS_INF", "CLASS_OBJ"]
)
# Display the table without its CLASS column (socialevent_df itself is unchanged).
socialevent_df.drop("CLASS", axis=1)