# オープンデータとデータ取得

## オープンデータ

### オープンデータとは

### オープンデータの公開レベル

#### RDFとLOD

## データ取得

### CSV等ファイルダウンロード


In [None]:
from pathlib import Path
import pandas as pd
# Locate the chapter's data directory (data/ch09) under the current working directory.
current_dir = Path.cwd()
data_path = current_dir.joinpath("data", "ch09").resolve()

# Read EdinetcodeDlInfo.csv as cp932 text, skipping its first line
# (presumably a metadata row that precedes the header -- confirm against the file).
csv_data = pd.read_csv(
    data_path.joinpath("EdinetcodeDlInfo.csv"),
    encoding="cp932",
    skiprows=1,
)

### APIとPythonのRequestsライブラリ

#### REST API

#### Pythonライブラリのrequests

In [None]:
import requests
# JSON "getData" endpoint of the API hosted at dashboard.e-stat.go.jp.
url = "https://dashboard.e-stat.go.jp/api/1.0/Json/getData"
# Query-string parameters sent with the request.
params = {
    "IndicatorCode": "0704010101000010000",
    "TimeFrom": "20200100",  # presumably the start of the requested period -- confirm in the API docs
}
# requests applies no timeout by default, so a stalled connection would block
# this cell forever; fail with requests.Timeout after 30 seconds instead.
res = requests.get(url=url, params=params, timeout=30)

In [None]:
# Display the HTTP status code of the response.
res.status_code

In [None]:
# Display the character encoding requests uses when decoding res.text.
res.encoding

In [None]:
# Preview the first 100 characters of the decoded response body.
res.text[:100]

In [None]:
# Parse the response body as JSON, then display its top-level keys.
json_data = res.json()
json_data.keys()

In [None]:
# Drill down to the nested DATA_OBJ value and display its first two elements.
json_data["GET_STATS"]["STATISTICAL_DATA"]["DATA_INF"]["DATA_OBJ"][:2]

In [None]:
# Send the same GET request through a Session; the `with` block closes the
# session (and its pooled connections) on exit.
with requests.Session() as session:
    # requests applies no timeout by default, so a stalled connection would
    # block forever; fail with requests.Timeout after 30 seconds instead.
    res = session.request(method="GET", url=url, params=params, timeout=30)
    # Print the first two elements of the nested DATA_OBJ value.
    print(res.json()["GET_STATS"]["STATISTICAL_DATA"]["DATA_INF"]["DATA_OBJ"][:2])

#### APIキーの設定


In [None]:
# Placeholder credential: replace this string with your own API key
# (it is sent below as the "appId" query parameter).
api_key = "ここにAPIキーを書く"
# JSON "getStatsData" endpoint of the REST API hosted at api.e-stat.go.jp.
data_url = "https://api.e-stat.go.jp/rest/3.0/app/json/getStatsData"
data_params = {
    "appId": api_key,
    "statsDataId": "0003000795",
    "limit": 100,  # presumably caps the number of records returned -- confirm in the API docs
}
# requests applies no timeout by default, so a stalled connection would block
# this cell forever; fail with requests.Timeout after 30 seconds instead.
data_res = requests.get(url=data_url, params=data_params, timeout=30)
# Display the nested VALUE entry of the parsed JSON response.
data_res.json()["GET_STATS_DATA"]["STATISTICAL_DATA"]["DATA_INF"]["VALUE"]

##### APIキーを環境変数に設定する


In [None]:
import os
# Read the API key from the ESTAT_APP_ID environment variable.
# os.getenv returns None when the variable is not set.
api_key = os.getenv("ESTAT_APP_ID")

##### .envファイルに設定する


In [None]:
!pip install python-dotenv

In [None]:
import os
from dotenv import load_dotenv
# Load the contents of the .env file.
load_dotenv()
# Get the API key from the environment variable (None when it is not set).
api_key = os.getenv("ESTAT_APP_ID")

##### ColabでAPIキーを設定する

#### OpenAPIとSwagger

#### LODとSPARQL


In [None]:
!pip install sparqlwrapper

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON
# URL of the SPARQL query endpoint on data.e-stat.go.jp.
SPARQL_ENDPOINT = "http://data.e-stat.go.jp/lod/sparql/alldata/query"
# Client bound to that endpoint; the following cells configure it and run queries with it.
sparql = SPARQLWrapper(SPARQL_ENDPOINT)

In [None]:
# Request query results in JSON format.
sparql.setReturnFormat(JSON)

年と人口のみを取得

In [None]:
# Set a query that selects ?year and ?population for subjects whose
# reference-area label is "新宿区"@ja, with the sex, nationality, area and age
# dimensions fixed to the sex-all, nationality-japan, area-all and age-all codes.
# The query is only stored on the client here; it is not sent yet.
sparql.setQuery("""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sdmx-dimension: <http://purl.org/linked-data/sdmx/2009/dimension#>
PREFIX estat-measure: <http://data.e-stat.go.jp/lod/ontology/measure/>
PREFIX cd-dimension: <http://data.e-stat.go.jp/lod/ontology/crossDomain/dimension/>
PREFIX cd-code: <http://data.e-stat.go.jp/lod/ontology/crossDomain/code/>
PREFIX g00200521-dimension-2010:<http://data.e-stat.go.jp/lod/ontology/g00200521/dimension/2010/>
PREFIX g00200521-code-2010:<http://data.e-stat.go.jp/lod/ontology/g00200521/code/2010/>
select  ?year ?population
where {
      ?s estat-measure:population ?population ;
         sdmx-dimension:refArea / rdfs:label "新宿区"@ja ;
         cd-dimension:timePeriod ?year ;
         cd-dimension:sex cd-code:sex-all ;
         cd-dimension:nationality cd-code:nationality-japan ;
         g00200521-dimension-2010:area g00200521-code-2010:area-all ;
         cd-dimension:age cd-code:age-all .
}
    """
)

In [None]:
# Send the stored query to the endpoint and convert the response according to
# the configured return format, then display the converted result.
ret = sparql.queryAndConvert()
ret

主語（?s）も含めて取得

In [None]:
# Same graph pattern as the year/population query, but the select clause also
# returns the subject ?s of each matching observation.
sparql.setQuery("""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sdmx-dimension: <http://purl.org/linked-data/sdmx/2009/dimension#>
PREFIX estat-measure: <http://data.e-stat.go.jp/lod/ontology/measure/>
PREFIX cd-dimension: <http://data.e-stat.go.jp/lod/ontology/crossDomain/dimension/>
PREFIX cd-code: <http://data.e-stat.go.jp/lod/ontology/crossDomain/code/>
PREFIX g00200521-dimension-2010:<http://data.e-stat.go.jp/lod/ontology/g00200521/dimension/2010/>
PREFIX g00200521-code-2010:<http://data.e-stat.go.jp/lod/ontology/g00200521/code/2010/>
select  ?s ?year ?population
where {
      ?s estat-measure:population ?population ;
         sdmx-dimension:refArea / rdfs:label "新宿区"@ja ;
         cd-dimension:timePeriod ?year ;
         cd-dimension:sex cd-code:sex-all ;
         cd-dimension:nationality cd-code:nationality-japan ;
         g00200521-dimension-2010:area g00200521-code-2010:area-all ;
         cd-dimension:age cd-code:age-all .
}
    """
)
# Run the query and display the converted result.
ret = sparql.queryAndConvert()
ret

In [None]:
# Display the top-level keys of the converted query result.
ret.keys()

In [None]:
# Display the "head" section of the result.
ret["head"]

In [None]:
# Display the keys of the "results" section.
ret["results"].keys()

In [None]:
# Build a DataFrame from the records found at ret["results"]["bindings"];
# nested fields within each record are flattened into dot-separated column names.
sparql_df = pd.json_normalize(ret, record_path=["results", "bindings"])
sparql_df

In [None]:
# Turn the flat, dot-separated column names into a MultiIndex by splitting
# each name on "." into a tuple of level labels.
sparql_df.columns = pd.MultiIndex.from_tuples(
    [tuple(label.split(".")) for label in sparql_df.columns]
)
sparql_df