In [1]:
import pandas as pd
import json
import re
import requests
import urllib
import numpy as np
import time

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import Select

city code

In [3]:
response = requests.get("https://madefor.github.io/jisx0402/api/v1/all.json")
data = response.json()

In [4]:
cities = pd.DataFrame(data).T.rename_axis('code_6digit').reset_index()

In [5]:
cities['code_5digit'] = cities.code_6digit.str.extract(r'(.*)\d').astype(str)

In [6]:
cities.head()

Unnamed: 0,code_6digit,prefecture,city,prefecture_kana,city_kana,code_5digit
0,11002,北海道,札幌市,ホッカイドウ,サッポロシ,1100
1,11011,北海道,札幌市中央区,ホッカイドウ,サッポロシチュウオウク,1101
2,11029,北海道,札幌市北区,ホッカイドウ,サッポロシキタク,1102
3,11037,北海道,札幌市東区,ホッカイドウ,サッポロシヒガシク,1103
4,11045,北海道,札幌市白石区,ホッカイドウ,サッポロシシロイシク,1104


In [7]:
#exclude cities without data
#government ordinance-designated cities
exclude = ['大阪市','名古屋市','京都市','横浜市','神戸市','北九州市','札幌市',
             '川崎市','福岡市','広島市','仙台市','千葉市','さいたま市','静岡市',
             '堺市','新潟市','浜松市','岡山市','相模原市','熊本市']

#Northern territories in dispute with Russia
kuril = ["01695", "01696", "01697", "01698", "01699", "01700"]

cities = cities[(~cities.city.isin(exclude))&(~cities.code_5digit.isin(kuril))]

data

In [8]:
driver = webdriver.Chrome(ChromeDriverManager().install())




  driver = webdriver.Chrome(ChromeDriverManager().install())


In [9]:
residential_care = pd.DataFrame()
specified = pd.DataFrame()

In [10]:
def get_data(code, residential_care, specified):
    #get data
    url = f"https://jmap.jp/cities/detail/city/{code}"
    driver.get(url)
    time.sleep(1)

    #1. population
    elements = driver.find_elements(By.XPATH, "/html/body/div[1]/div[2]/div[1]/div[3]/form/div[3]/table/tbody/tr/td")
    elements_text = [element.text for element in elements]

    population = pd.DataFrame({
        'age40_64': elements_text[14:21],
        'over65': elements_text[21:28],
        'over75': elements_text[28:35]
    }, index = [2020, '2020_e', 2025, 2030, 2035, 2040, 2045]
    ).drop('2020_e',axis=0)

    population = population.replace({'\D':''}, regex=True).astype(int)

    ##demand for care＝age40-64*1.0 + age65-74*9.7 + over75-*87.3
    population['demand'] = population.age40_64*0.01 + (population.over65 - population.over75)*0.097 + population.over75*0.873

    #2. facilities
    elements = driver.find_elements(By.XPATH, "/html/body/div[1]/div[2]/div[1]/div[3]/form/div[10]/table/tbody/tr/td")
    elements_text = [element.text for element in elements]
    facilities = pd.DataFrame({'total': elements_text[0:2],
                               'home-visit': elements_text[4:6],
                               'daycare': elements_text[8:10],
                               'residential-care': elements_text[12:14],
                               'specified': elements_text[16:18],
                               'home-care-support': elements_text[20:22],
                               'assistive-products': elements_text[24:26]}, 
                             index = ['number', 'per_over75']
                             ).replace({r',':''},regex=True).astype(float).T

    #3. capacity 
    elements = driver.find_elements(By.XPATH, "/html/body/div[1]/div[2]/div[1]/div[3]/form/div[11]/table/tbody/tr/td")
    elements_text = [element.text for element in elements]
    capacity = pd.DataFrame({'residential-care': elements_text[0:2],
                             'specified': elements_text[4:6]}, 
                            index = ['number', 'per_over75']
                           ).replace({'\D':''}, regex=True).astype(int)

    #4. care_workers
    elements = driver.find_elements(By.XPATH, "/html/body/div[1]/div[2]/div[1]/div[3]/form/div[12]/table/tbody/tr[3]/td")
    elements_text = [float(element.text.replace(',','')) for element in elements[:2]]
    care_workers = dict(zip(['number', 'per_over75'], elements_text))

    #data
    data = {'code': code,
            'population': population.to_dict(),
            'facilities': facilities.to_dict(), 
            'capacity': capacity.to_dict(), 'care_workers': care_workers}

    #detail: residential-care facilities 
    detail_url1 = f"https://jmap.jp/facilities/search/type:3/kaigoType:3/searchArea:city/searchId:{code}"
    driver.get(detail_url1)
    time.sleep(1)
    Select(driver.find_element(By.ID, "targets")).select_by_visible_text("介護定員数")
    html_tag = driver.find_element(By.XPATH,"/html/body/div[1]/div[2]/div[1]/div[3]/div[2]").get_attribute('innerHTML')
    table = pd.read_html(html_tag)[0].iloc[:,:4]
    table.columns = ['type', 'name', 'address', 'capacity']
    table['code'] = code
    residential_care = pd.concat([residential_care, table])

    #detai2: specified facilities
    detail_url2 = f"https://jmap.jp/facilities/search/type:3/kaigoType:4/searchArea:city/searchId:{code}"
    driver.get(detail_url2)
    time.sleep(1)
    Select(driver.find_element(By.ID, "targets")).select_by_visible_text("介護定員数")
    html_tag = driver.find_element(By.XPATH,"/html/body/div[1]/div[2]/div[1]/div[3]/div[2]").get_attribute('innerHTML')
    table = pd.read_html(html_tag)[0].iloc[:,:4]
    table.columns = ['type', 'name', 'address', 'capacity']
    table['code'] = code
    specified= pd.concat([specified, table])
    
    return data, residential_care, specified

In [11]:
facility_data = []
null_list = {}
for code in cities.code_5digit:
    try:
        area_data, residential_care, specified = get_data(code, residential_care, specified)
        facility_data.append(area_data)
    except:
        city_name = cities[cities.code_5digit==code].city.values[0]
        null_list[code] = city_name
        print(code, city_name)

30361 湯浅町
30362 広川町
30366 有田川町
30381 美浜町
30382 日高町
30383 由良町
30390 印南町
30391 みなべ町
30401 白浜町
31384 日吉津村
33101 岡山市北区
33204 玉野市
38422 内子町
39201 高知市


In [12]:
null_list2 ={}
for code in null_list.keys():
    try:
        area_data, residential_care, specified = get_data(code, residential_care, specified)
        facility_data.append(area_data)
    except:
        city_name = cities[cities.code_5digit==code].city.values[0]
        null_list2[code] = city_name
        print(code, city_name)

save

In [14]:
with open('facility_data_ver2.json', 'w', encoding='utf-8') as f:
    json.dump({"cities": facility_data}, f, ensure_ascii=False, indent=4)

add coordinate

In [13]:
residential_care.to_csv('residential_care_facilities.csv', index=False)
specified.to_csv('specified_facilities.csv', index=False)

In [15]:
def get_coordinate(x):
    try:
        makeUrl = "https://msearch.gsi.go.jp/address-search/AddressSearch?q="
        s_quote = urllib.parse.quote(x)
        url = makeUrl + s_quote
        response = requests.get(url)
        coordinate = response.json()[0]["geometry"]["coordinates"]
    except:
        coordinate = np.nan
    return coordinate

In [None]:
residential_care['coordinate'] = residential_care.address.apply(get_coordinate)
specified['coordinate'] = specified.address.apply(get_coordinate)

In [None]:
residential_care.to_csv('residential_care_facilities.csv', index=False)
specified.to_csv('specified_facilities.csv', index=False)

In [17]:
#pd.json_normalize(data, sep='_')