In [1]:
import geopandas as gpd
import pandas as pd
import folium

In [2]:
# Shapefile with Japan's first-level administrative areas
# (file name suggests GADM v4.1, level 1 — confirm the data source).
JAPAN_SHAPEFILE = 'gadm41_JPN_shp/gadm41_JPN_1.shp'

japan_shp = gpd.read_file(JAPAN_SHAPEFILE)
japan_shp

Unnamed: 0,GID_1,GID_0,COUNTRY,NAME_1,VARNAME_1,NL_NAME_1,TYPE_1,ENGTYPE_1,CC_1,HASC_1,ISO_1,geometry
0,JPN.1_1,JPN,Japan,Aichi,Aiti,愛知県,Ken,Prefecture,,JP.AI,JP-23,"MULTIPOLYGON (((137.09743 34.65121, 137.09773 ..."
1,JPN.2_1,JPN,Japan,Akita,,秋田県,Ken,Prefecture,,JP.AK,JP-05,"MULTIPOLYGON (((140.70844 38.92136, 140.69861 ..."
2,JPN.3_1,JPN,Japan,Aomori,,青森県,Ken,Prefecture,,JP.AO,JP-02,"MULTIPOLYGON (((140.95625 40.27003, 140.95795 ..."
3,JPN.4_1,JPN,Japan,Chiba,Tiba|Tsiba,千葉県,Ken,Prefecture,,JP.CH,JP-12,"MULTIPOLYGON (((139.82417 34.91861, 139.82465 ..."
4,JPN.5_1,JPN,Japan,Ehime,,愛媛県,Ken,Prefecture,,JP.EH,JP-38,"MULTIPOLYGON (((132.56189 32.91159, 132.56241 ..."
5,JPN.6_1,JPN,Japan,Fukui,Hukui,福井県,Ken,Prefecture,,JP.FI,JP-18,"MULTIPOLYGON (((135.77737 35.36200, 135.77370 ..."
6,JPN.7_1,JPN,Japan,Fukuoka,Hukuoka,福岡県,Ken,Prefecture,,JP.FO,JP-40,"MULTIPOLYGON (((130.88498 33.18366, 130.88748 ..."
7,JPN.8_1,JPN,Japan,Fukushima,Hukusima,福島県,Ken,Prefecture,,JP.FS,JP-07,"MULTIPOLYGON (((140.26530 36.93347, 140.26071 ..."
8,JPN.9_1,JPN,Japan,Gifu,Gihu,岐阜県,Ken,Prefecture,,JP.GF,JP-21,"POLYGON ((136.67628 35.17009, 136.67532 35.165..."
9,JPN.10_1,JPN,Japan,Gunma,GunmaGumma,群馬県,Ken,Prefecture,,JP.GM,JP-10,"POLYGON ((138.94455 36.09058, 138.94102 36.088..."


In [3]:
# Patch the rows whose ISO_1 value is 'NA' in the shapefile.
# Keys are row labels of japan_shp, values are the ISO_1 codes to write.
# NOTE(review): rows 12 and 26 are presumably Hyōgo (JP-28) and Nagasaki (JP-42);
# verify against NAME_1, since these labels depend on the shapefile's row order.
iso_1_replacements = {12: 'JP-28', 26: 'JP-42'}

# Overwrite one cell per entry using the scalar accessor.
for row_label in iso_1_replacements:
    japan_shp.at[row_label, 'ISO_1'] = iso_1_replacements[row_label]

In [4]:
def convert_iso_to_region_code(iso_code):
    """Turn an ISO_1 code such as 'JP-23' into a region code such as '23000'.

    The text after the first '-' is taken as the numeric part and '000' is
    appended to it (format XX000).

    Parameters
    ----------
    iso_code : str or missing value
        Code of the form '<country>-<number>'. Missing values (None, NaN)
        and any other non-string input are accepted.

    Returns
    -------
    str
        The region code, or the placeholder '00000' when the input is not a
        string or contains no '-'.
    """
    # Missing cells come through as None / float NaN; `'-' in value` would
    # raise TypeError on those, so treat them like any other unexpected format.
    if not isinstance(iso_code, str) or '-' not in iso_code:
        return '00000'

    numeric_part = iso_code.split('-')[1]
    # Convert to the format XX000
    return numeric_part + '000'

# Derive REGION_CODE element-wise from each row's ISO_1 value.
japan_shp['REGION_CODE'] = japan_shp['ISO_1'].map(convert_iso_to_region_code)

In [5]:
# Keep only the columns used for mapping: display name, region code and shape.
map_columns = ['NAME_1', 'REGION_CODE', 'geometry']
japan_map = japan_shp[map_columns]
japan_map

Unnamed: 0,NAME_1,REGION_CODE,geometry
0,Aichi,23000,"MULTIPOLYGON (((137.09743 34.65121, 137.09773 ..."
1,Akita,5000,"MULTIPOLYGON (((140.70844 38.92136, 140.69861 ..."
2,Aomori,2000,"MULTIPOLYGON (((140.95625 40.27003, 140.95795 ..."
3,Chiba,12000,"MULTIPOLYGON (((139.82417 34.91861, 139.82465 ..."
4,Ehime,38000,"MULTIPOLYGON (((132.56189 32.91159, 132.56241 ..."
5,Fukui,18000,"MULTIPOLYGON (((135.77737 35.36200, 135.77370 ..."
6,Fukuoka,40000,"MULTIPOLYGON (((130.88498 33.18366, 130.88748 ..."
7,Fukushima,7000,"MULTIPOLYGON (((140.26530 36.93347, 140.26071 ..."
8,Gifu,21000,"POLYGON ((136.67628 35.17009, 136.67532 35.165..."
9,Gunma,10000,"POLYGON ((138.94455 36.09058, 138.94102 36.088..."


In [8]:
# Build an interactive map of the japan_map geometries via GeoDataFrame.explore()
# (presumably returns a folium.Map, given the folium import — confirm in the geopandas docs).
m = japan_map.explore()
# m  # uncomment to render the map inline in the notebook

### Cost of transportation

In [11]:
# Per-area transportation cost table; columns cover Tokyo and Osaka,
# each with a 'Transport' and an 'airplane' figure.
TRANSPORTATION_CSV = 'data/Transportation_japan.csv'

cost_of_transportation = pd.read_csv(TRANSPORTATION_CSV)
cost_of_transportation

Unnamed: 0,AREA,Tokyo-Transport,Tokyo-airplane,Osaka-Transport,Osaka-airplane
0,Hokkaido,32870,0,39150,0
1,Aomori-ken,17540,33134,28130,0
2,Iwate-ken,14330,32508,24610,0
3,Miyagi-ken,12250,94360,23310,0
4,Akita-ken,18320,46020,28040,0
5,Yamagata-ken,11630,23103,22690,0
6,Fukushima-ken,14680,0,8790,0
7,Ibaraki-ken,2690,0,16460,0
8,Tochigi-ken,2930,0,15950,0
9,Gumma-ken,2800,0,15950,0


In [12]:
def clean_area_name(area_name):
    """Strip whitespace and drop a trailing administrative suffix from an area name.

    Recognised suffixes are '-ken', '-fu', '-to' and '-do'. Only a suffix at
    the very end of the name is removed; the same character sequence inside
    the name is left alone.

    Parameters
    ----------
    area_name : str
        Raw area name, e.g. 'Aomori-ken'.

    Returns
    -------
    str
        The name without surrounding whitespace and without its trailing
        suffix, e.g. 'Aomori'. Names that carry no recognised suffix are
        returned stripped but otherwise unchanged.
    """
    suffixes = ('-ken', '-fu', '-to', '-do')

    # Strip first so trailing whitespace cannot hide the suffix.
    cleaned = area_name.strip()
    for suffix in suffixes:
        if cleaned.endswith(suffix):
            # Slice off the tail only; str.replace would also delete
            # occurrences in the middle of the name.
            return cleaned[:-len(suffix)].strip()
    return cleaned

# Spelling corrections applied after suffix removal, keyed by the cleaned CSV name.
# NOTE(review): 'Hyōgo' and 'Naoasaki' presumably mirror the spellings used in the
# shapefile's NAME_1 column ('Naoasaki' looks like a typo but may be intentional) —
# confirm against japan_shp before changing them.
area_corrections = {
    'Gumma': 'Gunma',
    'Hyogo': 'Hyōgo',
    'Nagasaki': 'Naoasaki'
}

# Rename AREA to NAME_1, then normalise the names in a single chained step:
# suffix removal first, spelling corrections second.
cost_of_transportation = (
    cost_of_transportation
    .rename(columns={'AREA': 'NAME_1'})
    .assign(
        NAME_1=lambda frame: frame['NAME_1']
        .apply(clean_area_name)
        .replace(area_corrections)
    )
)
cost_of_transportation

Unnamed: 0,NAME_1,Tokyo-Transport,Tokyo-airplane,Osaka-Transport,Osaka-airplane
0,Hokkaido,32870,0,39150,0
1,Aomori,17540,33134,28130,0
2,Iwate,14330,32508,24610,0
3,Miyagi,12250,94360,23310,0
4,Akita,18320,46020,28040,0
5,Yamagata,11630,23103,22690,0
6,Fukushima,14680,0,8790,0
7,Ibaraki,2690,0,16460,0
8,Tochigi,2930,0,15950,0
9,Gunma,2800,0,15950,0
