# 【データ・サイエンス】データ・サイエンス社会応用論 / ICT社会応用演習Ⅳ
## 13-4. 北区の人口の確認

### 1. e-Stat のデータの読み込み
#### 1-a) 下準備

In [None]:
import pandas as pd
import geopandas as gpd
import japanize_matplotlib

In [None]:
# Input file locations, relative to the notebook's working directory.
# CSV with the per-town census population counts; it is read with pd.read_csv below.
CENSUS_SUBREGION_POPULATION_PATH = 'data/h02_13.csv'
# Shapefile with the boundary polygons; it is read with gpd.read_file below.
# NOTE(review): the file name suggests Tokyo Kita ward (code 13117) -- confirm.
KITA_REGION_PATH = 'data/shape/r2ka13117.shp'

#### 1-b) CSVデータの読み込み

In [None]:
# Load the census CSV: the first 4 lines of the file are skipped and the text
# is decoded as Shift-JIS.
df_population = pd.read_csv(
    CENSUS_SUBREGION_POPULATION_PATH,
    skiprows=4,
    encoding='shift-jis',
)
df_population

#### 1-c) DataFrameの下処理

In [None]:
# Rename the Japanese column headers to short English identifiers.
# NOTE(review): the '-' header is mapped to 'foreign'; presumably it is the
# foreign-resident count column of this CSV layout -- confirm against the raw file.
df_population = df_population.rename(columns={
    '市区町村コード': 'lg_code',
    '町丁字コード': 'town_code',
    '総数': 'total',
    '男': 'male',
    '女': 'female',
    '-': 'foreign',
})

# Convert every count column with one vectorized call per column; cells that
# cannot be parsed as numbers become NaN (errors='coerce').
for count_column in ('total', 'male', 'female', 'foreign'):
    df_population[count_column] = pd.to_numeric(df_population[count_column], errors='coerce')

# Append '00' to 4-character town codes; this column is the join key matched
# against S_AREA of the boundary data later in the notebook. Missing or
# non-string codes are passed through unchanged instead of raising TypeError
# from len().
df_population['town_code'] = df_population['town_code'].map(
    lambda code: code + '00' if isinstance(code, str) and len(code) == 4 else code
)

# Keep only the columns used by the following cells.
df_population = df_population[['lg_code', 'town_code', 'total', 'male', 'female', 'foreign']]
df_population

#### 1-d) 東京都北区のデータでの絞り込み

In [None]:
# Boolean mask for the rows whose municipality code is 13117
# (Kita ward, Tokyo, per this section's heading).
is_kita = df_population['lg_code'] == 13117
df_kita_population = df_population[is_kita]
df_kita_population

### 2. コロプレス図の作成
#### 2-a) 境界データの読み込み

In [None]:
# Read the boundary shapefile into a GeoDataFrame and display it.
gdf_kita_region = gpd.read_file(KITA_REGION_PATH)
gdf_kita_region

#### 2-b) GeoDataFrameの下処理

In [None]:
# Columns kept from the boundary data: the S_AREA join key, the name columns,
# AREA, and the geometry itself.
region_columns = ['S_AREA', 'PREF_NAME', 'CITY_NAME', 'S_NAME', 'AREA', 'geometry']
gdf_kita_region = gdf_kita_region[region_columns]
gdf_kita_region

#### 2-c) 国勢調査のDataFrameを結合

In [None]:
# Left join: every boundary row is kept, and the census columns are attached
# where S_AREA equals town_code.
gdf_kita_population = gdf_kita_region.merge(
    df_kita_population,
    how='left',
    left_on='S_AREA',
    right_on='town_code',
)
gdf_kita_population

#### 2-d) 人口密度の列の追加

In [None]:
# Population density = total population divided by the AREA column.
# NOTE(review): the unit of AREA is not visible here (presumably square metres) -- confirm.
population = gdf_kita_population['total']
area = gdf_kita_population['AREA']
gdf_kita_population['density'] = population / area
gdf_kita_population

#### 2-e) データの確認

In [None]:
# Show the rows ordered from the largest total population to the smallest.
gdf_kita_population.sort_values(by='total', ascending=False)

In [None]:
# Show the rows ordered from the highest population density to the lowest.
gdf_kita_population.sort_values(by='density', ascending=False)

#### 2-f) コロプレス図の表示

In [None]:
# Choropleth map colored by total population, using the OrRd colormap with a legend.
gdf_kita_population.plot(
    column='total',
    cmap='OrRd',
    legend=True,
    figsize=(20, 4),
)

In [None]:
# Choropleth map colored by population density, using the OrRd colormap with a legend.
gdf_kita_population.plot(
    column='density',
    cmap='OrRd',
    legend=True,
    figsize=(20, 4),
)