In [1]:
import networkx as nx
import osmnx as ox
import requests
import sys,os,os.path
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

ox.config(use_cache=True, log_console=True)
ox.__version__

'1.0.0'

In [2]:
stations = pd.read_csv('../../../data/stations.csv')

In [3]:
stations.head()

Unnamed: 0,station_cd,station_g_cd,station_name,station_name_k,station_name_r,line_cd,pref_cd,post,address,lon,lat,open_ymd,close_ymd,e_status,e_sort
0,1110101,1110101,函館,,,11101,1,040-0063,北海道函館市若松町１２-１３,140.726413,41.773709,1902-12-10,0000-00-00,0,1110101
1,1110102,1110102,五稜郭,,,11101,1,041-0813,函館市亀田本町,140.733539,41.803557,0000-00-00,0000-00-00,0,1110102
2,1110103,1110103,桔梗,,,11101,1,041-0801,北海道函館市桔梗３丁目４１-３６,140.722952,41.846457,1902-12-10,0000-00-00,0,1110103
3,1110104,1110104,大中山,,,11101,1,041-1121,亀田郡七飯町大字大中山,140.71358,41.864641,0000-00-00,0000-00-00,0,1110104
4,1110105,1110105,七飯,,,11101,1,041-1111,亀田郡七飯町字本町,140.688556,41.886971,0000-00-00,0000-00-00,0,1110105


In [4]:
#!sudo apt upgrade -y
#!sudo apt update -y
!sudo apt-get install -y git 

Reading package lists... Done
Building dependency tree       
Reading state information... Done
E: Unable to locate package git


In [5]:
# 日本語・ローマ字変換ライブラリ `pykakasi` をインストール
!pip install pykakasi



In [6]:
from pykakasi import kakasi

kakasi = kakasi()

kakasi.setMode('H', 'a')
kakasi.setMode('K', 'a')
kakasi.setMode('J', 'a')

conv = kakasi.getConverter()

In [7]:
# 駅名のローマ字名を付与
stations['station_name_roman'] = stations['station_name'].apply(lambda name: conv.do(name))
stations

Unnamed: 0,station_cd,station_g_cd,station_name,station_name_k,station_name_r,line_cd,pref_cd,post,address,lon,lat,open_ymd,close_ymd,e_status,e_sort,station_name_roman
0,1110101,1110101,函館,,,11101,1,040-0063,北海道函館市若松町１２-１３,140.726413,41.773709,1902-12-10,0000-00-00,0,1110101,hakodate
1,1110102,1110102,五稜郭,,,11101,1,041-0813,函館市亀田本町,140.733539,41.803557,0000-00-00,0000-00-00,0,1110102,goryoukaku
2,1110103,1110103,桔梗,,,11101,1,041-0801,北海道函館市桔梗３丁目４１-３６,140.722952,41.846457,1902-12-10,0000-00-00,0,1110103,kikyou
3,1110104,1110104,大中山,,,11101,1,041-1121,亀田郡七飯町大字大中山,140.713580,41.864641,0000-00-00,0000-00-00,0,1110104,daichuuyama
4,1110105,1110105,七飯,,,11101,1,041-1111,亀田郡七飯町字本町,140.688556,41.886971,0000-00-00,0000-00-00,0,1110105,shichimeshi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10850,9992719,9992719,てだこ浦西,,,99927,47,901-2102,沖縄県浦添市前田三丁目21,127.741861,26.241778,2019-10-01,0000-00-00,0,9992719,tedakouranishi
10851,9992801,1190202,九州鉄道記念館,,,99928,40,800-0000,福岡県北九州市門司区,130.962439,33.944392,0000-00-00,0000-00-00,0,9992801,kyuushuutetsudoukinenkan
10852,9992802,9992802,出光美術館,,,99928,40,800-0000,福岡県北九州市門司区,130.965292,33.947792,0000-00-00,0000-00-00,0,9992802,idemitsubijutsukan
10853,9992803,9992803,ノーフォーク広場,,,99928,40,801-0854,福岡県北九州市門司区旧門司,130.964254,33.955973,0000-00-00,0000-00-00,0,9992803,noofookuhiroba


In [8]:
target_stations = stations[
    (stations['pref_cd'] == 13)
]



In [9]:
len(target_stations[target_stations['line_cd'] == 11312]['station_name'])

24

In [10]:
import folium

In [22]:
target_stations['line_cd']

1262    11301
1263    11301
1264    11301
1283    11302
1284    11302
        ...  
8247    99342
8248    99342
8249    99342
8250    99342
8251    99342
Name: line_cd, Length: 944, dtype: int64

In [23]:
center_lat, center_lon = (35.651820534474, 139.54461236426)

_map = folium.Map(location=[center_lat, center_lon], zoom_start=12)

colors = {
    11312: 'red',
    25001: 'blue',
    24001: 'pink',
    27001: 'gray',
}


for _, station in target_stations.iterrows():
    color = 'gray'
    folium.Marker([station.lat, station.lon], popup=str(station['station_name']), icon=folium.Icon(color=color)).add_to(_map)

_map

In [23]:
#駅の800m範囲内のグラフ構造の配列を取得
def get_stats(data):
    wurster_hall = (data['lat'], data['lon'])
    one_mile = 800 #meters


    try:
        G = ox.graph_from_point(wurster_hall, dist=one_mile, network_type='drive')
        if G is None:
            return pd.Series([])

        # what sized area does our network cover in square meters?
        G_proj = ox.project_graph(G)
        nodes_proj = ox.graph_to_gdfs(G_proj, edges=False)
        graph_area_m = nodes_proj.unary_union.convex_hull.area

        if graph_area_m == 0:
            print(data)
            print(graph_area_m)
            print(nodes_proj)
            print(G_proj)
            return pd.Series([])

        # show some basic stats about the network
        stats = ox.basic_stats(G_proj, area=graph_area_m, clean_intersects=True, circuity_dist='euclidean')
        return pd.Series(stats)

    except nx.NetworkXPointlessConcept as e:
        print(data)
        print(e)
        return pd.Series([])
    

In [24]:
pref_cd = 14
target_stations = stations[
    (stations['pref_cd'] == pref_cd)
]
len(target_stations)

424

In [25]:
#駅の800m範囲内のグラフ構造の配列を取得
stats_list = target_stations.apply(get_stats, axis=1)
stats_list

station_cd                 1130407
station_g_cd               1130407
station_name                   海芝浦
station_name_k                 NaN
station_name_r                 NaN
line_cd                      11304
pref_cd                         14
post                      230-0045
address               横浜市鶴見区末広町２丁目
lon                     139.700603
lat                      35.486073
open_ymd                0000-00-00
close_ymd               0000-00-00
e_status                         0
e_sort                     1130407
station_name_roman     umishibaura
Name: 1349, dtype: object
Connectivity is undefined for the null graph.


Unnamed: 0,n,m,k_avg,intersection_count,streets_per_node_avg,streets_per_node_counts,streets_per_node_proportion,edge_length_total,edge_length_avg,street_length_total,street_length_avg,street_segments_count,node_density_km,intersection_density_km,edge_density_km,street_density_km,circuity_avg,self_loop_proportion,clean_intersection_count,clean_intersection_density_km
1265,549.0,1266.0,4.612022,503.0,3.134791,"{0: 0, 1: 46, 2: 8, 3: 327, 4: 162, 5: 6}","{0: 0.0, 1: 0.08378870673952642, 2: 0.01457194...",83156.901,65.684756,54585.246,66.003925,827.0,239.332776,219.279392,36251.679302,23796.062730,1.029499,0.000000,361.0,157.375468
1266,562.0,1264.0,4.498221,534.0,3.176157,"{0: 0, 1: 28, 2: 3, 3: 378, 4: 149, 5: 3, 6: 1}","{0: 0.0, 1: 0.0498220640569395, 2: 0.005338078...",89084.291,70.478078,62935.403,75.552705,833.0,231.057974,219.546189,36625.686444,25874.958544,1.063799,0.000000,320.0,131.563259
1267,479.0,1211.0,5.056367,405.0,2.797495,"{0: 0, 1: 74, 2: 1, 3: 353, 4: 50, 5: 1}","{0: 0.0, 1: 0.1544885177453027, 2: 0.002087682...",80840.662,66.755295,43003.785,67.829314,634.0,196.923853,166.501379,33234.769574,17679.480226,1.090732,0.001652,272.0,111.823148
1268,499.0,1337.0,5.358717,428.0,2.869739,"{0: 0, 1: 71, 2: 1, 3: 349, 4: 78}","{0: 0.0, 1: 0.14228456913827656, 2: 0.00200400...",88700.320,66.342797,45092.164,66.409667,679.0,207.322456,177.823670,36852.842040,18734.705772,1.056511,0.000000,273.0,113.424911
1269,789.0,2182.0,5.531052,696.0,2.931559,"{0: 0, 1: 93, 2: 1, 3: 562, 4: 133}","{0: 0.0, 1: 0.11787072243346007, 2: 0.00126742...",121421.970,55.647099,62400.274,55.764320,1119.0,323.723165,285.565682,49818.890306,25602.552862,1.032558,0.000000,465.0,190.787417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8263,136.0,368.0,5.411765,119.0,2.889706,"{0: 0, 1: 17, 2: 0, 3: 100, 4: 19}","{0: 0.0, 1: 0.125, 2: 0.0, 3: 0.73529411764705...",45429.074,123.448571,22714.537,123.448571,184.0,72.202201,63.176926,24118.228794,12059.114397,1.122440,0.000000,108.0,57.337042
8264,119.0,328.0,5.512605,100.0,2.840336,"{0: 0, 1: 19, 2: 0, 3: 81, 4: 19}","{0: 0.0, 1: 0.15966386554621848, 2: 0.0, 3: 0....",44043.688,134.279537,22021.844,134.279537,164.0,64.478878,54.183931,23864.601515,11932.300758,1.170265,0.000000,93.0,50.391056
8265,105.0,288.0,5.485714,88.0,2.857143,"{0: 0, 1: 17, 2: 0, 3: 69, 4: 19}","{0: 0.0, 1: 0.1619047619047619, 2: 0.0, 3: 0.6...",37921.810,131.672951,18960.905,131.672951,144.0,59.531582,49.893135,21500.431805,10750.215902,1.169691,0.000000,82.0,46.491331
8266,98.0,268.0,5.469388,83.0,2.877551,"{0: 0, 1: 15, 2: 0, 3: 65, 4: 18}","{0: 0.0, 1: 0.15306122448979592, 2: 0.0, 3: 0....",36349.322,135.631799,18174.661,135.631799,134.0,58.432131,49.488437,21673.146309,10836.573154,1.182520,0.007463,79.0,47.103452


In [26]:
len(stats_list)

424

In [27]:
target_stations_with_stats = pd.concat([target_stations, stats_list], axis=1)

In [28]:
target_stations_with_stats.to_csv(f'../../../data/stats/stations_{pref_cd}.csv')

In [29]:
target_stations_with_stats = pd.read_csv(f'../../../data/stats/stations_{pref_cd}.csv')

In [30]:
target_stations_with_stats

Unnamed: 0.1,Unnamed: 0,station_cd,station_g_cd,station_name,station_name_k,station_name_r,line_cd,pref_cd,post,address,...,street_length_avg,street_segments_count,node_density_km,intersection_density_km,edge_density_km,street_density_km,circuity_avg,self_loop_proportion,clean_intersection_count,clean_intersection_density_km
0,1265,1130104,1130104,川崎,,,11301,14,210-0007,川崎市川崎区駅前本町,...,66.003925,827.0,239.332776,219.279392,36251.679302,23796.062730,1.029499,0.000000,361.0,157.375468
1,1266,1130105,1130105,横浜,,,11301,14,220-0011,横浜市西区高島２丁目,...,75.552705,833.0,231.057974,219.546189,36625.686444,25874.958544,1.063799,0.000000,320.0,131.563259
2,1267,1130106,1130106,戸塚,,,11301,14,244-0003,横浜市戸塚区戸塚町,...,67.829314,634.0,196.923853,166.501379,33234.769574,17679.480226,1.090732,0.001652,272.0,111.823148
3,1268,1130107,1130107,大船,,,11301,14,247-0056,鎌倉市大船１丁目,...,66.409667,679.0,207.322456,177.823670,36852.842040,18734.705772,1.056511,0.000000,273.0,113.424911
4,1269,1130108,1130108,藤沢,,,11301,14,251-0052,藤沢市藤沢,...,55.764320,1119.0,323.723165,285.565682,49818.890306,25602.552862,1.032558,0.000000,465.0,190.787417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
419,8263,9934402,9934402,公園下,,,99344,14,250-0500,神奈川県足柄下郡箱根町,...,123.448571,184.0,72.202201,63.176926,24118.228794,12059.114397,1.122440,0.000000,108.0,57.337042
420,8264,9934403,9934403,公園上,,,99344,14,250-0500,神奈川県足柄下郡箱根町,...,134.279537,164.0,64.478878,54.183931,23864.601515,11932.300758,1.170265,0.000000,93.0,50.391056
421,8265,9934404,9934404,中強羅,,,99344,14,250-0500,神奈川県足柄下郡箱根町,...,131.672951,144.0,59.531582,49.893135,21500.431805,10750.215902,1.169691,0.000000,82.0,46.491331
422,8266,9934405,9934405,上強羅,,,99344,14,250-0500,神奈川県足柄下郡箱根町,...,135.631799,134.0,58.432131,49.488437,21673.146309,10836.573154,1.182520,0.007463,79.0,47.103452


In [31]:
len(target_stations)

424

In [None]:
# 路線ごとの交差点数、道路数のグラフ化
ax = None
colors = {11312: 'red', 24001: 'pink', 25001: 'blue'}
for i, station in target_stations_with_stats.groupby('line_cd'):
    station.plot.scatter(x='n', y='m', color=colors[i], label='cluster{i}', ax=ax)

In [None]:
chuo_stations = target_stations_with_stats[target_stations_with_stats['line_cd'] == 11312]
keio_stations = target_stations_with_stats[target_stations_with_stats['line_cd'] == 24001]
odakyu_stations = target_stations_with_stats[target_stations_with_stats['line_cd'] == 25001]

In [None]:
plt.hist([chuo_stations['n'], keio_stations['n']], bins = 20, color=['red', 'pink'])

In [None]:
plt.hist([chuo_stations['m'], keio_stations['m']], bins = 20, color=['red', 'pink'])

In [None]:
# k-means法を使うためのインポート
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# データの列の絞り込み
stations_sub = target_stations_with_stats[['n','m']]

# 標準化
sc = StandardScaler()
sc.fit(stations_sub)
stations_sub_std = sc.transform(stations_sub)

In [None]:
# KMeansクラスの初期化
kmeans = KMeans(init='random', n_clusters=3, random_state=0)

# クラスターの重心を計算
kmeans.fit(stations_sub_std)

# クラスター番号をpandasのSeriesオブジェクトに変換
labels = pd.Series(kmeans.labels_, name='cluster_number', index=target_stations_with_stats.index)

# クラスター番号と件数を表示
print(labels.value_counts(sort=False))

# グラフを描画
ax = labels.value_counts(sort=False).plot(kind='bar')
ax.set_xlabel('cluster number')
ax.set_ylabel('count')

In [None]:
# エルボー方による推定。クラスター数を1から20に増やして、それぞれの距離の総和を求める
dist_list =[]
for i in range(1,20):
    kmeans= KMeans(n_clusters=i, init='random', random_state=0)
    kmeans.fit(stations_sub_std)
    dist_list.append(kmeans.inertia_)
    
# グラフを表示
plt.plot(range(1,20), dist_list,marker='+')
plt.xlabel('Number of clusters')
plt.ylabel('Distortion')

In [None]:
# 駅のデータにクラスター番号のデータを結合
stations_with_cluster = pd.concat([target_stations_with_stats, pd.Series(labels, index=target_stations_with_stats.index)], axis=1)

# 先頭の5行を表示
stations_with_cluster.head()

In [None]:
stations_0 = stations_with_cluster[stations_with_cluster['cluster_number'] == 0]
stations_1 = stations_with_cluster[stations_with_cluster['cluster_number'] == 1]
stations_2 = stations_with_cluster[stations_with_cluster['cluster_number'] == 2]
# クラスタリング結果のグラフ化
ax = None
colors = ['red', 'pink', 'blue']
for i, station in stations_with_cluster.groupby('cluster_number'):
    station.plot.scatter(x='n', y='m', color=colors[i], label='cluster{i}', ax=ax)

In [None]:
# クラスタ番号と年齢層を軸に集計し、年齢層を列に設定
cross_cluster_line_cd = stations_with_cluster.groupby(['cluster_number', 'line_cd']).size().unstack().fillna(0)
cross_cluster_line_cd

In [None]:
sns.heatmap(cross_cluster_line_cd.apply(lambda x : x/x.sum(), axis=1), cmap='Blues')

In [None]:
print([station['station_name'] for _, station in stations_with_cluster.iterrows() if (station['cluster_number'] == 0)])

In [None]:
print([station['station_name'] for _, station in stations_with_cluster.iterrows() if (station['cluster_number'] == 1)])

In [None]:
print([station['station_name'] for _, station in stations_with_cluster.iterrows() if (station['cluster_number'] == 2)])