In [3]:
import networkx as nx
import osmnx as ox
import requests
import sys,os,os.path
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Turn on OSMnx's response cache and console logging, then show the library version.
ox.config(log_console=True, use_cache=True)
ox.__version__

'0.16.1'

In [4]:
# Load the station table; the path is relative to the notebook's working directory.
stations = pd.read_csv(filepath_or_buffer='../../data/stations.csv')

In [5]:
# Preview the first five rows of the station table.
stations.head(n=5)

Unnamed: 0,station_cd,station_g_cd,station_name,station_name_k,station_name_r,line_cd,pref_cd,post,address,lon,lat,open_ymd,close_ymd,e_status,e_sort
0,1110101,1110101,函館,,,11101,1,040-0063,北海道函館市若松町１２-１３,140.726413,41.773709,1902-12-10,0000-00-00,0,1110101
1,1110102,1110102,五稜郭,,,11101,1,041-0813,函館市亀田本町,140.733539,41.803557,0000-00-00,0000-00-00,0,1110102
2,1110103,1110103,桔梗,,,11101,1,041-0801,北海道函館市桔梗３丁目４１-３６,140.722952,41.846457,1902-12-10,0000-00-00,0,1110103
3,1110104,1110104,大中山,,,11101,1,041-1121,亀田郡七飯町大字大中山,140.71358,41.864641,0000-00-00,0000-00-00,0,1110104
4,1110105,1110105,七飯,,,11101,1,041-1111,亀田郡七飯町字本町,140.688556,41.886971,0000-00-00,0000-00-00,0,1110105


In [1]:
# System prerequisite: install git, which `pip install git+https://...` needs to clone a repository.
# The two commented-out commands below are disabled package-index / upgrade steps.
#!sudo apt upgrade -y
#!sudo apt update -y
!sudo apt-get install -y git 

Reading package lists... Done
Building dependency tree       
Reading state information... Done
Calculating upgrade... Done
The following packages will be upgraded:
  ca-certificates gcc-10-base libc-bin libc6 libgcc-s1 libstdc++6 libsystemd0
  libudev1 perl-base zlib1g
10 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.
Need to get 5,984 kB of archives.
After this operation, 39.9 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 perl-base amd64 5.30.0-9ubuntu0.2 [1,513 kB]
Get:2 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 gcc-10-base amd64 10.2.0-5ubuntu1~20.04 [19.7 kB]
Get:3 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 libgcc-s1 amd64 10.2.0-5ubuntu1~20.04 [41.6 kB]
Get:4 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 libstdc++6 amd64 10.2.0-5ubuntu1~20.04 [503 kB]
Get:5 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 libc6 amd64 2.31-0ubuntu9.1 [2,712 kB]
Get:6 http:/

In [2]:
# 日本語・ローマ字変換ライブラリ `pykakasi` をインストール
!pip install git+https://github.com/miurahr/pykakasi

Collecting git+https://github.com/miurahr/pykakasi
  Cloning https://github.com/miurahr/pykakasi to /tmp/pip-req-build-sghbs0oo
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
[?25hCollecting klepto
  Using cached klepto-0.2.0-py2.py3-none-any.whl (85 kB)
Collecting pox>=0.2.9
  Using cached pox-0.2.9-py2.py3-none-any.whl (30 kB)
Collecting dill>=0.3.3
  Using cached dill-0.3.3-py2.py3-none-any.whl (81 kB)
Building wheels for collected packages: pykakasi
  Building wheel for pykakasi (PEP 517) ... [?25ldone
[?25h  Created wheel for pykakasi: filename=pykakasi-2.0.2.dev11-py3-none-any.whl size=1244607 sha256=99d655edbefb4538a29fc5d3f2575badaf12b93dd7816c46cb7dd20f454d9e1a
  Stored in directory: /tmp/pip-ephem-wheel-cache-hwaztnop/wheels/62/49/c7/6c3498f4d54697dd2876d998349269c7b64871f871ee1180c7
Successfully built pykakasi
Installing collected packages: pox, dill, klepto, pykak

In [None]:
from pykakasi import kakasi

# Bind the instance to its own name instead of rebinding `kakasi`, so the
# imported class is not shadowed by one of its instances.
kks = kakasi()

# Convert hiragana (H), katakana (K) and kanji (J) to ascii/romaji (a),
# per the pykakasi mode flags.
for script in ('H', 'K', 'J'):
    kks.setMode(script, 'a')

# `conv.do(text)` returns the romanized text.
conv = kks.getConverter()

In [None]:
# Add a romanized version of every station name as a new column.
stations['station_name_roman'] = stations['station_name'].apply(conv.do)
stations

In [None]:
# Keep only the stations on the two lines under study (line codes 11312 and 24001).
target_stations = stations[stations['line_cd'].isin([11312, 24001])]

In [None]:
# Number of target stations on line 11312.
len(target_stations.loc[target_stations['line_cd'] == 11312, 'station_name'])

In [None]:
import folium

In [None]:
# Centre of the initial map view, as latitude and longitude.
center_lat, center_lon = 35.651820534474, 139.54461236426

_map = folium.Map(location=[center_lat, center_lon], zoom_start=12)

# One marker per station: red for line 11312, pink for any other line.
for _, station in target_stations.iterrows():
    if station['line_cd'] == 11312:
        color = 'red'
    else:
        color = 'pink'
    marker = folium.Marker(
        [station.lat, station.lon],
        popup=str(station['station_name']),
        icon=folium.Icon(color=color),
    )
    marker.add_to(_map)

_map

In [None]:
# Street-network statistics for the area around one station (800 m by default).
def get_stats(data, dist=800):
    """Compute OSMnx basic street-network statistics around one station.

    Parameters
    ----------
    data : mapping or pandas.Series
        Must provide 'lat' and 'lon' entries; they form the centre point
        of the downloaded network.
    dist : int, optional
        Distance in meters passed to ``ox.graph_from_point``. Defaults to 800.

    Returns
    -------
    pandas.Series
        The result of ``ox.basic_stats`` wrapped in a Series, so that
        ``DataFrame.apply(get_stats, axis=1)`` yields one column per statistic.
    """
    center_point = (data['lat'], data['lon'])
    # The keyword is `dist`; `distance` is not accepted by the OSMnx
    # version this notebook reports (0.16.1).
    G = ox.graph_from_point(center_point, dist=dist, network_type='drive')

    # Project the graph so the convex-hull area of its nodes can be used as
    # the network's covered area (intended to be in square meters).
    G_proj = ox.project_graph(G)
    nodes_proj = ox.graph_to_gdfs(G_proj, edges=False)
    graph_area_m = nodes_proj.unary_union.convex_hull.area

    # Basic statistics of the projected network, normalized by that area.
    stats = ox.basic_stats(G_proj, area=graph_area_m, clean_intersects=True, circuity_dist='euclidean')
    return pd.Series(stats)
    

In [None]:
# Compute the street-network statistics for every target station, row by row.
stats_list = target_stations.apply(get_stats, axis='columns')
stats_list

In [None]:
# `stats_list` is a DataFrame (one row per station), so `stats_list[0]` would
# look up a column labelled 0 and raise KeyError. Select the first row by position.
stats_list.iloc[0]

In [None]:
# Put the station attributes and their network statistics side by side.
target_stations_with_stats = pd.concat(objs=[target_stations, stats_list], axis='columns')

In [None]:
# Persist the combined table. The file name carries the `.csv` extension so it
# matches the path that is read back with `pd.read_csv` afterwards.
target_stations_with_stats.to_csv('../../data/target_stations_with_stats.csv')

In [None]:
# Reload the saved station-plus-statistics table from disk.
target_stations_with_stats = pd.read_csv(filepath_or_buffer='../../data/target_stations_with_stats.csv')

In [None]:
# Scatter plot of intersection count vs. road count, one color per line.
# NOTE(review): 'n' and 'm' are taken to be the node and edge counts from OSMnx basic_stats — confirm.
ax = None
# Named `line_colors` so the `matplotlib.colors` module imported as `colors` is not overwritten.
line_colors = {11312: 'red', 24001: 'pink'}
for line_cd, line_stations in target_stations_with_stats.groupby('line_cd'):
    # Reuse the returned Axes so every line is drawn on the same figure.
    ax = line_stations.plot.scatter(x='n', y='m', color=line_colors[line_cd], label=f'line {line_cd}', ax=ax)

In [None]:
# `chuo_stations` / `keio_stations` were not defined anywhere; derive them by line code.
# NOTE(review): assumes line_cd 11312 is the Chuo line and 24001 the Keio line — confirm.
chuo_stations = target_stations_with_stats[target_stations_with_stats['line_cd'] == 11312]
keio_stations = target_stations_with_stats[target_stations_with_stats['line_cd'] == 24001]

# Histogram of 'n' per line (red: 11312, pink: 24001).
plt.hist([chuo_stations['n'], keio_stations['n']], bins=20, color=['red', 'pink'])


In [None]:
# `chuo_stations` / `keio_stations` were not defined anywhere; derive them by line code.
# NOTE(review): assumes line_cd 11312 is the Chuo line and 24001 the Keio line — confirm.
chuo_stations = target_stations_with_stats[target_stations_with_stats['line_cd'] == 11312]
keio_stations = target_stations_with_stats[target_stations_with_stats['line_cd'] == 24001]

# Histogram of 'm' per line (red: 11312, pink: 24001).
plt.hist([chuo_stations['m'], keio_stations['m']], bins=20, color=['red', 'pink'])


In [None]:
# k-means法を使うためのインポート
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Restrict the data to the two feature columns used for clustering.
stations_sub = target_stations_with_stats.loc[:, ['n', 'm']]

# Standardize the features: fit the scaler, then transform the same data.
sc = StandardScaler().fit(stations_sub)
stations_sub_std = sc.transform(stations_sub)

In [None]:
# Build a two-cluster KMeans model and fit it on the standardized features.
kmeans = KMeans(n_clusters=2, init='random', random_state=0).fit(stations_sub_std)

# Wrap the cluster assignments in a Series aligned with the station table.
labels = pd.Series(kmeans.labels_, index=target_stations_with_stats.index, name='cluster_number')

# Number of stations per cluster, shown as text ...
cluster_counts = labels.value_counts(sort=False)
print(cluster_counts)

# ... and as a bar chart.
ax = cluster_counts.plot(kind='bar')
ax.set_xlabel('cluster number')
ax.set_ylabel('count')

In [None]:
# Elbow method: fit KMeans with 1 through 19 clusters and record each model's
# inertia (sum of squared distances of samples to their closest centre).
cluster_range = range(1, 20)
dist_list = []
for n_clusters in cluster_range:
    # Use a separate name so the previously fitted `kmeans` model is not overwritten.
    elbow_model = KMeans(n_clusters=n_clusters, init='random', random_state=0)
    elbow_model.fit(stations_sub_std)
    dist_list.append(elbow_model.inertia_)

# Plot inertia against the number of clusters.
plt.plot(cluster_range, dist_list, marker='+')
plt.xlabel('Number of clusters')
plt.ylabel('Distortion')

In [None]:
# Attach each station's cluster number to the station table as an extra column.
cluster_column = labels.reindex(target_stations_with_stats.index)
stations_with_cluster = pd.concat([target_stations_with_stats, cluster_column], axis='columns')

# Preview the first five rows.
stations_with_cluster.head()

In [None]:
# Per-cluster subsets of the station table.
stations_0 = stations_with_cluster[stations_with_cluster['cluster_number'] == 0]
stations_1 = stations_with_cluster[stations_with_cluster['cluster_number'] == 1]

# Plot the clustering result: 'n' against 'm', one color per cluster.
ax = None
# Named `cluster_colors` so the `matplotlib.colors` module imported as `colors` is not overwritten.
cluster_colors = ['red', 'pink']
for cluster_number, cluster_stations in stations_with_cluster.groupby('cluster_number'):
    # Reuse the returned Axes so both clusters share one figure; the f-string
    # makes the legend show the actual cluster number.
    ax = cluster_stations.plot.scatter(x='n', y='m', color=cluster_colors[cluster_number], label=f'cluster{cluster_number}', ax=ax)

In [None]:
# Count stations per (cluster_number, line_cd) pair and pivot the line codes
# into columns; combinations that never occur are filled with 0.
cluster_line_counts = stations_with_cluster.groupby(['cluster_number', 'line_cd']).size()
cross_cluster_line_cd = cluster_line_counts.unstack().fillna(0)
cross_cluster_line_cd

In [None]:
# Turn each row (cluster) into shares that sum to 1, then draw them as a heatmap.
row_share = cross_cluster_line_cd.div(cross_cluster_line_cd.sum(axis=1), axis=0)
sns.heatmap(row_share, cmap='Blues')

In [None]:
# List the names of the stations assigned to cluster 0.
print(stations_with_cluster.loc[stations_with_cluster['cluster_number'] == 0, 'station_name'].tolist())

In [None]:
# List the names of the stations assigned to cluster 1.
print(stations_with_cluster.loc[stations_with_cluster['cluster_number'] == 1, 'station_name'].tolist())