In [1]:
import pandas as pd
import geopandas as gpd

# read in data

tripChain = pd.read_csv('PJT001_TripChain.csv')
stations = pd.read_csv('PJT001_stations_table.csv')
mocLink = gpd.read_file('PJT001_moc_link_2018.geojson')
cellFlow = gpd.read_file('PJT001_h_100m_cell_flow.geojson')
cellPop = gpd.read_file('PJT001_h_100m_cell_pop.geojson')
emdOd = pd.read_csv('PJT001_sk_emd_od.csv')
emd = gpd.read_file('PJT001_tl_scco_emd.geojson')
routeStationInfo = pd.read_csv('PJT001_routestationinfo.csv')
route_station_mapping = pd.read_csv('PJT001_routestationmapping.csv')

In [2]:
# trip chain 테이블 컬럼이름을 좀 이쁘게 만들어 보자

tripChain.columns=['암호화카드번호', '트랜잭션ID', '환승횟수', '교통카드발행사ID',
       '총이용객수', '사용자구분', '교통수단CD1', '교통수단CD2',
       '교통수단CD3', '교통수단CD4', '교통수단CD5', '버스노선ID1',
       '버스노선ID2', '버스노선ID3', '버스노선ID4', '버스노선ID5',
       '차량ID1', '차량ID2', '차량ID3', '차량ID4',
       '차량ID5', '총통행거리', '총탑승시간', '총소요시간',
       '승차일시1', '승차일시2', '승차일시3', '승차일시4',
       '승차일시5', '하차일시1', '하차일시2', '하차일시3',
       '하차일시4', '하차일시5', '최초승차일시', '최종하차일시',
       '승차역ID1', '승차역ID2', '승차역ID3', '승차역ID4',
       '승차역ID5', '하차역ID1', '하차역ID2', '하차역ID3',
       '하차역ID4', '하차역ID5', '최초승차역ID', '최종하차역ID',
       '총이용금액', '수집건수', '트립체인완료코드']

In [3]:
# route-station 매핑테이블에 잘못된 column 이름 바로잡기

route_station_mapping.rename(columns = {'이비노선ID': '표준노선ID', '표준노선ID': '이비노선ID'}, inplace = True)
route_station_mapping

Unnamed: 0,구분,운수사명,운수사ID,표준노선ID,이비노선ID,노선명
0,경기시내,경원여객M,2805000,216000044,28050900,M6410
1,경기시내,강화운수,4100100,232000028,41001001,2
2,경기시내,강화운수,4100100,232000029,41001013,88
3,경기시내,강화운수,4100100,232000061,41001020,3000
4,경기시내,강화운수,4100100,232000067,41001024,388
...,...,...,...,...,...,...
2122,경기시내,서현운수,4108800,229000060,41088004,330
2123,경기시내,서현운수,4108800,229000063,41088005,850
2124,경기시내,코레일네트웍스,4108900,213000024,41089001,8507
2125,경기시내,신성교통,4109100,229000102,41091900,M7111


In [4]:
routeStationInfo

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no
0,65286,228000018,10-4,1,용인터미널,228001552,47634.0
1,65287,228000018,10-4,2,용인터미널(경유),277102443,
2,65288,228000018,10-4,3,포브스병원,228000443,29439.0
3,65289,228000018,10-4,4,제일교회,228000665,29881.0
4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0
...,...,...,...,...,...,...,...
37826,191750,241491011,38-1,63,신일해피트리후문,233002733,
37827,191751,241491011,38-1,64,기배동행정복지센터,233002726,
37828,191752,241491011,38-1,65,기안1통입구,233002959,
37829,191753,241491011,38-1,66,배양2리,233002717,55455.0


In [5]:
# routeStationInfo 테이블에 이비노선ID를 머지할게요

routeStationInfo = pd.merge(routeStationInfo, route_station_mapping[['표준노선ID', '이비노선ID']], how = 'left', left_on = 'pr_station_id', right_on='표준노선ID')
routeStationInfo = routeStationInfo.drop('표준노선ID', 1)
#routeStationInfo.drop_duplicates(subset =['pr_station_id', 'bus_line_no_seq'], keep = 'first', inplace = True) 
routeStationInfo

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID
0,65286,228000018,10-4,1,용인터미널,228001552,47634.0,41006260.0
1,65287,228000018,10-4,2,용인터미널(경유),277102443,,41006260.0
2,65288,228000018,10-4,3,포브스병원,228000443,29439.0,41006260.0
3,65289,228000018,10-4,4,제일교회,228000665,29881.0,41006260.0
4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0,41006260.0
...,...,...,...,...,...,...,...,...
38535,191750,241491011,38-1,63,신일해피트리후문,233002733,,
38536,191751,241491011,38-1,64,기배동행정복지센터,233002726,,
38537,191752,241491011,38-1,65,기안1통입구,233002959,,
38538,191753,241491011,38-1,66,배양2리,233002717,55455.0,


In [6]:
# 총 661개의 노선이 있는데 그중 이비노선ID가 있는 노선은 289개밖에 없어요
# 이비노선 mapping이 안됨으로 인해 최소43%의 노선이용정보가 날라갈거같아요

print(routeStationInfo.pr_station_id.unique().size)
print(routeStationInfo.이비노선ID.unique().size)

661
289


In [7]:
# stations 테이블 시군명 컬럼에 결측치가 너무 많아 수동으로 데이터를 입력할게요
# stations2 = 시군명 결측치를 채운 stations테이블

stations2 = pd.read_csv('./station_null2.csv')
stations2 = stations2.drop(['station_id', 'Unnamed: 0'], 1)
stations2.drop_duplicates(subset ="표준정류장ID", keep = 'first', inplace = True) 
stations2

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
0,228003422,용인시,손골마을회관.국제학교,"Songol Community Center,",56443.0,노변정류장,경기도 용인시,,37.342517,127.066817,56443,
1,228003423,용인시,풀잎사랑,Pulipsarang,56444.0,노변정류장,경기도 용인시,,37.341800,127.068983,56444,
2,228003424,용인시,풀잎사랑,Pulipsarang,56445.0,노변정류장,경기도 용인시,,37.341817,127.069083,56445,
3,228003425,용인시,대성공정,Daesung Process,56446.0,노변정류장,경기도 용인시,,37.339350,127.073067,56446,
4,228003426,용인시,대성공정,Daesung Process,56447.0,노변정류장,경기도 용인시,,37.339183,127.073400,56447,
...,...,...,...,...,...,...,...,...,...,...,...,...
39449,228003381,용인시,현대빌리지.광교산자이,Hyundai Village,56402.0,노변정류장,경기도 용인시,,37.326433,127.071317,56402,
39450,228003382,용인시,현대빌리지.광교산자이,Hyundai Village,56403.0,노변정류장,경기도 용인시,,37.326450,127.071100,56403,
39451,236001229,포천시,평강식물원,Pyunggang Botanical Garden,40691.0,노변정류장,경기도 포천시,경기도 포천시 영북면,38.050650,127.306617,40691,
39453,236001230,포천시,양문1리.영중면사무소,"Yangmun 1-ri, Yeongjung-myeon Office",40699.0,노변정류장,경기도 포천시,,38.005517,127.245667,40699,


In [8]:
import numpy as np
stations2[(stations2['시군명'] == '화성시')]

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
1311,233001737,화성시,전곡산업단지,,,,,,,,55367,
3426,277100614,화성시,동탄초교(경유),,,,,,,,,
3427,277100615,화성시,동탄초교(경유),,,,,,,,,
3481,277101611,화성시,제암교차로(경유),,,,,,,,,
3488,277101618,화성시,고온리종점(경유),,,,,,37.038400,126.750217,,7101618.0
...,...,...,...,...,...,...,...,...,...,...,...,...
39138,233001260,화성시,전곡2리마을회관.운천동,"Jeongok 2-ri Community Center, Uncheon-dong",37371.0,노변정류장,경기도 화성시,경기도 화성시 서신면,37.191267,126.692133,37371,
39143,233001261,화성시,칠곡리,Chilgok-ri,37364.0,노변정류장,경기도 화성시,경기도 화성시 송산면,37.203567,126.711483,37364,
39148,233001279,화성시,신영통현대타운2단지,,,,,,,,55696,
39149,233001284,화성시,신한미지엔정문,,,,,,,,55697,


In [9]:
import numpy as np
stations2[(stations2['표준정류장ID'].isnull() == False) & (stations2['이비카드정류장ID'].isnull()) & (stations2['시군명'] == '화성시')]

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
1311,233001737,화성시,전곡산업단지,,,,,,,,55367,
3426,277100614,화성시,동탄초교(경유),,,,,,,,,
3427,277100615,화성시,동탄초교(경유),,,,,,,,,
3481,277101611,화성시,제암교차로(경유),,,,,,,,,
3489,277101619,화성시,호곡리(경유),,,,,,37.118383,126.773967,,
...,...,...,...,...,...,...,...,...,...,...,...,...
39138,233001260,화성시,전곡2리마을회관.운천동,"Jeongok 2-ri Community Center, Uncheon-dong",37371.0,노변정류장,경기도 화성시,경기도 화성시 서신면,37.191267,126.692133,37371,
39143,233001261,화성시,칠곡리,Chilgok-ri,37364.0,노변정류장,경기도 화성시,경기도 화성시 송산면,37.203567,126.711483,37364,
39148,233001279,화성시,신영통현대타운2단지,,,,,,,,55696,
39149,233001284,화성시,신한미지엔정문,,,,,,,,55697,


In [10]:
stations2[stations2['이비카드정류장ID'].isnull()]

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
0,228003422,용인시,손골마을회관.국제학교,"Songol Community Center,",56443.0,노변정류장,경기도 용인시,,37.342517,127.066817,56443,
1,228003423,용인시,풀잎사랑,Pulipsarang,56444.0,노변정류장,경기도 용인시,,37.341800,127.068983,56444,
2,228003424,용인시,풀잎사랑,Pulipsarang,56445.0,노변정류장,경기도 용인시,,37.341817,127.069083,56445,
3,228003425,용인시,대성공정,Daesung Process,56446.0,노변정류장,경기도 용인시,,37.339350,127.073067,56446,
4,228003426,용인시,대성공정,Daesung Process,56447.0,노변정류장,경기도 용인시,,37.339183,127.073400,56447,
...,...,...,...,...,...,...,...,...,...,...,...,...
39449,228003381,용인시,현대빌리지.광교산자이,Hyundai Village,56402.0,노변정류장,경기도 용인시,,37.326433,127.071317,56402,
39450,228003382,용인시,현대빌리지.광교산자이,Hyundai Village,56403.0,노변정류장,경기도 용인시,,37.326450,127.071100,56403,
39451,236001229,포천시,평강식물원,Pyunggang Botanical Garden,40691.0,노변정류장,경기도 포천시,경기도 포천시 영북면,38.050650,127.306617,40691,
39453,236001230,포천시,양문1리.영중면사무소,"Yangmun 1-ri, Yeongjung-myeon Office",40699.0,노변정류장,경기도 포천시,,38.005517,127.245667,40699,


# hwaseong_stations 테이블에 화성시 정류장만 보관할게요

hwaseong_stations = stations2

for index, row in hwaseong_stations.iterrows():
    if(row['시군명'] != '화성시'):
        hwaseong_stations.drop(index, inplace = True)
        
hwaseong_stations.drop_duplicates(subset ="표준정류장ID", keep = 'first', inplace = True) 
hwaseong_stations

hwaseong_stations.isna().sum()

In [11]:
# RouteStationInfo_latlon = route station info + (latitude, longitude, 시군명)

RouteStationInfo_latlon = pd.merge(routeStationInfo, stations2[['WGS84위도', 'WGS84경도', '표준정류장ID', '시군명']], how='left', left_on='station_id', right_on='표준정류장ID')
RouteStationInfo_latlon = RouteStationInfo_latlon.drop('표준정류장ID', 1)
RouteStationInfo_latlon.drop_duplicates(subset =['pr_station_id', 'station_id', 'bus_line_no_seq'], keep = 'first', inplace = True) 
RouteStationInfo_latlon = RouteStationInfo_latlon.dropna(subset=['시군명'])
RouteStationInfo_latlon

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID,WGS84위도,WGS84경도,시군명
0,65286,228000018,10-4,1,용인터미널,228001552,47634.0,41006260.0,37.232783,127.210100,용인시
1,65287,228000018,10-4,2,용인터미널(경유),277102443,,41006260.0,37.235350,127.209033,용인시
2,65288,228000018,10-4,3,포브스병원,228000443,29439.0,41006260.0,37.235267,127.210617,용인시
3,65289,228000018,10-4,4,제일교회,228000665,29881.0,41006260.0,37.234300,127.213333,용인시
4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0,41006260.0,37.231483,127.213767,용인시
...,...,...,...,...,...,...,...,...,...,...,...
38535,191750,241491011,38-1,63,신일해피트리후문,233002733,,,37.222650,126.978983,화성시
38536,191751,241491011,38-1,64,기배동행정복지센터,233002726,,,37.224000,126.983150,화성시
38537,191752,241491011,38-1,65,기안1통입구,233002959,,,37.225683,126.986250,화성시
38538,191753,241491011,38-1,66,배양2리,233002717,55455.0,,37.226833,126.992067,화성시


In [12]:
RouteStationInfo_latlon[RouteStationInfo_latlon['시군명'] != '화성시']

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID,WGS84위도,WGS84경도,시군명
0,65286,228000018,10-4,1,용인터미널,228001552,47634.0,41006260.0,37.232783,127.210100,용인시
1,65287,228000018,10-4,2,용인터미널(경유),277102443,,41006260.0,37.235350,127.209033,용인시
2,65288,228000018,10-4,3,포브스병원,228000443,29439.0,41006260.0,37.235267,127.210617,용인시
3,65289,228000018,10-4,4,제일교회,228000665,29881.0,41006260.0,37.234300,127.213333,용인시
4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0,41006260.0,37.231483,127.213767,용인시
...,...,...,...,...,...,...,...,...,...,...,...
38177,191392,241489008,9-1,48,종합운동장,223000028,24149.0,,37.156817,127.074433,오산시
38178,191393,241489008,9-1,49,중앙동주민센터,223000492,,,37.152533,127.073583,오산시
38179,191394,241489008,9-1,50,롯데마트,223000493,,,37.149150,127.073483,오산시
38180,191395,241489008,9-1,51,신양아파트사거리,223000198,24142.0,,37.146450,127.072917,오산시


In [13]:
pd.set_option('display.max_rows', 500)

In [14]:
RouteStationInfo_latlon.shape

(37778, 11)

In [15]:
for index, row in RouteStationInfo_latlon.iterrows():
    if(row['시군명'] != '화성시'):
        RouteStationInfo_latlon.drop(index, inplace = True)

In [16]:
RouteStationInfo_latlon

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID,WGS84위도,WGS84경도,시군명
927,68834,228000204,24,53,한원IC,233000169,37403.0,41006281.0,37.152733,127.122883,화성시
928,68835,228000204,24,54,장지동,233000168,36808.0,41006281.0,37.155117,127.117900,화성시
929,68836,228000204,24,55,장지동,233000167,36809.0,41006281.0,37.155867,127.115017,화성시
930,68837,228000204,24,56,롯데캐슬.제일풍경채,233003130,55526.0,41006281.0,37.157483,127.109283,화성시
931,68838,228000204,24,57,자이파밀리에,233003131,55527.0,41006281.0,37.159167,127.106050,화성시
...,...,...,...,...,...,...,...,...,...,...,...
38535,191750,241491011,38-1,63,신일해피트리후문,233002733,,,37.222650,126.978983,화성시
38536,191751,241491011,38-1,64,기배동행정복지센터,233002726,,,37.224000,126.983150,화성시
38537,191752,241491011,38-1,65,기안1통입구,233002959,,,37.225683,126.986250,화성시
38538,191753,241491011,38-1,66,배양2리,233002717,55455.0,,37.226833,126.992067,화성시


In [17]:
RouteStationInfo_latlon.drop_duplicates(subset =['pr_station_id', 'station_id', 'bus_line_no_seq'], keep = 'first', inplace = True) 

In [18]:
RouteStationInfo_latlon.shape

(10998, 11)

In [19]:
RouteStationInfo_latlon[RouteStationInfo_latlon['이비노선ID'].isna()]
print(RouteStationInfo_latlon.pr_station_id.unique().size)

236


In [20]:
# 이비노선ID가 없는 노선들은 지워버릴게요 (어짜피 tripChain에서 매칭이 안되요)

RouteStationInfo_latlon = RouteStationInfo_latlon.dropna(subset=['이비노선ID'])

In [21]:
RouteStationInfo_latlon[RouteStationInfo_latlon['이비노선ID'].isna()]
print(RouteStationInfo_latlon.pr_station_id.unique().size)

77


In [22]:
RouteStationInfo_latlon[RouteStationInfo_latlon['bus_line_no'] == '27']

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID,WGS84위도,WGS84경도,시군명
9192,91274,233000096,27,1,조암터미널,233000929,36011.0,41008157.0,37.08215,126.815583,화성시
9193,91275,233000096,27,2,조암삼거리(경유),277102392,,41008157.0,37.080933,126.81455,화성시
9194,91276,233000096,27,3,장안여자중학교,233000992,36797.0,41008157.0,37.078567,126.815217,화성시
9195,91277,233000096,27,4,사곡6리,233000496,36910.0,41008157.0,37.074817,126.81515,화성시
9196,91278,233000096,27,5,장안우체국,233002511,55388.0,41008157.0,37.072167,126.81385,화성시
9197,91279,233000096,27,6,사곡1리,233000993,37478.0,41008157.0,37.069083,126.809217,화성시
9198,91280,233000096,27,7,장자터,233001776,37834.0,41008157.0,37.066483,126.802167,화성시
9199,91281,233000096,27,8,기아생활관,233000485,36918.0,41008157.0,37.064383,126.795617,화성시
9200,91282,233000096,27,9,화산삼거리,233001554,36701.0,41008157.0,37.062667,126.792467,화성시
9201,91283,233000096,27,10,기아2차아파트앞,233002268,55143.0,41008157.0,37.060467,126.7929,화성시


In [23]:
RouteStationInfo_latlon[RouteStationInfo_latlon['WGS84위도'].isna()].shape

(99, 11)

In [24]:
# RouteStationId_latlon테이블의 위도경도 결측치 채우기

hwaseong_bus1 = pd.read_csv('./hwaseong_bus1.csv', encoding = 'euc-kr')
hwaseong_bus2 = pd.read_csv('./hwaseong_bus2.csv', encoding = 'euc-kr')
hwaseong_bus = pd.concat([hwaseong_bus1, hwaseong_bus2])
hwaseong_bus

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """


Unnamed: 0,TM_X,TM_Y,경도,구간거리,노선ID,노선번호,사업자ID,순번,운수사,위도,정류소ID,정류소명,정류소번호,정류장ID,정류장번호
0,183604.961400,398149.344700,126.815612,0,233000048,1,4100800,1,경진여객,37.082182,,조암터미널,,233000929.0,36011.0
1,183723.797100,398120.764200,126.816949,148,233000048,1,4100800,2,경진여객,37.081927,,조암농협,,233002625.0,55396.0
2,184047.440000,398289.637900,126.820585,360,233000048,1,4100800,3,경진여객,37.083454,,조암GS주유소,,233000930.0,37481.0
3,184566.921800,398565.340400,126.826422,587,233000048,1,4100800,4,경진여객,37.085947,,어은삼거리,,233000931.0,37122.0
4,184734.587200,398061.397500,126.828318,543,233000048,1,4100800,5,경진여객,37.081409,,어은4리,,233000515.0,37124.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6606,179120.574585,413094.042324,126.765517,769,41319044,20-4B,4131900,52,(화성)화성순환여객,37.219483,4170968.0,삼존3리,,,
6607,178219.397707,412231.606897,126.755383,1639,41319044,20-4B,4131900,53,(화성)화성순환여객,37.211683,4102873.0,돼지고개.삼존리물미,37097.0,,
6608,177326.329826,412300.496653,126.745317,928,41319044,20-4B,4131900,54,(화성)화성순환여객,37.212283,4107792.0,봉가1리,37095.0,,
6609,176849.072091,412440.386291,126.739950,494,41319044,20-4B,4131900,55,(화성)화성순환여객,37.213533,4119923.0,사강정형외과,,,


In [25]:
RouteStationInfo_latlon = pd.merge(RouteStationInfo_latlon, hwaseong_bus[['위도', '경도', '정류소명']], how='left', left_on='station_nm', right_on='정류소명')
RouteStationInfo_latlon = RouteStationInfo_latlon.drop(['정류소명'], axis =1)
RouteStationInfo_latlon

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID,WGS84위도,WGS84경도,시군명,위도,경도
0,68834,228000204,24,53,한원IC,233000169,37403.0,41006281.0,37.152733,127.122883,화성시,,
1,68835,228000204,24,54,장지동,233000168,36808.0,41006281.0,37.155117,127.117900,화성시,37.155117,127.117900
2,68835,228000204,24,54,장지동,233000168,36808.0,41006281.0,37.155117,127.117900,화성시,37.155867,127.115017
3,68835,228000204,24,54,장지동,233000168,36808.0,41006281.0,37.155117,127.117900,화성시,37.155717,127.114950
4,68835,228000204,24,54,장지동,233000168,36808.0,41006281.0,37.155117,127.117900,화성시,37.155117,127.117900
...,...,...,...,...,...,...,...,...,...,...,...,...,...
62760,121497,234001570,116-2,89,농서리,233001300,36406.0,41086011.0,37.230633,127.068100,화성시,37.230633,127.068100
62761,121498,234001570,116-2,90,반달마을,233002112,55132.0,41086011.0,37.234400,127.067033,화성시,37.234561,127.067015
62762,121498,234001570,116-2,90,반달마을,233002112,55132.0,41086011.0,37.234400,127.067033,화성시,37.234407,127.067052
62763,121498,234001570,116-2,90,반달마을,233002112,55132.0,41086011.0,37.234400,127.067033,화성시,37.234400,127.067033


In [26]:
RouteStationInfo_latlon.drop_duplicates(subset =['pr_station_id', 'bus_line_no_seq', 'station_id'], keep = 'first', inplace = True) 
RouteStationInfo_latlon

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID,WGS84위도,WGS84경도,시군명,위도,경도
0,68834,228000204,24,53,한원IC,233000169,37403.0,41006281.0,37.152733,127.122883,화성시,,
1,68835,228000204,24,54,장지동,233000168,36808.0,41006281.0,37.155117,127.117900,화성시,37.155117,127.117900
7,68836,228000204,24,55,장지동,233000167,36809.0,41006281.0,37.155867,127.115017,화성시,37.155117,127.117900
13,68837,228000204,24,56,롯데캐슬.제일풍경채,233003130,55526.0,41006281.0,37.157483,127.109283,화성시,37.157435,127.109124
17,68838,228000204,24,57,자이파밀리에,233003131,55527.0,41006281.0,37.159167,127.106050,화성시,37.159101,127.105963
...,...,...,...,...,...,...,...,...,...,...,...,...,...
62701,121494,234001570,116-2,86,한림대병원(중),233001782,37864.0,41086011.0,37.216633,127.078117,화성시,37.217343,127.077776
62719,121495,234001570,116-2,87,IT단지(중),233001978,37954.0,41086011.0,37.224717,127.074183,화성시,37.225117,127.073694
62737,121496,234001570,116-2,88,삼성반도체후문,233002353,55253.0,41086011.0,37.228417,127.071283,화성시,37.227497,127.070905
62754,121497,234001570,116-2,89,농서리,233001300,36406.0,41086011.0,37.230633,127.068100,화성시,37.230654,127.068138


In [27]:
RouteStationInfo_latlon['WGS84위도'] = RouteStationInfo_latlon['WGS84위도'].combine_first(RouteStationInfo_latlon['위도'])
RouteStationInfo_latlon['WGS84경도'] = RouteStationInfo_latlon['WGS84경도'].combine_first(RouteStationInfo_latlon['경도'])
RouteStationInfo_latlon = RouteStationInfo_latlon.drop(['위도', '경도'], axis=1)

In [28]:
RouteStationInfo_latlon[RouteStationInfo_latlon['WGS84위도'].isna()]

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID,WGS84위도,WGS84경도,시군명


In [29]:
RouteStationInfo_latlon

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID,WGS84위도,WGS84경도,시군명
0,68834,228000204,24,53,한원IC,233000169,37403.0,41006281.0,37.152733,127.122883,화성시
1,68835,228000204,24,54,장지동,233000168,36808.0,41006281.0,37.155117,127.117900,화성시
7,68836,228000204,24,55,장지동,233000167,36809.0,41006281.0,37.155867,127.115017,화성시
13,68837,228000204,24,56,롯데캐슬.제일풍경채,233003130,55526.0,41006281.0,37.157483,127.109283,화성시
17,68838,228000204,24,57,자이파밀리에,233003131,55527.0,41006281.0,37.159167,127.106050,화성시
...,...,...,...,...,...,...,...,...,...,...,...
62701,121494,234001570,116-2,86,한림대병원(중),233001782,37864.0,41086011.0,37.216633,127.078117,화성시
62719,121495,234001570,116-2,87,IT단지(중),233001978,37954.0,41086011.0,37.224717,127.074183,화성시
62737,121496,234001570,116-2,88,삼성반도체후문,233002353,55253.0,41086011.0,37.228417,127.071283,화성시
62754,121497,234001570,116-2,89,농서리,233001300,36406.0,41086011.0,37.230633,127.068100,화성시


In [30]:
RouteStationInfo_latlon[RouteStationInfo_latlon['bus_line_no'] == '27']

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID,WGS84위도,WGS84경도,시군명
38233,91274,233000096,27,1,조암터미널,233000929,36011.0,41008157.0,37.08215,126.815583,화성시
38315,91275,233000096,27,2,조암삼거리(경유),277102392,,41008157.0,37.080933,126.81455,화성시
38337,91276,233000096,27,3,장안여자중학교,233000992,36797.0,41008157.0,37.078567,126.815217,화성시
38374,91277,233000096,27,4,사곡6리,233000496,36910.0,41008157.0,37.074817,126.81515,화성시
38375,91278,233000096,27,5,장안우체국,233002511,55388.0,41008157.0,37.072167,126.81385,화성시
38396,91279,233000096,27,6,사곡1리,233000993,37478.0,41008157.0,37.069083,126.809217,화성시
38417,91280,233000096,27,7,장자터,233001776,37834.0,41008157.0,37.066483,126.802167,화성시
38438,91281,233000096,27,8,기아생활관,233000485,36918.0,41008157.0,37.064383,126.795617,화성시
38458,91282,233000096,27,9,화산삼거리,233001554,36701.0,41008157.0,37.062667,126.792467,화성시
38478,91283,233000096,27,10,기아2차아파트앞,233002268,55143.0,41008157.0,37.060467,126.7929,화성시


In [31]:
RouteStationInfo_latlon.to_csv('RouteStationInfo_latlon.csv', sep=',')

# 버스노선별로 tripchain테이블에서의 이용횟수를 구할게요

tripChain2 = tripChain

tripChain2['버스노선ID1'] = tripChain2['버스노선ID1'].fillna(0)
tripChain2['버스노선ID2'] = tripChain2['버스노선ID2'].fillna(0)
tripChain2['버스노선ID3'] = tripChain2['버스노선ID3'].fillna(0)
tripChain2['버스노선ID4'] = tripChain2['버스노선ID4'].fillna(0)
tripChain2['버스노선ID5'] = tripChain2['버스노선ID5'].fillna(0)


bus_user_num = {}

for index, row in tripChain2.iterrows():
    if row['버스노선ID1'] in bus_user_num:
        bus_user_num[row['버스노선ID1']] = bus_user_num[row['버스노선ID1']] + 1
    else:
        bus_user_num[row['버스노선ID1']] = 1
    
    if row['버스노선ID2'] in bus_user_num:
        bus_user_num[row['버스노선ID2']] = bus_user_num[row['버스노선ID2']] + 1
    else:
        bus_user_num[row['버스노선ID2']] = 1
    
    if row['버스노선ID3'] in bus_user_num:
        bus_user_num[row['버스노선ID3']] = bus_user_num[row['버스노선ID3']] + 1
    else:
        bus_user_num[row['버스노선ID3']] = 1
    
    if row['버스노선ID4'] in bus_user_num:
        bus_user_num[row['버스노선ID4']] = bus_user_num[row['버스노선ID4']] + 1
    else:
        bus_user_num[row['버스노선ID4']] = 1
    
    if row['버스노선ID5'] in bus_user_num:
        bus_user_num[row['버스노선ID5']] = bus_user_num[row['버스노선ID5']] + 1
    else:
        bus_user_num[row['버스노선ID5']] = 1
        
most_bus_route = pd.DataFrame.from_dict(bus_user_num, orient='index')
most_bus_route.columns = ['이용횟수']
most_bus_route = most_bus_route.drop(0.0)
most_bus_route

most_bus_route.to_csv('bus_route_usage_count.csv', sep=',')

In [32]:
bus_usage_ranking = pd.read_csv('./bus_route_usage_count.csv')
bus_usage_ranking.rename(columns={'Unnamed: 0' : '이비노선ID'}, inplace = True)
bus_usage_ranking = bus_usage_ranking.sort_values(by='이용횟수', ascending = False)
bus_usage_ranking

Unnamed: 0,이비노선ID,이용횟수
23,41002046.0,78759
2,41031040.0,61357
0,41002045.0,53846
17,41027009.0,49515
14,41031020.0,46958
...,...,...
1941,41038165.0,1
1939,11110366.0,1
1938,41023103.0,1
1216,41009157.0,1


In [33]:
bus_usage_ranking = pd.merge(bus_usage_ranking, route_station_mapping[['이비노선ID', '표준노선ID', '노선명']],how='left', left_on='이비노선ID', right_on = '이비노선ID')
bus_usage_ranking.drop_duplicates(subset =['이비노선ID', '표준노선ID', '노선명'], keep = 'first', inplace = True) 
bus_usage_ranking

Unnamed: 0,이비노선ID,이용횟수,표준노선ID,노선명
0,41002046.0,78759,234000026.0,720-2
1,41031040.0,61357,200000078.0,62-1
2,41002045.0,53846,234000024.0,720-1
3,41027009.0,49515,200000028.0,301
4,41031020.0,46958,200000029.0,92-1
...,...,...,...,...
2257,41038165.0,1,239000062.0,33-16
2258,11110366.0,1,,
2259,41023103.0,1,236000070.0,66
2260,41009157.0,1,240000081.0,8-2


In [34]:
bus_usage_ranking_eb_id = RouteStationInfo_latlon.이비노선ID.unique()
bus_usage_ranking_eb_id

array([41006281., 41037102., 41037001., 41008001., 41008101., 41008103.,
       41008110., 41008112., 41008115., 41008114., 41008105., 41008117.,
       41008108., 41008102., 41008107., 41008106., 41008119., 41008118.,
       41008120., 41008122., 41008124., 41008125., 41008126., 41008127.,
       41008128., 41008129., 41008130., 41008131., 41008132., 41008133.,
       41008134., 41008138., 41008139., 41008140., 41008141., 41008142.,
       41008144., 41008137., 41037109., 41037106., 41037113., 41008145.,
       41008146., 41008147., 41037114., 41008150., 41008151., 41008155.,
       41008156., 41008157., 41061018., 41061020., 41008161., 41075031.,
       41075017., 41086001., 41086002., 41061022., 41086005., 41086004.,
       41008167., 41061024., 41086009., 41086010., 41086012., 41075030.,
       41061027., 41086013., 41008171., 41075034., 41075035., 41086014.,
       41086015., 41002090., 41003015., 41086008., 41086011.])

In [55]:
stations2

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
0,228003422,용인시,손골마을회관.국제학교,"Songol Community Center,",56443.0,노변정류장,경기도 용인시,,37.342517,127.066817,56443,
1,228003423,용인시,풀잎사랑,Pulipsarang,56444.0,노변정류장,경기도 용인시,,37.341800,127.068983,56444,
2,228003424,용인시,풀잎사랑,Pulipsarang,56445.0,노변정류장,경기도 용인시,,37.341817,127.069083,56445,
3,228003425,용인시,대성공정,Daesung Process,56446.0,노변정류장,경기도 용인시,,37.339350,127.073067,56446,
4,228003426,용인시,대성공정,Daesung Process,56447.0,노변정류장,경기도 용인시,,37.339183,127.073400,56447,
...,...,...,...,...,...,...,...,...,...,...,...,...
39449,228003381,용인시,현대빌리지.광교산자이,Hyundai Village,56402.0,노변정류장,경기도 용인시,,37.326433,127.071317,56402,
39450,228003382,용인시,현대빌리지.광교산자이,Hyundai Village,56403.0,노변정류장,경기도 용인시,,37.326450,127.071100,56403,
39451,236001229,포천시,평강식물원,Pyunggang Botanical Garden,40691.0,노변정류장,경기도 포천시,경기도 포천시 영북면,38.050650,127.306617,40691,
39453,236001230,포천시,양문1리.영중면사무소,"Yangmun 1-ri, Yeongjung-myeon Office",40699.0,노변정류장,경기도 포천시,,38.005517,127.245667,40699,


In [52]:
# 랭킹테이블의 버스중 RouteStatioInfo_latlon에 정보가 있는 노선들의 랭킹만 뽑을게요
for index, row in bus_usage_ranking.iterrows():
    if(row['이비노선ID'] not in bus_usage_ranking_eb_id):
        bus_usage_ranking.drop(index, inplace = True)
        
bus_usage_ranking

0


Unnamed: 0,이비노선ID,이용횟수,표준노선ID,노선명
31,41002090.0,18361,234000068.0,720-3
44,41086002.0,10967,233000133.0,116-3
54,41086011.0,6595,234001570.0,116-2
58,41086008.0,5504,234001568.0,721
71,41075031.0,3652,233000128.0,712
76,41086005.0,3452,233000135.0,8501
78,41008151.0,3362,233000086.0,38
79,41075017.0,3310,233000130.0,73-1
96,41061027.0,2451,233000253.0,150
114,41086013.0,1824,233000254.0,203


In [36]:
top20_bus = bus_usage_ranking.head(20)
top20_bus

Unnamed: 0,이비노선ID,이용횟수,표준노선ID,노선명
31,41002090.0,18361,234000068.0,720-3
44,41086002.0,10967,233000133.0,116-3
54,41086011.0,6595,234001570.0,116-2
58,41086008.0,5504,234001568.0,721
71,41075031.0,3652,233000128.0,712
76,41086005.0,3452,233000135.0,8501
78,41008151.0,3362,233000086.0,38
79,41075017.0,3310,233000130.0,73-1
96,41061027.0,2451,233000253.0,150
114,41086013.0,1824,233000254.0,203


In [37]:
top20_bus_id = top20_bus.이비노선ID.tolist()
top20_bus_id

[41002090.0,
 41086002.0,
 41086011.0,
 41086008.0,
 41075031.0,
 41086005.0,
 41008151.0,
 41075017.0,
 41061027.0,
 41086013.0,
 41086012.0,
 41008167.0,
 41061022.0,
 41086009.0,
 41075030.0,
 41037102.0,
 41086015.0,
 41037106.0,
 41086004.0,
 41037113.0]

In [38]:
All_busline = []
bus_num_ = 0
for j in range(len(top20_bus_id)):
    tmp_busline = RouteStationInfo_latlon[RouteStationInfo_latlon['이비노선ID'] == top20_bus_id[j]].sort_values(by='bus_line_no_seq')
    tmp_buslineArr = []
    print(len(tmp_busline))
    for i in range(len(tmp_busline)):
        tmp = tmp_busline.iloc[i]
        tmp_buslineArr.append((tmp['WGS84위도'], tmp['WGS84경도']))
    All_busline.append(tmp_buslineArr)
    bus_num_ += 1

35
42
57
60
47
35
69
81
48
60
32
45
162
51
67
114
36
83
40
214


In [39]:
import folium

Busline = folium.Map(location = [37.194263,126.8727078], zoom_start=11)

top_bus_route = folium.FeatureGroup(name='top_bus_route', show=True).add_to(Busline)
top_bus_stations = folium.FeatureGroup(name='top_bus_stations', show=False).add_to(Busline)
all_bus_route = folium.FeatureGroup(name='all_bus_route', show=False).add_to(Busline)
top_bus_stations_by_date = folium.FeatureGroup(name='top_bus_stations_by_date', show=False).add_to(Busline)

folium.LayerControl(collapsed=False).add_to(Busline)


folium.PolyLine(All_busline[0], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[0][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[0][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="red", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[1], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[1][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[1][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="blue", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[2], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[2][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[2][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="green", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[3], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[3][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[3][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="purple", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[4], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[4][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[4][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="orange", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[5], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[5][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[5][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="darkblue", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[6], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[6][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[6][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="darkred", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[7], 
               popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[7][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[7][1][1])]['bus_line_no']).iloc[0].iloc[0], 
               color="pink", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[8], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[8][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[8][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="gray", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[9], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[9][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[9][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="black", weight=2.5, opacity=1).add_to(top_bus_route)


folium.PolyLine(All_busline[10], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[10][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[10][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="red", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[11], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[11][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[11][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="blue", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[12], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[12][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[12][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="green", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[13], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[13][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[13][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="purple", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[14], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[14][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[14][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="orange", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[15], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[15][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[15][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="darkblue", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[16], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[16][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[16][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="darkred", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[17], 
               popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[17][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[17][1][1])]['bus_line_no']).iloc[0].iloc[0], 
               color="pink", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[18], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[18][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[18][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="gray", weight=2.5, opacity=1).add_to(top_bus_route)

folium.PolyLine(All_busline[19], 
                popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[19][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[19][1][1])]['bus_line_no']).iloc[0].iloc[0], 
                color="black", weight=2.5, opacity=1).add_to(top_bus_route)

Busline



## 정류장별 이용량 구하기

tripChain3 = tripChain

tripChain3['승차역ID1'] = tripChain3['승차역ID1'].fillna(0)
tripChain3['승차역ID2'] = tripChain3['승차역ID2'].fillna(0)
tripChain3['승차역ID3'] = tripChain3['승차역ID3'].fillna(0)
tripChain3['승차역ID4'] = tripChain3['승차역ID4'].fillna(0)
tripChain3['하차역ID1'] = tripChain3['하차역ID1'].fillna(0)
tripChain3['하차역ID2'] = tripChain3['하차역ID2'].fillna(0)
tripChain3['하차역ID3'] = tripChain3['하차역ID3'].fillna(0)
tripChain3['하차역ID4'] = tripChain3['하차역ID4'].fillna(0)

bus_station_user_num = {}

for index, row in tripChain3.iterrows():
    if row['승차역ID1'] in bus_station_user_num:
        bus_station_user_num[row['승차역ID1']] = bus_station_user_num[row['승차역ID1']] + 1
    else:
        bus_station_user_num[row['승차역ID1']] = 1
        
    if row['하차역ID1'] == row['승차역ID2']:
        if row['하차역ID1'] in bus_station_user_num:
            bus_station_user_num[row['하차역ID1']] = bus_station_user_num[row['하차역ID1']] + 1
        else:
            bus_station_user_num[row['하차역ID1']] = 1
    else:
        if row['하차역ID1'] in bus_station_user_num:
            bus_station_user_num[row['하차역ID1']] = bus_station_user_num[row['하차역ID1']] + 1
        else:
            bus_station_user_num[row['하차역ID1']] = 1
        if row['승차역ID2'] in bus_station_user_num:
            bus_station_user_num[row['승차역ID2']] = bus_station_user_num[row['승차역ID2']] + 1
        else:
            bus_station_user_num[row['승차역ID2']] = 1
            
            
    if row['하차역ID2'] == row['승차역ID3']:
        if row['하차역ID2'] in bus_station_user_num:
            bus_station_user_num[row['하차역ID2']] = bus_station_user_num[row['하차역ID2']] + 1
        else:
            bus_station_user_num[row['하차역ID2']] = 1
    else:
        if row['하차역ID2'] in bus_station_user_num:
            bus_station_user_num[row['하차역ID2']] = bus_station_user_num[row['하차역ID2']] + 1
        else:
            bus_station_user_num[row['하차역ID2']] = 1
        if row['승차역ID3'] in bus_station_user_num:
            bus_station_user_num[row['승차역ID3']] = bus_station_user_num[row['승차역ID3']] + 1
        else:
            bus_station_user_num[row['승차역ID3']] = 1
            
    if row['하차역ID3'] == row['승차역ID4']:
        if row['하차역ID3'] in bus_station_user_num:
            bus_station_user_num[row['하차역ID3']] = bus_station_user_num[row['하차역ID3']] + 1
        else:
            bus_station_user_num[row['하차역ID3']] = 1
    else:
        if row['하차역ID3'] in bus_station_user_num:
            bus_station_user_num[row['하차역ID3']] = bus_station_user_num[row['하차역ID3']] + 1
        else:
            bus_station_user_num[row['하차역ID3']] = 1
        if row['승차역ID4'] in bus_station_user_num:
            bus_station_user_num[row['승차역ID4']] = bus_station_user_num[row['승차역ID4']] + 1
        else:
            bus_station_user_num[row['승차역ID4']] = 1
            
    if row['하차역ID4'] in bus_station_user_num:
        bus_station_user_num[row['하차역ID4']] = bus_station_user_num[row['하차역ID4']] + 1
    else:
        bus_station_user_num[row['하차역ID4']] = 1
    
most_station = pd.DataFrame.from_dict(bus_station_user_num, orient='index')
most_station.columns = ['이용횟수']
most_station = most_station.drop(0.0)
most_station

most_station.to_csv('station_usage_count.csv', sep=',')

In [40]:
most_stations = pd.read_csv('./station_usage_count.csv')
most_stations = most_stations.rename(columns={'Unnamed: 0' : 'station_id' })
most_stations = most_stations.sort_values(by = '이용횟수', ascending=False)
most_stations

Unnamed: 0,station_id,이용횟수
64,1713.0,25266
27,4122214.0,22315
130,4170093.0,16599
59,4122215.0,16371
22,4108044.0,15188
...,...,...
11036,9010425.0,1
11033,4117872.0,1
11028,4120812.0,1
11023,16444.0,1


In [41]:
stations2

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
0,228003422,용인시,손골마을회관.국제학교,"Songol Community Center,",56443.0,노변정류장,경기도 용인시,,37.342517,127.066817,56443,
1,228003423,용인시,풀잎사랑,Pulipsarang,56444.0,노변정류장,경기도 용인시,,37.341800,127.068983,56444,
2,228003424,용인시,풀잎사랑,Pulipsarang,56445.0,노변정류장,경기도 용인시,,37.341817,127.069083,56445,
3,228003425,용인시,대성공정,Daesung Process,56446.0,노변정류장,경기도 용인시,,37.339350,127.073067,56446,
4,228003426,용인시,대성공정,Daesung Process,56447.0,노변정류장,경기도 용인시,,37.339183,127.073400,56447,
...,...,...,...,...,...,...,...,...,...,...,...,...
39449,228003381,용인시,현대빌리지.광교산자이,Hyundai Village,56402.0,노변정류장,경기도 용인시,,37.326433,127.071317,56402,
39450,228003382,용인시,현대빌리지.광교산자이,Hyundai Village,56403.0,노변정류장,경기도 용인시,,37.326450,127.071100,56403,
39451,236001229,포천시,평강식물원,Pyunggang Botanical Garden,40691.0,노변정류장,경기도 포천시,경기도 포천시 영북면,38.050650,127.306617,40691,
39453,236001230,포천시,양문1리.영중면사무소,"Yangmun 1-ri, Yeongjung-myeon Office",40699.0,노변정류장,경기도 포천시,,38.005517,127.245667,40699,


In [42]:
most_stations

Unnamed: 0,station_id,이용횟수
64,1713.0,25266
27,4122214.0,22315
130,4170093.0,16599
59,4122215.0,16371
22,4108044.0,15188
...,...,...
11036,9010425.0,1
11033,4117872.0,1
11028,4120812.0,1
11023,16444.0,1


In [43]:
stations2

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
0,228003422,용인시,손골마을회관.국제학교,"Songol Community Center,",56443.0,노변정류장,경기도 용인시,,37.342517,127.066817,56443,
1,228003423,용인시,풀잎사랑,Pulipsarang,56444.0,노변정류장,경기도 용인시,,37.341800,127.068983,56444,
2,228003424,용인시,풀잎사랑,Pulipsarang,56445.0,노변정류장,경기도 용인시,,37.341817,127.069083,56445,
3,228003425,용인시,대성공정,Daesung Process,56446.0,노변정류장,경기도 용인시,,37.339350,127.073067,56446,
4,228003426,용인시,대성공정,Daesung Process,56447.0,노변정류장,경기도 용인시,,37.339183,127.073400,56447,
...,...,...,...,...,...,...,...,...,...,...,...,...
39449,228003381,용인시,현대빌리지.광교산자이,Hyundai Village,56402.0,노변정류장,경기도 용인시,,37.326433,127.071317,56402,
39450,228003382,용인시,현대빌리지.광교산자이,Hyundai Village,56403.0,노변정류장,경기도 용인시,,37.326450,127.071100,56403,
39451,236001229,포천시,평강식물원,Pyunggang Botanical Garden,40691.0,노변정류장,경기도 포천시,경기도 포천시 영북면,38.050650,127.306617,40691,
39453,236001230,포천시,양문1리.영중면사무소,"Yangmun 1-ri, Yeongjung-myeon Office",40699.0,노변정류장,경기도 포천시,,38.005517,127.245667,40699,


In [44]:
most_stations = pd.merge(most_stations, stations2[['시군명', '정류소명', 'WGS84위도', 'WGS84경도', '표준정류장ID', '이비카드정류장ID']], how='left', left_on='station_id', right_on = '이비카드정류장ID')
most_stations.drop_duplicates(subset ="station_id", keep = 'first', inplace = True) 
most_stations

Unnamed: 0,station_id,이용횟수,시군명,정류소명,WGS84위도,WGS84경도,표준정류장ID,이비카드정류장ID
0,1713.0,25266,,,,,,
1,4122214.0,22315,수원시,수원역.AK플라자,37.264917,127.001450,202000105.0,4122214.0
2,4170093.0,16599,,,,,,
3,4122215.0,16371,수원시,수원역.AK플라자,37.267367,127.000883,202000106.0,4122215.0
4,4108044.0,15188,수원시,그대가프리미어아파트.망포역3번출구,37.244783,127.056167,203000150.0,4108044.0
...,...,...,...,...,...,...,...,...
15613,9010425.0,1,,,,,,
15614,4117872.0,1,,,,,,
15615,4120812.0,1,구리시,LG원앙.한성아파트,37.588767,127.138033,221000102.0,4120812.0
15616,16444.0,1,,,,,,


In [51]:
 most_stations[(most_stations['시군명'] == '화성시')].isna().sum()

station_id    0
이용횟수          0
시군명           0
정류소명          0
WGS84위도       0
WGS84경도       0
표준정류장ID       0
이비카드정류장ID     0
dtype: int64

In [48]:
most_stations.표준정류장ID.unique().size

3828

In [49]:
most_stations.이비카드정류장ID.unique().size

3828

In [45]:
top_hwaseong_stations = most_stations[most_stations['시군명'] == '화성시'].sort_values(by='이용횟수', ascending=False).head(60)
top_hwaseong_stations

Unnamed: 0,station_id,이용횟수,시군명,정류소명,WGS84위도,WGS84경도,표준정류장ID,이비카드정류장ID
8,4100048.0,8599,화성시,신창미션힐.송화초교,37.2033,127.03855,233000101.0,4100048.0
10,4151651.0,7944,화성시,병점역사거리,37.206917,127.035633,233000702.0,4151651.0
18,4100049.0,6269,화성시,홈플러스.벌말초교,37.2033,127.038133,233000100.0,4100049.0
19,4100051.0,6051,화성시,동부출장소.병점초등학교,37.2088,127.034433,233000708.0,4100051.0
33,4170271.0,4459,화성시,병점사거리,37.208383,127.034433,233000703.0,4170271.0
44,4108036.0,3976,화성시,신영통현대타운.두산위브,37.235467,127.062467,233000118.0,4108036.0
47,4116671.0,3807,화성시,수원대학교,37.214167,126.97915,233000576.0,4116671.0
51,4197467.0,3651,화성시,한빛마을(중),37.207567,127.0692,233001562.0,4197467.0
53,4170973.0,3598,화성시,메타폴리스(중),37.2037,127.0675,233001219.0,4170973.0
58,4130131.0,3354,화성시,병점역후문,37.206867,127.031833,233000701.0,4130131.0


In [None]:
top_hwaseong_stations_id = top_hwaseong_stations.station_id.unique()
top_hwaseong_stations_id

In [None]:
top_hwaseong_stations_id = top_hwaseong_stations.이비카드정류장ID.unique()
top_hwaseong_stations_id

In [None]:
all_buses = RouteStationInfo_latlon.이비노선ID.unique()
all_buses

In [None]:
for index, row in top_hwaseong_stations.iterrows():
    folium.Marker([row['WGS84위도'], row['WGS84경도']], popup=row['정류소명'], icon = folium.Icon(icon='cloud')).add_to(top_bus_stations)

# map_osm2.save('top_bus_stations_without_route.html')
Busline

In [None]:
import random
color = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'gray', 'black']
len(color)

In [None]:
All_busline = []
bus_num_ = 0
for j in range(len(all_buses)):
    tmp_busline = RouteStationInfo_latlon[RouteStationInfo_latlon['이비노선ID'] == all_buses[j]].sort_values(by='bus_line_no_seq')
    tmp_buslineArr = []
    for i in range(len(tmp_busline)):
        tmp = tmp_busline.iloc[i]
        tmp_buslineArr.append((tmp['WGS84위도'], tmp['WGS84경도']))
    All_busline.append(tmp_buslineArr)
    bus_num_ += 1

for i in range(len(All_busline)):
    if(len(All_busline[i]) == 0):
        continue
    folium.PolyLine(All_busline[i],
                    popup = pd.DataFrame(RouteStationInfo_latlon[(RouteStationInfo_latlon['WGS84위도'] == All_busline[i][1][0]) & (RouteStationInfo_latlon['WGS84경도'] == All_busline[i][1][1])]['bus_line_no']).iloc[0].iloc[0],
                    color=color[random.randrange(0,13)], weight=2.5, opacity=1).add_to(all_bus_route)


Busline

In [None]:
## 이용날짜별 정류장 이용수
tripChain3 = tripChain

tripChain3['이용날짜'] = tripChain3.최초승차일시.astype(str)
tripChain3['이용날짜'] = tripChain3.이용날짜.str.slice(0, 8)
tripChain3['이용날짜'].unique()

In [None]:
def usage_calculate(table, usage_table):
    for index, row in table.iterrows():
        if row['승차역ID1'] in usage_table:
            usage_table[row['승차역ID1']] = usage_table[row['승차역ID1']] + 1
        else:
            usage_table[row['승차역ID1']] = 1

        if row['하차역ID1'] == row['승차역ID2']:
            if row['하차역ID1'] in usage_table:
                usage_table[row['하차역ID1']] = usage_table[row['하차역ID1']] + 1
            else:
                usage_table[row['하차역ID1']] = 1
        else:
            if row['하차역ID1'] in usage_table:
                usage_table[row['하차역ID1']] = usage_table[row['하차역ID1']] + 1
            else:
                usage_table[row['하차역ID1']] = 1
            if row['승차역ID2'] in usage_table:
                usage_table[row['승차역ID2']] = usage_table[row['승차역ID2']] + 1
            else:
                usage_table[row['승차역ID2']] = 1


        if row['하차역ID2'] == row['승차역ID3']:
            if row['하차역ID2'] in usage_table:
                usage_table[row['하차역ID2']] = usage_table[row['하차역ID2']] + 1
            else:
                usage_table[row['하차역ID2']] = 1
        else:
            if row['하차역ID2'] in usage_table:
                usage_table[row['하차역ID2']] = usage_table[row['하차역ID2']] + 1
            else:
                usage_table[row['하차역ID2']] = 1
            if row['승차역ID3'] in usage_table:
                usage_table[row['승차역ID3']] = usage_table[row['승차역ID3']] + 1
            else:
                usage_table[row['승차역ID3']] = 1

        if row['하차역ID3'] == row['승차역ID4']:
            if row['하차역ID3'] in usage_table:
                usage_table[row['하차역ID3']] = usage_table[row['하차역ID3']] + 1
            else:
                usage_table[row['하차역ID3']] = 1
        else:
            if row['하차역ID3'] in usage_table:
                usage_table[row['하차역ID3']] = usage_table[row['하차역ID3']] + 1
            else:
                usage_table[row['하차역ID3']] = 1
            if row['승차역ID4'] in usage_table:
                usage_table[row['승차역ID4']] = usage_table[row['승차역ID4']] + 1
            else:
                usage_table[row['승차역ID4']] = 1

        if row['하차역ID4'] in usage_table:
            usage_table[row['하차역ID4']] = usage_table[row['하차역ID4']] + 1
        else:
            usage_table[row['하차역ID4']] = 1
            
    return usage_table

usage_20180701 = pd.DataFrame(tripChain3[tripChain3['이용날짜'] == '20180701'])
usage_count_20180701 = {}
usage_count_20180701 = usage_calculate(usage_20180701, usage_count_20180701)
usage_count_20180701 = pd.DataFrame.from_dict(usage_count_20180701, orient='index')
usage_count_20180701.columns = ['20180701이용횟수']
usage_count_20180701 = usage_count_20180701.drop(0.0)
usage_count_20180701.to_csv('usage_count_20180701.csv', sep=',')



usage_20180702 = pd.DataFrame(tripChain3[tripChain3['이용날짜'] == '20180702'])
usage_count_20180702 = {}
usage_count_20180702 = usage_calculate(usage_20180702, usage_count_20180702)
usage_count_20180702 = pd.DataFrame.from_dict(usage_count_20180702, orient='index')
usage_count_20180702.columns = ['20180702이용횟수']
usage_count_20180702 = usage_count_20180702.drop(0.0)
usage_count_20180702.to_csv('usage_count_20180702.csv', sep=',')




usage_20180703 = pd.DataFrame(tripChain3[tripChain3['이용날짜'] == '20180703'])
usage_count_20180703 = {}
usage_count_20180703 = usage_calculate(usage_20180703, usage_count_20180703)
usage_count_20180703 = pd.DataFrame.from_dict(usage_count_20180703, orient='index')
usage_count_20180703.columns = ['20180703이용횟수']
usage_count_20180703 = usage_count_20180703.drop(0.0)
usage_count_20180703.to_csv('usage_count_20180703.csv', sep=',')

usage_20180704 = pd.DataFrame(tripChain3[tripChain3['이용날짜'] == '20180704'])
usage_count_20180704 = {}
usage_count_20180704 = usage_calculate(usage_20180704, usage_count_20180704)
usage_count_20180704 = pd.DataFrame.from_dict(usage_count_20180704, orient='index')
usage_count_20180704.columns = ['20180704이용횟수']
usage_count_20180704 = usage_count_20180704.drop(0.0)
usage_count_20180704.to_csv('usage_count_20180704.csv', sep=',')


In [None]:
usage_count_20180701 = pd.read_csv('./usage_count_20180701.csv')
usage_count_20180701 = usage_count_20180701.rename(columns={'Unnamed: 0' : 'station_id' })
usage_count_20180701.sort_values(by = '20180701이용횟수', ascending=False)

In [None]:
usage_count_20180702 = pd.read_csv('./usage_count_20180702.csv')
usage_count_20180702 = usage_count_20180702.rename(columns={'Unnamed: 0' : 'station_id' })
usage_count_20180702.sort_values(by = '20180702이용횟수', ascending=False)

In [None]:
usage_count_20180703 = pd.read_csv('./usage_count_20180703.csv')
usage_count_20180703 = usage_count_20180703.rename(columns={'Unnamed: 0' : 'station_id' })
usage_count_20180703.sort_values(by = '20180703이용횟수', ascending=False)

In [None]:
usage_count_20180704 = pd.read_csv('./usage_count_20180704.csv')
usage_count_20180704 = usage_count_20180704.rename(columns={'Unnamed: 0' : 'station_id' })
usage_count_20180704.sort_values(by = '20180704이용횟수', ascending=False)

In [None]:
usage_by_date = usage_count_20180701.merge(usage_count_20180702,on='station_id').merge(usage_count_20180703,on='station_id').merge(usage_count_20180704, on='station_id')
usage_by_date

In [None]:
usage_by_date = pd.merge(usage_by_date, stations2[['시군명', '정류소명', 'WGS84위도', 'WGS84경도', '이비카드정류장ID']], how='left', left_on='station_id', right_on = '이비카드정류장ID')
usage_by_date = usage_by_date.drop(['이비카드정류장ID'], axis=1)
usage_by_date.drop_duplicates(subset ="station_id", keep = 'first', inplace = True) 
usage_by_date

In [None]:
top_hwaseong_stations_20180701 = usage_by_date[usage_by_date['시군명'] == '화성시'].sort_values(by='20180701이용횟수', ascending=False).head(60)
top_hwaseong_stations_20180701

In [None]:
top_hwaseong_stations_20180702 = usage_by_date[usage_by_date['시군명'] == '화성시'].sort_values(by='20180702이용횟수', ascending=False).head(60)
top_hwaseong_stations_20180702

In [None]:
top_hwaseong_stations_20180703 = usage_by_date[usage_by_date['시군명'] == '화성시'].sort_values(by='20180703이용횟수', ascending=False).head(60)
top_hwaseong_stations_20180703

In [None]:
top_hwaseong_stations_20180704 = usage_by_date[usage_by_date['시군명'] == '화성시'].sort_values(by='20180704이용횟수', ascending=False).head(60)
top_hwaseong_stations_20180704

In [None]:
for index, row in top_hwaseong_stations_20180701.iterrows():
    folium.Marker([row['WGS84위도'] + 0.001, row['WGS84경도'] + 0.001], popup=row['정류소명'], icon = folium.Icon(color='lightblue')).add_to(top_bus_stations_by_date)

for index, row in top_hwaseong_stations_20180702.iterrows():
    folium.Marker([row['WGS84위도'], row['WGS84경도']], popup=row['정류소명'], icon = folium.Icon(color='red')).add_to(top_bus_stations_by_date)
    
for index, row in top_hwaseong_stations_20180703.iterrows():
    folium.Marker([row['WGS84위도'], row['WGS84경도']], popup=row['정류소명'], icon = folium.Icon(color='red')).add_to(top_bus_stations_by_date)
    
for index, row in top_hwaseong_stations_20180704.iterrows():
    folium.Marker([row['WGS84위도'], row['WGS84경도']], popup=row['정류소명'], icon = folium.Icon(color='red')).add_to(top_bus_stations_by_date)
    
Busline


In [None]:
#Busline.save('station_route_ranking_visualization.html')