In [1]:
import pandas as pd
import geopandas as gpd

# read in data

# Tabular inputs (CSV), loaded with pandas.
tripChain = pd.read_csv('PJT001_TripChain.csv')
stations = pd.read_csv('PJT001_stations_table.csv')
emdOd = pd.read_csv('PJT001_sk_emd_od.csv')
routeStationInfo = pd.read_csv('PJT001_routestationinfo.csv')
route_station_mapping = pd.read_csv('PJT001_routestationmapping.csv')

# Spatial inputs (GeoJSON), loaded with geopandas.
mocLink = gpd.read_file('PJT001_moc_link_2018.geojson')
cellFlow = gpd.read_file('PJT001_h_100m_cell_flow.geojson')
cellPop = gpd.read_file('PJT001_h_100m_cell_pop.geojson')
emd = gpd.read_file('PJT001_tl_scco_emd.geojson')

In [2]:
# Give the trip chain table readable column names.
# The labels are assigned by position, so the groups below must stay in the
# same order as the columns of the source file (51 names in total).

_trip_header_cols = ['암호화카드번호', '트랜잭션ID', '환승횟수', '교통카드발행사ID', '총이용객수', '사용자구분']
_trip_mode_cols = ['교통수단CD1', '교통수단CD2', '교통수단CD3', '교통수단CD4', '교통수단CD5']
_trip_route_cols = ['버스노선ID1', '버스노선ID2', '버스노선ID3', '버스노선ID4', '버스노선ID5']
_trip_vehicle_cols = ['차량ID1', '차량ID2', '차량ID3', '차량ID4', '차량ID5']
_trip_total_cols = ['총통행거리', '총탑승시간', '총소요시간']
_trip_board_time_cols = ['승차일시1', '승차일시2', '승차일시3', '승차일시4', '승차일시5']
_trip_alight_time_cols = ['하차일시1', '하차일시2', '하차일시3', '하차일시4', '하차일시5']
_trip_span_time_cols = ['최초승차일시', '최종하차일시']
_trip_board_station_cols = ['승차역ID1', '승차역ID2', '승차역ID3', '승차역ID4', '승차역ID5']
_trip_alight_station_cols = ['하차역ID1', '하차역ID2', '하차역ID3', '하차역ID4', '하차역ID5']
_trip_trailer_cols = ['최초승차역ID', '최종하차역ID', '총이용금액', '수집건수', '트립체인완료코드']

tripChain.columns = (
    _trip_header_cols
    + _trip_mode_cols
    + _trip_route_cols
    + _trip_vehicle_cols
    + _trip_total_cols
    + _trip_board_time_cols
    + _trip_alight_time_cols
    + _trip_span_time_cols
    + _trip_board_station_cols
    + _trip_alight_station_cols
    + _trip_trailer_cols
)

In [3]:
# Fix the mislabeled columns of the route-station mapping table: the names of
# the '이비노선ID' and '표준노선ID' columns are swapped with each other.
# NOTE(review): the swap is applied in place, so running this cell a second
# time on the same frame swaps the names back. Run it exactly once per fresh
# load of route_station_mapping.

route_station_mapping.rename(columns = {'이비노선ID': '표준노선ID', '표준노선ID': '이비노선ID'}, inplace = True)
route_station_mapping

Unnamed: 0,구분,운수사명,운수사ID,표준노선ID,이비노선ID,노선명
0,경기시내,경원여객M,2805000,216000044,28050900,M6410
1,경기시내,강화운수,4100100,232000028,41001001,2
2,경기시내,강화운수,4100100,232000029,41001013,88
3,경기시내,강화운수,4100100,232000061,41001020,3000
4,경기시내,강화운수,4100100,232000067,41001024,388
...,...,...,...,...,...,...
2122,경기시내,서현운수,4108800,229000060,41088004,330
2123,경기시내,서현운수,4108800,229000063,41088005,850
2124,경기시내,코레일네트웍스,4108900,213000024,41089001,8507
2125,경기시내,신성교통,4109100,229000102,41091900,M7111


In [4]:
routeStationInfo

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no
0,65286,228000018,10-4,1,용인터미널,228001552,47634.0
1,65287,228000018,10-4,2,용인터미널(경유),277102443,
2,65288,228000018,10-4,3,포브스병원,228000443,29439.0
3,65289,228000018,10-4,4,제일교회,228000665,29881.0
4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0
...,...,...,...,...,...,...,...
37826,191750,241491011,38-1,63,신일해피트리후문,233002733,
37827,191751,241491011,38-1,64,기배동행정복지센터,233002726,
37828,191752,241491011,38-1,65,기안1통입구,233002959,
37829,191753,241491011,38-1,66,배양2리,233002717,55455.0


In [5]:
# Merge the 이비노선ID column into the routeStationInfo table.
# The join matches routeStationInfo.pr_station_id against the mapping's
# 표준노선ID; rows without a match keep NaN in 이비노선ID (left join).
# NOTE(review): the row count grows after this merge (see the displayed index),
# so the mapping presumably holds repeated 표준노선ID values; duplicates are
# removed later, when RouteStationInfo_latlon is built.

routeStationInfo = pd.merge(routeStationInfo, route_station_mapping[['표준노선ID', '이비노선ID']], how = 'left', left_on = 'pr_station_id', right_on='표준노선ID')
# Use the `columns=` keyword: the positional axis argument (`drop(label, 1)`)
# was removed in pandas 2.0.
routeStationInfo = routeStationInfo.drop(columns='표준노선ID')
#routeStationInfo.drop_duplicates(subset =['pr_station_id', 'bus_line_no_seq'], keep = 'first', inplace = True) 
routeStationInfo

Unnamed: 0,seq,pr_station_id,bus_line_no,bus_line_no_seq,station_nm,station_id,mobile_no,이비노선ID
0,65286,228000018,10-4,1,용인터미널,228001552,47634.0,41006260.0
1,65287,228000018,10-4,2,용인터미널(경유),277102443,,41006260.0
2,65288,228000018,10-4,3,포브스병원,228000443,29439.0,41006260.0
3,65289,228000018,10-4,4,제일교회,228000665,29881.0,41006260.0
4,65290,228000018,10-4,5,라이프아파트,228000664,29457.0,41006260.0
...,...,...,...,...,...,...,...,...
38535,191750,241491011,38-1,63,신일해피트리후문,233002733,,
38536,191751,241491011,38-1,64,기배동행정복지센터,233002726,,
38537,191752,241491011,38-1,65,기안1통입구,233002959,,
38538,191753,241491011,38-1,66,배양2리,233002717,55455.0,


In [6]:
# Author's note: there are 661 routes in total, but only 289 of them have an
# 이비노선ID. Because the 이비노선ID mapping is missing, at least 43% of the
# route usage information will probably be lost.
# The first print is the number of distinct pr_station_id values, the second
# the number of distinct 이비노선ID values.
# NOTE(review): unique() keeps NaN as a value, and 이비노선ID contains NaN
# after the left merge, so the second figure presumably counts NaN as one ID.

print(routeStationInfo.pr_station_id.unique().size)
print(routeStationInfo.이비노선ID.unique().size)

661
289


In [7]:
# The 시군명 (city/county) column of the stations table has too many missing
# values, so the data was filled in manually.
# stations2 = the stations table with the missing 시군명 values filled in.

stations2 = pd.read_csv('./station_null2.csv')
# `columns=` replaces the positional axis argument removed in pandas 2.0.
stations2 = stations2.drop(columns=['station_id', 'Unnamed: 0'])
# Keep one row per standard station ID (the first occurrence wins).
stations2 = stations2.drop_duplicates(subset="표준정류장ID", keep='first')
stations2

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
0,228003422,용인시,손골마을회관.국제학교,"Songol Community Center,",56443.0,노변정류장,경기도 용인시,,37.342517,127.066817,56443,
1,228003423,용인시,풀잎사랑,Pulipsarang,56444.0,노변정류장,경기도 용인시,,37.341800,127.068983,56444,
2,228003424,용인시,풀잎사랑,Pulipsarang,56445.0,노변정류장,경기도 용인시,,37.341817,127.069083,56445,
3,228003425,용인시,대성공정,Daesung Process,56446.0,노변정류장,경기도 용인시,,37.339350,127.073067,56446,
4,228003426,용인시,대성공정,Daesung Process,56447.0,노변정류장,경기도 용인시,,37.339183,127.073400,56447,
...,...,...,...,...,...,...,...,...,...,...,...,...
39449,228003381,용인시,현대빌리지.광교산자이,Hyundai Village,56402.0,노변정류장,경기도 용인시,,37.326433,127.071317,56402,
39450,228003382,용인시,현대빌리지.광교산자이,Hyundai Village,56403.0,노변정류장,경기도 용인시,,37.326450,127.071100,56403,
39451,236001229,포천시,평강식물원,Pyunggang Botanical Garden,40691.0,노변정류장,경기도 포천시,경기도 포천시 영북면,38.050650,127.306617,40691,
39453,236001230,포천시,양문1리.영중면사무소,"Yangmun 1-ri, Yeongjung-myeon Office",40699.0,노변정류장,경기도 포천시,,38.005517,127.245667,40699,


In [15]:
import numpy as np
stations2[(stations2['시군명'] == '화성시')]

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
1311,233001737,화성시,전곡산업단지,,,,,,,,55367,
3426,277100614,화성시,동탄초교(경유),,,,,,,,,
3427,277100615,화성시,동탄초교(경유),,,,,,,,,
3481,277101611,화성시,제암교차로(경유),,,,,,,,,
3488,277101618,화성시,고온리종점(경유),,,,,,37.038400,126.750217,,7101618.0
...,...,...,...,...,...,...,...,...,...,...,...,...
39138,233001260,화성시,전곡2리마을회관.운천동,"Jeongok 2-ri Community Center, Uncheon-dong",37371.0,노변정류장,경기도 화성시,경기도 화성시 서신면,37.191267,126.692133,37371,
39143,233001261,화성시,칠곡리,Chilgok-ri,37364.0,노변정류장,경기도 화성시,경기도 화성시 송산면,37.203567,126.711483,37364,
39148,233001279,화성시,신영통현대타운2단지,,,,,,,,55696,
39149,233001284,화성시,신한미지엔정문,,,,,,,,55697,


In [14]:
import numpy as np
# Hwaseong-si (화성시) stations that have a standard station ID but no 이비카드정류장ID.
stations2[stations2['표준정류장ID'].notnull() & stations2['이비카드정류장ID'].isnull() & (stations2['시군명'] == '화성시')]

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
1311,233001737,화성시,전곡산업단지,,,,,,,,55367,
3426,277100614,화성시,동탄초교(경유),,,,,,,,,
3427,277100615,화성시,동탄초교(경유),,,,,,,,,
3481,277101611,화성시,제암교차로(경유),,,,,,,,,
3489,277101619,화성시,호곡리(경유),,,,,,37.118383,126.773967,,
...,...,...,...,...,...,...,...,...,...,...,...,...
39138,233001260,화성시,전곡2리마을회관.운천동,"Jeongok 2-ri Community Center, Uncheon-dong",37371.0,노변정류장,경기도 화성시,경기도 화성시 서신면,37.191267,126.692133,37371,
39143,233001261,화성시,칠곡리,Chilgok-ri,37364.0,노변정류장,경기도 화성시,경기도 화성시 송산면,37.203567,126.711483,37364,
39148,233001279,화성시,신영통현대타운2단지,,,,,,,,55696,
39149,233001284,화성시,신한미지엔정문,,,,,,,,55697,


In [12]:
stations2[stations2['이비카드정류장ID'].isnull()]

Unnamed: 0,표준정류장ID,시군명,정류소명,정류소영문명,정류소번호,중앙차로여부,관할관청,위치,WGS84위도,WGS84경도,모바일정류장ID,이비카드정류장ID
0,228003422,용인시,손골마을회관.국제학교,"Songol Community Center,",56443.0,노변정류장,경기도 용인시,,37.342517,127.066817,56443,
1,228003423,용인시,풀잎사랑,Pulipsarang,56444.0,노변정류장,경기도 용인시,,37.341800,127.068983,56444,
2,228003424,용인시,풀잎사랑,Pulipsarang,56445.0,노변정류장,경기도 용인시,,37.341817,127.069083,56445,
3,228003425,용인시,대성공정,Daesung Process,56446.0,노변정류장,경기도 용인시,,37.339350,127.073067,56446,
4,228003426,용인시,대성공정,Daesung Process,56447.0,노변정류장,경기도 용인시,,37.339183,127.073400,56447,
...,...,...,...,...,...,...,...,...,...,...,...,...
39449,228003381,용인시,현대빌리지.광교산자이,Hyundai Village,56402.0,노변정류장,경기도 용인시,,37.326433,127.071317,56402,
39450,228003382,용인시,현대빌리지.광교산자이,Hyundai Village,56403.0,노변정류장,경기도 용인시,,37.326450,127.071100,56403,
39451,236001229,포천시,평강식물원,Pyunggang Botanical Garden,40691.0,노변정류장,경기도 포천시,경기도 포천시 영북면,38.050650,127.306617,40691,
39453,236001230,포천시,양문1리.영중면사무소,"Yangmun 1-ri, Yeongjung-myeon Office",40699.0,노변정류장,경기도 포천시,,38.005517,127.245667,40699,


# Keep only the Hwaseong-si (화성시) stations in the hwaseong_stations table.
#
# This builds a new frame with a boolean mask. The previous version bound
# hwaseong_stations to the same object as stations2 and dropped rows from it
# in place, which also removed every non-Hwaseong station from stations2
# itself; later cells still merge against stations2 and filter by 시군명.

hwaseong_stations = (
    stations2[stations2['시군명'] == '화성시']
    .drop_duplicates(subset="표준정류장ID", keep='first')
)
hwaseong_stations

# Missing-value count per column of the Hwaseong station table.
hwaseong_stations.isna().sum()

In [None]:
# RouteStationInfo_latlon = route station info + (latitude, longitude, 시군명)
# Stations are matched on routeStationInfo.station_id == stations2.표준정류장ID.

RouteStationInfo_latlon = pd.merge(routeStationInfo, stations2[['WGS84위도', 'WGS84경도', '표준정류장ID', '시군명']], how='left', left_on='station_id', right_on='표준정류장ID')
# `columns=` replaces the positional axis argument removed in pandas 2.0.
RouteStationInfo_latlon = RouteStationInfo_latlon.drop(columns='표준정류장ID')
# One row per (route, station, position on the route); the first occurrence wins.
RouteStationInfo_latlon = RouteStationInfo_latlon.drop_duplicates(subset=['pr_station_id', 'station_id', 'bus_line_no_seq'], keep='first')
# Rows whose station found no match in stations2 have no 시군명 and are discarded.
RouteStationInfo_latlon = RouteStationInfo_latlon.dropna(subset=['시군명'])
RouteStationInfo_latlon

In [None]:
RouteStationInfo_latlon[RouteStationInfo_latlon['시군명'] != '화성시']

In [None]:
pd.set_option('display.max_rows', 500)

In [None]:
RouteStationInfo_latlon.shape

In [None]:
# Keep only the rows located in Hwaseong-si (화성시). A boolean mask does this
# in one pass instead of dropping rows one at a time inside an iterrows loop.
# .copy() makes the result an independent frame, so the later in-place
# operations on it do not trigger SettingWithCopyWarning.
RouteStationInfo_latlon = RouteStationInfo_latlon[RouteStationInfo_latlon['시군명'] == '화성시'].copy()

In [None]:
RouteStationInfo_latlon

In [None]:
RouteStationInfo_latlon.drop_duplicates(subset =['pr_station_id', 'station_id', 'bus_line_no_seq'], keep = 'first', inplace = True) 

In [None]:
RouteStationInfo_latlon.shape

In [None]:
RouteStationInfo_latlon[RouteStationInfo_latlon['이비노선ID'].isna()]
print(RouteStationInfo_latlon.pr_station_id.unique().size)

In [None]:
# 이비노선ID가 없는 노선들은 지워버릴게요 (어짜피 tripChain에서 매칭이 안되요)

RouteStationInfo_latlon = RouteStationInfo_latlon.dropna(subset=['이비노선ID'])

In [None]:
RouteStationInfo_latlon[RouteStationInfo_latlon['이비노선ID'].isna()]
print(RouteStationInfo_latlon.pr_station_id.unique().size)

In [None]:
RouteStationInfo_latlon[RouteStationInfo_latlon['bus_line_no'] == '27']

In [None]:
RouteStationInfo_latlon[RouteStationInfo_latlon['WGS84위도'].isna()].shape

In [None]:
# Fill the missing latitude/longitude values of the RouteStationInfo_latlon table.
# Step 1: load the two EUC-KR encoded Hwaseong bus-stop files and stack them.

hwaseong_bus1, hwaseong_bus2 = (
    pd.read_csv(csv_path, encoding = 'euc-kr')
    for csv_path in ('./hwaseong_bus1.csv', './hwaseong_bus2.csv')
)
hwaseong_bus = pd.concat([hwaseong_bus1, hwaseong_bus2])
hwaseong_bus

In [None]:
# Attach fallback coordinates (위도/경도) by matching on the stop name.
# Stop names are not unique in hwaseong_bus, so the lookup is reduced to the
# first row per name before joining. Otherwise every route-station row is
# repeated once per same-named stop. Keeping the first row per name selects
# the same coordinates that a later "keep first" de-duplication would keep.
# NOTE(review): matching by name is approximate; same-named stops (for
# example on opposite sides of a road) all receive the first stop's coordinates.
stop_coords_by_name = hwaseong_bus[['위도', '경도', '정류소명']].drop_duplicates(subset='정류소명', keep='first')
RouteStationInfo_latlon = pd.merge(RouteStationInfo_latlon, stop_coords_by_name, how='left', left_on='station_nm', right_on='정류소명')
RouteStationInfo_latlon = RouteStationInfo_latlon.drop(['정류소명'], axis =1)
RouteStationInfo_latlon

In [None]:
RouteStationInfo_latlon.drop_duplicates(subset =['pr_station_id', 'bus_line_no_seq', 'station_id'], keep = 'first', inplace = True) 
RouteStationInfo_latlon

In [None]:
# Where the WGS84 coordinate is missing, fall back to the coordinate merged in
# from hwaseong_bus, then discard the helper columns.
for wgs_col, fallback_col in (('WGS84위도', '위도'), ('WGS84경도', '경도')):
    RouteStationInfo_latlon[wgs_col] = RouteStationInfo_latlon[wgs_col].combine_first(RouteStationInfo_latlon[fallback_col])
RouteStationInfo_latlon = RouteStationInfo_latlon.drop(columns=['위도', '경도'])

In [None]:
RouteStationInfo_latlon[RouteStationInfo_latlon['WGS84위도'].isna()]

In [None]:
RouteStationInfo_latlon

In [None]:
RouteStationInfo_latlon[RouteStationInfo_latlon['bus_line_no'] == '27']

In [None]:
RouteStationInfo_latlon.to_csv('RouteStationInfo_latlon.csv', sep=',')

# Count, per bus route, how many times it appears in the tripChain table.

# NOTE: tripChain2 is another name for the tripChain frame, not a copy, so the
# fillna below also replaces the missing route IDs of tripChain itself with 0.
tripChain2 = tripChain

route_id_columns = ['버스노선ID1', '버스노선ID2', '버스노선ID3', '버스노선ID4', '버스노선ID5']
for route_col in route_id_columns:
    tripChain2[route_col] = tripChain2[route_col].fillna(0)

# Tally every route ID in row-major order (row by row, leg 1 to leg 5), which
# gives the same counts and the same key order as visiting each row with
# iterrows, without building a Series per row.
bus_user_num = {}
for route_id in tripChain2[route_id_columns].to_numpy().ravel().tolist():
    bus_user_num[route_id] = bus_user_num.get(route_id, 0) + 1

most_bus_route = pd.DataFrame.from_dict(bus_user_num, orient='index')
most_bus_route.columns = ['이용횟수']
# 0 is the placeholder written by fillna above, not a real route. It is absent
# when no route ID was missing, so a missing key must not raise.
most_bus_route = most_bus_route.drop(0.0, errors='ignore')
most_bus_route

# The route ID is the (unnamed) index, so it is written as the first CSV column.
most_bus_route.to_csv('bus_route_usage_count.csv', sep=',')

In [None]:
# Reload the saved per-route counts; the first, unnamed CSV column holds the
# route ID. Rank the routes from most to least used.
bus_usage_ranking = (
    pd.read_csv('./bus_route_usage_count.csv')
    .rename(columns={'Unnamed: 0' : '이비노선ID'})
    .sort_values(by='이용횟수', ascending = False)
)
bus_usage_ranking

In [None]:
# Add the standard route ID and route name to each ranked route, keeping a
# single row per (이비노선ID, 표준노선ID, 노선명) combination.
bus_usage_ranking = (
    pd.merge(bus_usage_ranking, route_station_mapping[['이비노선ID', '표준노선ID', '노선명']], how='left', left_on='이비노선ID', right_on = '이비노선ID')
    .drop_duplicates(subset =['이비노선ID', '표준노선ID', '노선명'], keep = 'first')
)
bus_usage_ranking

In [None]:
bus_usage_ranking_eb_id = RouteStationInfo_latlon.이비노선ID.unique()
bus_usage_ranking_eb_id

In [None]:
# From the ranking table, keep only the routes for which RouteStationInfo_latlon
# has information. isin() tests every row in one pass instead of dropping rows
# one at a time inside an iterrows loop; .copy() detaches the result so later
# in-place edits do not warn.
bus_usage_ranking = bus_usage_ranking[bus_usage_ranking['이비노선ID'].isin(bus_usage_ranking_eb_id)].copy()

bus_usage_ranking

In [None]:
top20_bus = bus_usage_ranking.head(20)
top20_bus

In [None]:
top20_bus_id = top20_bus.이비노선ID.tolist()
top20_bus_id

In [None]:
# For each of the top routes, collect its stops as (latitude, longitude)
# tuples ordered by position on the route. All_busline[k] belongs to
# top20_bus_id[k]. The number of stops of each route is printed.
All_busline = []
bus_num_ = 0
for route_id in top20_bus_id:
    route_stops = RouteStationInfo_latlon[RouteStationInfo_latlon['이비노선ID'] == route_id].sort_values(by='bus_line_no_seq')
    print(len(route_stops))
    All_busline.append([(stop['WGS84위도'], stop['WGS84경도']) for _, stop in route_stops.iterrows()])
    bus_num_ += 1

In [None]:
import folium

# Base map and the four toggleable layers used by the rest of the notebook.
Busline = folium.Map(location = [37.194263,126.8727078], zoom_start=11)

top_bus_route = folium.FeatureGroup(name='top_bus_route', show=True).add_to(Busline)
top_bus_stations = folium.FeatureGroup(name='top_bus_stations', show=False).add_to(Busline)
all_bus_route = folium.FeatureGroup(name='all_bus_route', show=False).add_to(Busline)
top_bus_stations_by_date = folium.FeatureGroup(name='top_bus_stations_by_date', show=False).add_to(Busline)

folium.LayerControl(collapsed=False).add_to(Busline)

# Line colours, reused cyclically when there are more routes than colours.
top_route_colors = ["red", "blue", "green", "purple", "orange",
                    "darkblue", "darkred", "pink", "gray", "black"]

# Draw one polyline per top route. All_busline[k] holds the stop coordinates of
# top20_bus_id[k], so the two are walked together; this also works when fewer
# than 20 routes are available.
for route_rank, (route_id, route_points) in enumerate(zip(top20_bus_id, All_busline)):
    # A line needs at least two stops.
    if len(route_points) < 2:
        continue
    # Take the popup label from this route's own rows. Looking the label up by
    # the coordinates of a stop can return a different route that serves the
    # same stop.
    route_rows = RouteStationInfo_latlon[RouteStationInfo_latlon['이비노선ID'] == route_id]
    if route_rows.empty:
        continue
    folium.PolyLine(route_points,
                    popup = str(route_rows['bus_line_no'].iloc[0]),
                    color=top_route_colors[route_rank % len(top_route_colors)],
                    weight=2.5, opacity=1).add_to(top_bus_route)

Busline



## Compute the usage count of each station

# NOTE: tripChain3 is another name for the tripChain frame, not a copy, so the
# fillna below also replaces the missing station IDs of tripChain itself with
# 0. The per-date counting further down relies on those zeros being present.
tripChain3 = tripChain

# NOTE(review): only legs 1-4 are filled and counted; 승차역ID5 / 하차역ID5
# are not used here - confirm whether the fifth leg should be included.
boarding_cols = ['승차역ID1', '승차역ID2', '승차역ID3', '승차역ID4']
alighting_cols = ['하차역ID1', '하차역ID2', '하차역ID3', '하차역ID4']

for station_col in boarding_cols + alighting_cols:
    tripChain3[station_col] = tripChain3[station_col].fillna(0)

bus_station_user_num = {}

def _count_station(station_id):
    """Add one use to station_id in bus_station_user_num."""
    bus_station_user_num[station_id] = bus_station_user_num.get(station_id, 0) + 1

boarded_rows = tripChain3[boarding_cols].to_numpy().tolist()
alighted_rows = tripChain3[alighting_cols].to_numpy().tolist()

for boarded, alighted in zip(boarded_rows, alighted_rows):
    # The first boarding always counts.
    _count_station(boarded[0])
    for leg in range(3):
        # Every alighting counts; the next boarding counts only when it happens
        # at a different station, so a transfer at the same station is counted
        # once rather than twice.
        _count_station(alighted[leg])
        if alighted[leg] != boarded[leg + 1]:
            _count_station(boarded[leg + 1])
    # The alighting of the fourth leg always counts.
    _count_station(alighted[3])

most_station = pd.DataFrame.from_dict(bus_station_user_num, orient='index')
most_station.columns = ['이용횟수']
# 0 is the placeholder written by fillna above, not a real station. It is
# absent when no station ID was missing, so a missing key must not raise.
most_station = most_station.drop(0.0, errors='ignore')
most_station

# The station ID is the (unnamed) index, so it is written as the first CSV column.
most_station.to_csv('station_usage_count.csv', sep=',')

In [None]:
# Reload the saved per-station counts; the first, unnamed CSV column holds the
# station ID. Rank the stations from most to least used.
most_stations = (
    pd.read_csv('./station_usage_count.csv')
    .rename(columns={'Unnamed: 0' : 'station_id' })
    .sort_values(by = '이용횟수', ascending=False)
)
most_stations

In [None]:
stations2

In [None]:
most_stations

In [None]:
# Look up city, stop name and coordinates for each counted station through its
# 이비카드정류장ID, keeping the first match per station_id.
most_stations = (
    pd.merge(most_stations, stations2[['시군명', '정류소명', 'WGS84위도', 'WGS84경도', '이비카드정류장ID']], how='left', left_on='station_id', right_on = '이비카드정류장ID')
    .drop_duplicates(subset ="station_id", keep = 'first')
)
most_stations

In [None]:
top_hwaseong_stations = most_stations[most_stations['시군명'] == '화성시'].sort_values(by='이용횟수', ascending=False).head(60)
top_hwaseong_stations

In [None]:
top_hwaseong_stations_id = top_hwaseong_stations.station_id.unique()
top_hwaseong_stations_id

In [None]:
all_buses = RouteStationInfo_latlon.이비노선ID.unique()
all_buses

In [None]:
# Put one marker per top Hwaseong station on the top_bus_stations layer, with
# the stop name as popup text.
for stop_lat, stop_lon, stop_name in zip(top_hwaseong_stations['WGS84위도'],
                                         top_hwaseong_stations['WGS84경도'],
                                         top_hwaseong_stations['정류소명']):
    station_marker = folium.Marker([stop_lat, stop_lon], popup=stop_name, icon = folium.Icon(icon='cloud'))
    station_marker.add_to(top_bus_stations)

# map_osm2.save('top_bus_stations_without_route.html')
Busline

In [None]:
import random
color = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'gray', 'black']
len(color)

In [None]:
# Rebuild All_busline for every route in all_buses and draw each route on the
# all_bus_route layer in a randomly chosen colour.
All_busline = []
route_labels = []
bus_num_ = 0
for route_id in all_buses:
    route_stops = RouteStationInfo_latlon[RouteStationInfo_latlon['이비노선ID'] == route_id].sort_values(by='bus_line_no_seq')
    All_busline.append(list(zip(route_stops['WGS84위도'], route_stops['WGS84경도'])))
    # Take the popup label from this route's own rows. Looking the label up by
    # the coordinates of a stop can return a different route that serves the
    # same stop.
    route_labels.append(str(route_stops['bus_line_no'].iloc[0]) if len(route_stops) else None)
    bus_num_ += 1

for route_points, route_label in zip(All_busline, route_labels):
    # A line needs at least two stops; shorter routes are skipped.
    if len(route_points) < 2:
        continue
    folium.PolyLine(route_points,
                    popup = route_label,
                    # random.choice follows the palette length, whatever it is.
                    color=random.choice(color), weight=2.5, opacity=1).add_to(all_bus_route)


Busline

In [None]:
## Station usage count by date of use
# tripChain3 is another name for tripChain, so the new column is added to
# tripChain as well.
tripChain3 = tripChain

# 이용날짜 = the first 8 characters of the first-boarding timestamp as text.
tripChain3['이용날짜'] = tripChain3['최초승차일시'].astype(str).str[:8]
tripChain3['이용날짜'].unique()

In [None]:
def usage_calculate(table, usage_table, max_legs=4):
    """Tally boardings and alightings per station ID over a trip table.

    For every row of ``table`` the counter of each station is increased by one
    for: the boarding station of leg 1, the alighting station of every leg,
    and the boarding station of each following leg when it differs from the
    alighting station of the previous leg (a transfer at the same station is
    therefore counted once, not twice).

    Parameters
    ----------
    table : pandas.DataFrame
        Must contain the columns '승차역ID<k>' and '하차역ID<k>' for
        k = 1..max_legs.
    usage_table : dict
        Mapping station ID -> count. It is updated in place, so counts can be
        accumulated over several calls.
    max_legs : int, default 4
        Number of trip legs to read per row. The default reproduces the
        original behavior, which ignores the fifth leg.

    Returns
    -------
    dict
        The same ``usage_table`` object that was passed in.

    Raises
    ------
    ValueError
        If ``max_legs`` is smaller than 1.
    KeyError
        If a required column is missing from ``table``.

    Notes
    -----
    Values are counted as they are: a placeholder such as 0 gets its own
    counter, which callers remove afterwards.
    """
    if max_legs < 1:
        raise ValueError('max_legs must be at least 1')

    def _bump(station_id):
        # Add one use to station_id, creating the counter on first sight.
        usage_table[station_id] = usage_table.get(station_id, 0) + 1

    leg_numbers = range(1, max_legs + 1)
    # Each element is one row: a tuple with the station of every leg.
    boardings = zip(*(table[f'승차역ID{leg}'].tolist() for leg in leg_numbers))
    alightings = zip(*(table[f'하차역ID{leg}'].tolist() for leg in leg_numbers))

    for boarded, alighted in zip(boardings, alightings):
        _bump(boarded[0])
        for leg in range(max_legs - 1):
            _bump(alighted[leg])
            # Count the next boarding only when it is at another station.
            if alighted[leg] != boarded[leg + 1]:
                _bump(boarded[leg + 1])
        _bump(alighted[-1])

    return usage_table

def _export_daily_usage(trips, usage_date):
    """Count station usage for one day and save it to 'usage_count_<date>.csv'.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trip table with an '이용날짜' column holding dates as 'YYYYMMDD' text.
    usage_date : str
        The day to select, e.g. '20180701'.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The trips of that day, and the per-station counts in a single column
        named '<date>이용횟수' indexed by station ID.
    """
    day_trips = pd.DataFrame(trips[trips['이용날짜'] == usage_date])
    # Passing columns= here also gives the right (empty) frame for a day
    # without any trips.
    day_counts = pd.DataFrame.from_dict(usage_calculate(day_trips, {}), orient='index',
                                        columns=[usage_date + '이용횟수'])
    # 0 stands for "no station"; it may be absent, so a missing key must not raise.
    day_counts = day_counts.drop(0.0, errors='ignore')
    day_counts.to_csv('usage_count_' + usage_date + '.csv', sep=',')
    return day_trips, day_counts


usage_20180701, usage_count_20180701 = _export_daily_usage(tripChain3, '20180701')
usage_20180702, usage_count_20180702 = _export_daily_usage(tripChain3, '20180702')
usage_20180703, usage_count_20180703 = _export_daily_usage(tripChain3, '20180703')
usage_20180704, usage_count_20180704 = _export_daily_usage(tripChain3, '20180704')


In [None]:
usage_count_20180701 = pd.read_csv('./usage_count_20180701.csv')
usage_count_20180701 = usage_count_20180701.rename(columns={'Unnamed: 0' : 'station_id' })
usage_count_20180701.sort_values(by = '20180701이용횟수', ascending=False)

In [None]:
usage_count_20180702 = pd.read_csv('./usage_count_20180702.csv')
usage_count_20180702 = usage_count_20180702.rename(columns={'Unnamed: 0' : 'station_id' })
usage_count_20180702.sort_values(by = '20180702이용횟수', ascending=False)

In [None]:
usage_count_20180703 = pd.read_csv('./usage_count_20180703.csv')
usage_count_20180703 = usage_count_20180703.rename(columns={'Unnamed: 0' : 'station_id' })
usage_count_20180703.sort_values(by = '20180703이용횟수', ascending=False)

In [None]:
usage_count_20180704 = pd.read_csv('./usage_count_20180704.csv')
usage_count_20180704 = usage_count_20180704.rename(columns={'Unnamed: 0' : 'station_id' })
usage_count_20180704.sort_values(by = '20180704이용횟수', ascending=False)

In [None]:
# Put the four daily counts side by side, one row per station_id.
# NOTE(review): merge defaults to an inner join, so a station that is missing
# from any one of the four days is left out of the result entirely - confirm
# this is intended.
usage_by_date = usage_count_20180701
for later_day_counts in (usage_count_20180702, usage_count_20180703, usage_count_20180704):
    usage_by_date = usage_by_date.merge(later_day_counts, on='station_id')
usage_by_date

In [None]:
# Look up city, stop name and coordinates for each station through its
# 이비카드정류장ID, drop the join key, and keep the first match per station_id.
usage_by_date = (
    pd.merge(usage_by_date, stations2[['시군명', '정류소명', 'WGS84위도', 'WGS84경도', '이비카드정류장ID']], how='left', left_on='station_id', right_on = '이비카드정류장ID')
    .drop(['이비카드정류장ID'], axis=1)
    .drop_duplicates(subset ="station_id", keep = 'first')
)
usage_by_date

In [None]:
top_hwaseong_stations_20180701 = usage_by_date[usage_by_date['시군명'] == '화성시'].sort_values(by='20180701이용횟수', ascending=False).head(60)
top_hwaseong_stations_20180701

In [None]:
top_hwaseong_stations_20180702 = usage_by_date[usage_by_date['시군명'] == '화성시'].sort_values(by='20180702이용횟수', ascending=False).head(60)
top_hwaseong_stations_20180702

In [None]:
top_hwaseong_stations_20180703 = usage_by_date[usage_by_date['시군명'] == '화성시'].sort_values(by='20180703이용횟수', ascending=False).head(60)
top_hwaseong_stations_20180703

In [None]:
top_hwaseong_stations_20180704 = usage_by_date[usage_by_date['시군명'] == '화성시'].sort_values(by='20180704이용횟수', ascending=False).head(60)
top_hwaseong_stations_20180704

In [None]:
# Add the top stations of each day to the top_bus_stations_by_date layer.
# Each entry is (stations of the day, coordinate offset, marker colour).
# NOTE(review): only the first day is shifted and coloured differently; the
# other three days share the same colour and exact position, so their markers
# overlap - confirm whether each day was meant to be distinguishable.
daily_marker_specs = [
    (top_hwaseong_stations_20180701, 0.001, 'lightblue'),
    (top_hwaseong_stations_20180702, 0, 'red'),
    (top_hwaseong_stations_20180703, 0, 'red'),
    (top_hwaseong_stations_20180704, 0, 'red'),
]

for daily_top_stations, coord_offset, marker_color in daily_marker_specs:
    for _, stop in daily_top_stations.iterrows():
        marker_position = [stop['WGS84위도'] + coord_offset, stop['WGS84경도'] + coord_offset]
        folium.Marker(marker_position, popup=stop['정류소명'], icon = folium.Icon(color=marker_color)).add_to(top_bus_stations_by_date)

Busline


In [None]:
Busline.save('station_route_ranking_visualization.html')