In [1]:
import json
import warnings
from pathlib import Path

warnings.filterwarnings('ignore')

import folium
import pandas as pd
import requests
# Turns JSON-style dicts into a DataFrame.
# NOTE(review): this import path has been deprecated since pandas 1.0 in favour
# of the top-level pd.json_normalize; confirm it still resolves on the
# installed pandas version.
from pandas.io.json import json_normalize

https://www.bikeseoul.com/main.do
Ajax 요청을 확인하려면

1) 개발자 도구(Inspect)를 연다.    
2) Network 탭으로 이동한다.    
3) Fetch/XHR 필터를 선택한다.     

In [2]:
# Endpoint and method are taken from the browser's Network > Fetch/XHR panel:
#   Request URL:    https://www.bikeseoul.com/app/station/getStationRealtimeStatus.do
#   Request Method: POST
# Use whichever HTTP method the panel reports for the target site (GET or POST).
targetSite = 'https://www.bikeseoul.com/app/station/getStationRealtimeStatus.do'

# A POST must also send the fields listed under "Form Data" at the bottom of
# the Fetch/XHR Headers view; any additional fields would go into this dict.
# The server answers the request for targetSite + {'stationGrpSeq': 'ALL'} and
# that answer is stored in `request`.
# NOTE: despite its name, `request` therefore holds the *response*; the name is
# kept because the following cells read it.
# `timeout` stops the cell from hanging indefinitely when the server never
# answers (requests applies no timeout unless one is given).
request = requests.post(
    targetSite,
    data={'stationGrpSeq': 'ALL'},
    timeout=30,
)

# Fail right here with an HTTPError on a 4xx/5xx reply instead of letting a
# later cell try to parse an error page as JSON.
# (print(request) shows <Response [200]> on success.)
request.raise_for_status()

print(type(request.text))  # the JSON payload arrives as a plain string
print(request.text)

<class 'str'>


In [3]:
# Approach #1: json.loads() parses the crawled JSON text into Python objects
# (a dict here), which is much easier to work with than the raw string.
bike_json = json.loads(request.text)
print(type(bike_json), bike_json, sep='\n')    # the str has become a dict

<class 'dict'>


In [4]:
# Approach #2: the response object's own json() method performs the same
# str -> dict conversion, so no separate json.loads() call is needed.
bike_json = request.json()
print(f'{type(bike_json)}\n{bike_json}')    # the str has become a dict

<class 'dict'>


In [5]:
# Flatten the records stored under the 'realtimeList' key of the parsed
# response into a DataFrame, one row per record.
# Usage: pd.json_normalize(parsed_dict, record_path=<key holding the records>).
# The top-level pd.json_normalize is the supported spelling; the
# pandas.io.json.json_normalize path has been deprecated since pandas 1.0.
bike_df = pd.json_normalize(bike_json, record_path='realtimeList')
bike_df

Unnamed: 0,stationName,stationImgFileName,stationId,stationLongitude,stationLatitude,rackTotCnt,parkingBikeTotCnt,parkingQRBikeCnt,parkingELECBikeCnt,stationSeCd,mode
0,위트콤공장,,ST-598,0.00000000,0.00000000,68,0,0,0,RAK_001,
1,102. 망원역 1번출구 앞,,ST-4,126.91062927,37.55564880,22,0,15,3,RAK_001,
2,103. 망원역 2번출구 앞,,ST-5,126.91083527,37.55495071,16,0,28,3,RAK_001,
3,104. 합정역 1번출구 앞,,ST-6,126.91498566,37.55062866,15,0,23,0,RAK_001,
4,105. 합정역 5번출구 앞,,ST-7,126.91482544,37.55000687,7,0,0,0,RAK_001,
...,...,...,...,...,...,...,...,...,...,...,...
2497,5063. 서남환경공원(방화동도시개발2단지방향),,ST-2890,126.82331848,37.57334137,16,0,3,0,RAK_002,
2498,5064. 양천향교역8번출구,,ST-2943,126.83983612,37.56899643,10,0,0,0,RAK_002,
2499,5072. 김포공항입구 교통섬,,ST-2947,126.80708313,37.56154633,10,0,10,0,RAK_002,
2500,상담센터,,ST-1747,0.00000000,0.00000000,4,0,5,0,RAK_001,


In [6]:
# (rows, columns) of the raw station table.
bike_df.shape

(2502, 11)

In [7]:
# Per-column non-null counts and dtypes of the raw station table.
bike_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2502 entries, 0 to 2501
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   stationName         2502 non-null   object
 1   stationImgFileName  2502 non-null   object
 2   stationId           2502 non-null   object
 3   stationLongitude    2502 non-null   object
 4   stationLatitude     2502 non-null   object
 5   rackTotCnt          2502 non-null   object
 6   parkingBikeTotCnt   2502 non-null   object
 7   parkingQRBikeCnt    2502 non-null   object
 8   parkingELECBikeCnt  2502 non-null   object
 9   stationSeCd         2502 non-null   object
 10  mode                0 non-null      object
dtypes: object(11)
memory usage: 215.1+ KB


In [8]:
# Column labels available in the raw station table.
bike_df.columns

Index(['stationName', 'stationImgFileName', 'stationId', 'stationLongitude',
       'stationLatitude', 'rackTotCnt', 'parkingBikeTotCnt',
       'parkingQRBikeCnt', 'parkingELECBikeCnt', 'stationSeCd', 'mode'],
      dtype='object')

In [9]:
# Keep only the columns needed to draw the station markers on the map:
#   stationName         station name
#   stationLongitude    longitude
#   stationLatitude     latitude
#   rackTotCnt          total number of bikes that can be parked
#   parkingBikeTotCnt   parked LCD-type Ttareungi bikes
#   parkingQRBikeCnt    parked QR-type Ttareungi bikes
#   parkingELECBikeCnt  parked "Saessak" (sprout) Ttareungi bikes
#
# Selecting two or more columns requires passing the names as a list, hence
# the double brackets.  .copy() makes bike_df_map an independent frame, so it
# can be modified afterwards without SettingWithCopyWarning (which the global
# warnings filter would otherwise hide) and without any link back to bike_df.
bike_df_map = bike_df[['stationName', 'stationLongitude',
       'stationLatitude', 'rackTotCnt', 'parkingBikeTotCnt',
       'parkingQRBikeCnt', 'parkingELECBikeCnt']].copy()

bike_df_map
# Base data is ready.

Unnamed: 0,stationName,stationLongitude,stationLatitude,rackTotCnt,parkingBikeTotCnt,parkingQRBikeCnt,parkingELECBikeCnt
0,위트콤공장,0.00000000,0.00000000,68,0,0,0
1,102. 망원역 1번출구 앞,126.91062927,37.55564880,22,0,15,3
2,103. 망원역 2번출구 앞,126.91083527,37.55495071,16,0,28,3
3,104. 합정역 1번출구 앞,126.91498566,37.55062866,15,0,23,0
4,105. 합정역 5번출구 앞,126.91482544,37.55000687,7,0,0,0
...,...,...,...,...,...,...,...
2497,5063. 서남환경공원(방화동도시개발2단지방향),126.82331848,37.57334137,16,0,3,0
2498,5064. 양천향교역8번출구,126.83983612,37.56899643,10,0,0,0
2499,5072. 김포공항입구 교통섬,126.80708313,37.56154633,10,0,10,0
2500,상담센터,0.00000000,0.00000000,4,0,5,0


In [10]:
# Preprocessing starts here: inspect the column dtypes first.
bike_df_map.dtypes

stationName           object
stationLongitude      object
stationLatitude       object
rackTotCnt            object
parkingBikeTotCnt     object
parkingQRBikeCnt      object
parkingELECBikeCnt    object
dtype: object

In [11]:
# Convert the columns from object dtype to numeric dtypes.

In [12]:
# Every column arrives as text (object dtype).  Convert the coordinates to
# float and the counts to int with a single astype mapping, then add `total`
# (bikes currently parked, summed over the three count columns).
# Building a new frame and rebinding the name avoids assigning column by
# column onto a frame that was selected from bike_df, which is the pattern
# that raises SettingWithCopyWarning.
bike_df_map = (
    bike_df_map
    .astype({
        'stationLongitude': float,
        'stationLatitude': float,
        'rackTotCnt': int,
        'parkingBikeTotCnt': int,
        'parkingQRBikeCnt': int,
        'parkingELECBikeCnt': int,
    })
    .assign(
        # The lambda receives the already-converted frame, so this is integer
        # addition rather than string concatenation.
        total=lambda frame: (
            frame['parkingBikeTotCnt']
            + frame['parkingQRBikeCnt']
            + frame['parkingELECBikeCnt']
        )
    )
)
bike_df_map.dtypes

stationName            object
stationLongitude      float64
stationLatitude       float64
rackTotCnt              int32
parkingBikeTotCnt       int32
parkingQRBikeCnt        int32
parkingELECBikeCnt      int32
total                   int32
dtype: object

In [13]:
# Preview the first rows of the map table.
bike_df_map.head()

Unnamed: 0,stationName,stationLongitude,stationLatitude,rackTotCnt,parkingBikeTotCnt,parkingQRBikeCnt,parkingELECBikeCnt,total
0,위트콤공장,0.0,0.0,68,0,0,0,0
1,102. 망원역 1번출구 앞,126.910629,37.555649,22,0,15,3,18
2,103. 망원역 2번출구 앞,126.910835,37.554951,16,0,28,3,31
3,104. 합정역 1번출구 앞,126.914986,37.550629,15,0,23,0,23
4,105. 합정역 5번출구 앞,126.914825,37.550007,7,0,0,0,0


In [14]:
# Remove implausible records: keep only rows whose longitude exceeds 125 and
# whose latitude exceeds 37 (this drops the rows whose coordinates are 0.0).
bike_df_map = bike_df_map.loc[
    bike_df_map['stationLongitude'].gt(125)
    & bike_df_map['stationLatitude'].gt(37)
]
bike_df_map

Unnamed: 0,stationName,stationLongitude,stationLatitude,rackTotCnt,parkingBikeTotCnt,parkingQRBikeCnt,parkingELECBikeCnt,total
1,102. 망원역 1번출구 앞,126.910629,37.555649,22,0,15,3,18
2,103. 망원역 2번출구 앞,126.910835,37.554951,16,0,28,3,31
3,104. 합정역 1번출구 앞,126.914986,37.550629,15,0,23,0,23
4,105. 합정역 5번출구 앞,126.914825,37.550007,7,0,0,0,0
5,106. 합정역 7번출구 앞,126.912827,37.548645,14,0,4,1,5
...,...,...,...,...,...,...,...,...
2495,5061. 우장산동 가곡어린이공원앞,126.837517,37.555454,10,0,3,0,3
2496,5062. 마곡동 767-6,126.824554,37.563541,10,0,1,0,1
2497,5063. 서남환경공원(방화동도시개발2단지방향),126.823318,37.573341,16,0,3,0,3
2498,5064. 양천향교역8번출구,126.839836,37.568996,10,0,0,0,0


In [15]:
# Summary statistics of the numeric columns after filtering.
bike_df_map.describe()

Unnamed: 0,stationLongitude,stationLatitude,rackTotCnt,parkingBikeTotCnt,parkingQRBikeCnt,parkingELECBikeCnt,total
count,2498.0,2498.0,2498.0,2498.0,2498.0,2498.0,2498.0
mean,126.99185,37.547864,13.434347,0.0,7.611689,0.888711,8.5004
std,0.092206,0.052107,4.934076,0.0,9.983634,1.667742,10.402137
min,126.798599,37.430977,4.0,0.0,0.0,0.0,0.0
25%,126.915268,37.505705,10.0,0.0,1.0,0.0,1.0
50%,127.003853,37.546839,12.0,0.0,4.0,0.0,5.0
75%,127.064255,37.577715,16.75,0.0,11.0,1.0,12.0
max,127.180641,37.691013,48.0,0.0,95.0,17.0,95.0


In [16]:
# Centre the map on the mean coordinate of the remaining stations.
bike_map = folium.Map(
    location=[
        bike_df_map['stationLatitude'].mean(),
        bike_df_map['stationLongitude'].mean(),
    ],
    zoom_start=12,
)

# One small red circle per station; its popup lists the parked-bike counts.
# itertuples() yields lightweight namedtuples instead of building a Series for
# every row the way iterrows() does.
for station in bike_df_map.itertuples(index=False):
    # parkingBikeTotCnt counts the LCD-type bikes, so the label says LCD.
    label = '{} LCD: {}대, QR: {}대, 새싹: {}대'.format(
        station.stationName,
        station.parkingBikeTotCnt,
        station.parkingQRBikeCnt,
        station.parkingELECBikeCnt,
    )
    folium.CircleMarker(
        location=[station.stationLatitude, station.stationLongitude],
        popup=folium.Popup(label, max_width=250),
        radius=2,
        color='#FF0000',
        fill_color='#FF0000',
    ).add_to(bike_map)

# The target directory must exist before the HTML file can be written, so
# create ./Output when it is missing (e.g. on a fresh checkout).
output_path = Path('./Output/bike.html')
output_path.parent.mkdir(parents=True, exist_ok=True)
bike_map.save(str(output_path))
bike_map