## 라이브러리 불러오기

In [1]:
import pandas as pd
# DataFrame display settings: the pandas display options below are changed from their defaults.
from IPython.display import display
# None removes the limit on how many columns are shown.
pd.options.display.max_columns = None
pd.options.display.max_rows = None # None removes the row limit as well; the pandas default is 60

import datetime

# API 호출
# ## https://data.go.kr/tcs/dss/selectApiDataDetailView.do?publicDataPk=15057440
from urllib.request import Request, urlopen
# Request https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.Request
# urlopen https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.urlopen
from urllib.parse import urlencode, quote_plus
# urlencode https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.urlencode
# quote_plus https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.quote_plus
from urllib.parse import unquote
import requests

# XML 파싱
import xml.etree.ElementTree as ET

## 기본값 설정하기

In [2]:
# Default parameters: the route id sent to the API and the date parts used in the data file name.
routeId = 204000046
year, month, day = 2020, 11, 13

## Key Unquote

In [3]:
# Service keys in their percent-encoded form; unquote() turns them back into the raw key text.
# NOTE(review): these credentials are hard-coded in the notebook - consider loading them from
# an environment variable or a local config file instead of keeping them in the source.
KEY = 'yEaR%2F3MDedRSlVJL%2F2pxnVg0yre1N5VF3RZ%2FUAt56MJ7J2mNpfqhUvy05pXV0uhHTVY7DbyCR8xmMaDdYga67Q%3D%3D'  # Jonghyun
KEYSW = 'M%2B4%2FqUiadT8X8PhgFjaQLDu%2BIOgPMURfGsOX%2FmVxwHQVJgnVR%2FMPjDYXkuQNwUFbZXlfnX5Lls3SUCiCLIFjgQ%3D%3D'

# Decode both keys in one step (requests.utils.unquote would be an equivalent alternative).
apiKey, apiKeySW = unquote(KEY), unquote(KEYSW)

## API 호출

In [4]:
# Transit-station list query: busrouteservice/station
# NOTE: an earlier comment here described the busrouteservice/info endpoint (route number,
# first/last stop, first/last bus time, headway, operator). The URL actually requested is
# busrouteservice/station, whose response is parsed for stationSeq / stationId / stationName.
url = 'http://openapi.gbis.go.kr/ws/rest/busrouteservice/station'

# urlencode() percent-encodes keys and values itself, so the decoded service key is passed as-is.
queryParams = '?' + urlencode({'serviceKey': apiKey, 'routeId': routeId})
request = Request(url + queryParams, method='GET')

# Call the API and keep the XML response as a string in "oneLineXML".
# The with-block closes the HTTP response once the body has been read.
with urlopen(request) as response:
    oneLineXML = response.read().decode('utf8')

## XML 파싱

In [5]:
# For API calls on the "openapi" domain the root element holds three child tags,
# regardless of "headerCd": [comMsgHeader, msgHeader, msgBody]
xtree = ET.fromstring(oneLineXML)
msgBody = xtree[2]  # third child = msgBody

# Collect the text of the wanted tags from every child element of msgBody.
tagList = ["stationSeq", "stationId", "stationName"]
stationList = [
    [branch.find(tag).text for tag in tagList]
    for branch in msgBody
]

# Build the station table and convert the text values to typed columns.
station_df = pd.DataFrame(stationList, columns=tagList).astype({
    "stationSeq": "int32",
    "stationId": "int32",
    "stationName": "string",
})

## 데이터 불러오기

In [6]:
# A 2-D table of stop arrival times can be built per routeId.
# The last stop number is required: it is obtained through the transit-station list API.
# Missing values in the middle, or at the beginning and end, still have to be filled in.

rootPath = 'C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/buslocationservice/'
# File name pattern: <routeId>/<routeId>_<yy>-<month>-<day>.txt (month and day are not zero-padded).
outputPath = f"{rootPath}{routeId}/{routeId}_{str(year)[-2:]}-{month}-{day}.txt"

# Column names for the space-separated log; 'DateTime' comes first and 'Nan' is a filler
# column that is dropped right after reading.
# Alternative layout without 'lowPlate' (presumably for logs recorded without that field - confirm):
# ['DateTime', 'endBus', 'plateNo', 'plateType', 'remainSeatCnt', 'routeId', 'stationId', 'stationSeq', 'Nan']
itemTagList = [
    'DateTime',
    'endBus', 'lowPlate', 'plateNo', 'plateType', 'remainSeatCnt',
    'routeId', 'stationId', 'stationSeq',
    'Nan',
]

# Read the file (skipping its first line), drop the filler column, order by plate number
# and discard rows that contain missing values.
_df = (
    pd.read_csv(outputPath, sep=' ', skiprows=[0], names=itemTagList)
    .drop(["Nan"], axis=1)
    .sort_values(['plateNo'])
    .dropna()
)

# Convert the timestamp strings to datetime objects.
_df['DateTime'] = pd.to_datetime(_df['DateTime'])

# Every remaining column is an int32 except the plate number, which is kept as a string.
intColumns = ['endBus', 'lowPlate', 'plateType', 'remainSeatCnt', 'routeId', 'stationId', 'stationSeq']
_df = _df.astype({**{column: 'int32' for column in intColumns}, 'plateNo': 'string'})
_df.dtypes

DateTime         datetime64[ns, pytz.FixedOffset(540)]
endBus                                           int32
lowPlate                                         int32
plateNo                                         string
plateType                                        int32
remainSeatCnt                                    int32
routeId                                          int32
stationId                                        int32
stationSeq                                       int32
dtype: object

In [7]:
# Split the operation records by vehicle: the group keys of "plateNo" are the distinct plate numbers.
recordsPerPlate = _df.groupby(['plateNo']).size()
plateNoList = recordsPerPlate.index

In [8]:
# Iterate over the records one vehicle (plate number) at a time.
for plateNo in plateNoList:

    # Records of this vehicle sorted by "DateTime" ascending, re-indexed 0..n-1.
    sameBusList = _df[_df['plateNo'] == plateNo].sort_values(['DateTime']).reset_index(drop=True)

    # A bus does not go backwards, so collect every position i where "stationSeq" is smaller
    # than in the previous record. diff() is negative exactly at those rows (the first row is
    # NaN and never matches); after reset_index the index labels equal the row positions.
    seqDropped = sameBusList['stationSeq'].diff() < 0
    indexList = sameBusList.index[seqDropped].tolist()

    # The total length closes the last slice.
    indexList.append(len(sameBusList))

    # Combine the start position, the drop positions and the end position into
    # (start, stop) slices, one per run of the vehicle.
    index = 0
    tupleList = []
    for i in indexList:
        tupleList.append((index, i))
        # One row per stationSeq within this run, taking the column-wise minimum.
        small_df = sameBusList.iloc[index:i].groupby(by=['stationSeq']).min()

        # Merge onto the station table so that stops without a record appear as empty rows.
        merge_df = pd.merge(station_df, small_df, how='outer', on='stationId')
        merge_df = merge_df.set_index(keys='stationSeq')

        display(merge_df)
        index = i
        # NOTE(review): the two unconditional breaks stop after the first run of the first
        # vehicle - this looks like a debugging leftover; confirm before removing.
        break
    break

Unnamed: 0_level_0,stationId,stationName,DateTime,endBus,lowPlate,plateType,remainSeatCnt,routeId
stationSeq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,206000010,운중동먹거리촌,NaT,,,,,
2,206000009,운중동푸르지오하임,NaT,,,,,
3,206000658,한빛교회.월든힐스아파트,NaT,,,,,
4,206000558,운중초등학교,NaT,,,,,
5,206000007,운중중학교,NaT,,,,,
6,206000006,운중동행정복지센터,2020-11-13 16:07:43.743358+09:00,0.0,0.0,3.0,44.0,204000046.0
7,206000005,뫼루니육교,2020-11-13 16:09:35.156841+09:00,0.0,0.0,3.0,39.0,204000046.0
8,206000004,판교원마을1.2단지.판교도서관,2020-11-13 16:10:30.930773+09:00,0.0,0.0,3.0,39.0,204000046.0
9,206000555,판교청소년수련관.판교종합사회복지관,2020-11-13 16:13:17.798959+09:00,0.0,0.0,3.0,39.0,204000046.0
10,206000003,한림아파트,2020-11-13 16:14:13.444355+09:00,0.0,0.0,3.0,38.0,204000046.0
