## 라이브러리 불러오기

In [1]:
# API 호출
# ## https://data.go.kr/tcs/dss/selectApiDataDetailView.do?publicDataPk=15057440
from urllib.request import Request, urlopen
# Request https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.Request
# urlopen https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.urlopen
from urllib.parse import urlencode, quote_plus
# urlenocde https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.urlencode
# quote_plus https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.quote_plus
from urllib.parse import unquote
import requests

# 시간 생성
import time
import datetime
from pytz import timezone, utc

# 폴더 자동 생성
import os

# 코드 스케줄링
import threading
# import schedule

# XML 파싱
import xml.etree.ElementTree as ET

# 입력데이터 데이터프레임 작성
import pandas as pd
import numpy as np

# DataFrame 화면 출력                                                # DataFrame 화면 출력 설정이 변경되어 있습니다 #
from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows = None # default = 60

#EXCEL
import openpyxl

In [2]:
# 폴더 자동 생성 함수
# https://data-make.tistory.com/170
def createFolder(directory):
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError:
        print ('Error: Creating directory. ' +  directory)

## 기본값 설정하기

In [3]:
routeId = 229000266
year = 2020
month = 11
day = 18

## Key Unquote

In [4]:
KEY = 'yEaR%2F3MDedRSlVJL%2F2pxnVg0yre1N5VF3RZ%2FUAt56MJ7J2mNpfqhUvy05pXV0uhHTVY7DbyCR8xmMaDdYga67Q%3D%3D' # 종현
# apiKEY = requests.utils.unquote(KEY)
apiKey = unquote(KEY)
# print(apiKey)
KEYSW = 'M%2B4%2FqUiadT8X8PhgFjaQLDu%2BIOgPMURfGsOX%2FmVxwHQVJgnVR%2FMPjDYXkuQNwUFbZXlfnX5Lls3SUCiCLIFjgQ%3D%3D'
apiKeySW = unquote(KEYSW)

## API 호출

In [25]:
# 노선정보항목조회 busrouteservice/info
# routeId의 정류장 목록을 받아옵니다.
url = 'http://openapi.gbis.go.kr/ws/rest/busrouteservice/station'
queryParams = '?' + urlencode({ quote_plus('serviceKey') : apiKey, quote_plus('routeId') : routeId })
request = Request(url + queryParams)
request.get_method = lambda: 'GET'

# API를 호출하여 XML 형식으로 된 string 데이터를 변수 "oneLineXML" 에 저장합니다.
oneLineXML = urlopen(request).read().decode('utf8')
# oneLineXML

## XML 파싱

In [24]:
# xtree는 "openapi" 도메인 API 호출에서 "headerCd"에 상관없이 3개의 태그를 갖습니다.
# [comMsgHeader, msgHeader, msgBody]
xtree = ET.fromstring(oneLineXML)
# for branch in xtree:
#     print(branch)
msgBody = xtree[2]

tagList = ["stationSeq", "stationId", "stationName"]
stationList = []
for branch in msgBody:
    stationList.append([branch.find(tag).text for tag in tagList])

# station_df의 index가 0부터 시작하는 문제를 해결 하기 위해 rangeIndex를 지정하였습니다.
stationCount = len(stationList) # 뒤에서 다시 사용되므로 함수로 선언시 주의
rangeIndex = pd.RangeIndex(start=1, stop=stationCount+1)

station_df = pd.DataFrame(stationList, columns=tagList, index=rangeIndex)
station_df = station_df.astype({
    "stationSeq" : "int32",
    "stationId" : "int32", 
    "stationName" : "string"
    
})
# station_df

## 데이터 불러오기

In [15]:
# routeId 별 정류소 도착시간을 나타내는 2차원 배열을 생성 할 수 있습니다
# 마지막 정류소 번호가 필요합니다 : 경유정류소목록조회 API 서비스를 이용하여 가져옵니다.
# 중간 또는 처음과 끝에 발생하는 결측치를 채워야 합니다.


rootPath = 'C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/buslocationservice/'
inputPath = rootPath + str(routeId) + '/' + str(routeId) + '_' + str(year)[-2:] + '-' + str(month) + '-' + str(day) + '.txt'

itemTagList = ['endBus', 'lowPlate', 'plateNo', 'plateType', 'remainSeatCnt', 'routeId', 'stationId', 'stationSeq', 'Nan']
# itemTagList = ['endBus', 'plateNo', 'plateType', 'remainSeatCnt', 'routeId', 'stationId', 'stationSeq', 'Nan']
itemTagList.insert(0, 'DateTime')
_df = pd.read_csv(inputPath, sep=' ', skiprows=[0], names=itemTagList)
_df = _df.drop(["Nan"], axis=1)
_df = _df.sort_values(['plateNo'])
_df = _df.dropna()
# _df = _df.drop(_df[_df["endBus"]=="API"].index)

# string을 datetime객체로 변환하기
_df['DateTime'] = pd.to_datetime(_df['DateTime'])
# dateTimeObj = datetime.datetime.strptime(_df['DateTime'], '%Y-%m-%dT%H:%M:%S.%f%z')
# _df['DateTime'] = _df.DateTime.str.split('.').str[0]
# _df['DateTime'] = _df.DateTime.str.replace('T', ' ')

_df = _df.astype({
    'endBus': 'int32', 
    'lowPlate': 'int32', 
    'plateNo': 'string', 
    'plateType': 'int32',
    'remainSeatCnt': 'int32', 
    'routeId': 'int32', 
    'stationId': 'int32', 
    'stationSeq': 'int32'
})
# _df

In [11]:
# stationSeq 가 뒤바뀌는 사례도 발생합니다. 
# cf) routeId = 204000046, DateTime = 20-11-17, plateNo = 경기78아1147, index = 166
# _df[_df['plateNo']=='경기78아1147'].sort_values(by='DateTime').reset_index(drop=True).iloc[160:170] 

In [12]:
# 차량 운행기록을 "plateNo"를 기준으로 구분합니다.
plateNoList = _df.groupby(['plateNo']).size().index
plateNoList

Index(['경기76자2005', '경기76자2006', '경기76자2007', '경기76자2009', '경기76자2010'], dtype='object', name='plateNo')

In [22]:
# 차량 번호 한개에 대해서 검사를 실행합니다.
temp_df =_df[_df['plateNo']=='경기76자2005'].sort_values(by='DateTime').reset_index(drop=True)
tempIndexList = []
for i in range(1, len(temp_df)):
    if(temp_df.iloc[i-1]['stationSeq'] > temp_df.iloc[i]['stationSeq']):
            tempIndexList.append(i)
            print((temp_df.iloc[i-1]['stationSeq'] , temp_df.iloc[i]['stationSeq']))
            print(i)
            
    tempIndexList.append(len(temp_df))
display(temp_df)

(144, 4)
207
(119, 2)
380
(143, 9)
610


Unnamed: 0,DateTime,endBus,lowPlate,plateNo,plateType,remainSeatCnt,routeId,stationId,stationSeq
0,2020-11-18 06:00:03.455493+09:00,0,0,경기76자2005,3,41,229000266,229000744,3
1,2020-11-18 06:01:01.822545+09:00,0,0,경기76자2005,3,41,229000266,229000743,4
2,2020-11-18 06:02:00.166672+09:00,0,0,경기76자2005,3,40,229000266,229000740,7
3,2020-11-18 06:02:58.070683+09:00,0,0,경기76자2005,3,40,229000266,229000739,8
4,2020-11-18 06:03:55.734835+09:00,0,0,경기76자2005,3,40,229000266,229000738,9
5,2020-11-18 06:04:54.178761+09:00,0,0,경기76자2005,3,40,229000266,229000738,9
6,2020-11-18 06:05:52.492947+09:00,0,0,경기76자2005,3,39,229000266,229000565,10
7,2020-11-18 06:06:51.006977+09:00,0,0,경기76자2005,3,39,229000266,229000564,11
8,2020-11-18 06:07:49.081003+09:00,0,0,경기76자2005,3,39,229000266,229000582,13
9,2020-11-18 06:08:47.395018+09:00,0,0,경기76자2005,3,38,229000266,229000582,13


In [16]:
# 정류장의 수 만큼 행을 갖는 빈 데이터프레임을 생성합니다. 변수 rangeIndex는 [XML파싱]에서 선언되어 있습니다.
_oneDay_df = pd.DataFrame(index = rangeIndex)

# 각 차량 번호를 기준으로 반복문을 수행합니다.
for plateNo in plateNoList:
    
    # 특정 차량번호와 같은 운행기록을 "DateTime을 기준으로 오름차순으로 정렬합니다."
    sameBusList = _df[_df['plateNo'] == plateNo].sort_values(['DateTime']).reset_index(drop=True)
#     display(sameBusList)
    
    
    # 버스는 뒤로가지 않으므로
    # "stationSeq"가 작아지는 인덱스 i의 리스트를 만듭니다.
    # API 호출 오류로 stationSeq가 작아지는 경우가 있습니다. 이 문제를 해결하기 위해 보정계수 diffK를 둡니다. 
    indexList = []
    diffK = 10 if ( True ) else 0
    for i in range(1,len(sameBusList)):
        if(sameBusList.iloc[i-1]['stationSeq'] > sameBusList.iloc[i]['stationSeq'] + diffK ):
            indexList.append(i)
#             print((sameBusList.iloc[i-1]['stationSeq'] , sameBusList.iloc[i]['stationSeq']))
            
    indexList.append(len(sameBusList))
#     print(indexList)
    
    # 첫번재 인덱스, 인덱스의 리스트, 마지막 인덱스를 조합하여 호차별로 운행기록을 구분합니다.
    index = 0
    tupleList = []
    for i in indexList:
        tupleList.append((index, i))
        small_df = sameBusList.iloc[index:i].groupby(by=['stationSeq']).min()
        
        # 정류소 데이터프레임에 합칩니다.
        merge_df = pd.merge(station_df, small_df, how='outer', on='stationId')
        # merge_df = merge_df.set_index(keys='stationSeq')
        
        if(1):
            # 1번 정류장에 대한 DateTime의 결측치가 많으면 1일 시간표에서 column 전체가 정렬이 안되므로 bfill방식으로 두번 결측치를 채웁니다.
            merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='bfill', limit=1)
#             merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='ffill', limit=1)
#             merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='bfill', limit=1)


        if(0):
            # 결측치를 채울 경우 문제가 되는 데이터가 잘 드러나지 않으므로 결측치는 가장 마지막에 채우세요.
            # if의 인수가 1이면 결측치를 처리합니다. 결측치 처리를 하지 않으려면 if의 인수를 0으로 수정하세요. 
            # df.interpolate(method='linear' or 'polynomial')
            # 시간에 대한 결측치 처리
            merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='backfill', limit=3)
            merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='pad', limit=3)

            # 빈좌석에 대한 결측치 처리, limit 값이 다름에 주의
            merge_df['remainSeatCnt'] = merge_df['remainSeatCnt'].interpolate(method='linear', limit=2, limit_direction='both')

            merge_df['endBus'] = merge_df['endBus'].interpolate(method='pad', limit=3)        
            merge_df['endBus'] = merge_df['endBus'].interpolate(method='backfill', limit=3)

            merge_df['lowPlate'] = merge_df['lowPlate'].interpolate(method='pad', limit=3)        
            merge_df['lowPlate'] = merge_df['lowPlate'].interpolate(method='backfill', limit=3)

            merge_df['plateType'] = merge_df['plateType'].interpolate(method='pad', limit=3)        
            merge_df['plateType'] = merge_df['plateType'].interpolate(method='backfill', limit=3)

            merge_df['routeId'] = merge_df['routeId'].interpolate(method='pad', limit=3)        
            merge_df['routeId'] = merge_df['routeId'].interpolate(method='backfill', limit=3)

        _oneDay_df[str(plateNo)+'_'+str(index)] = merge_df['DateTime']
        index=i
#         print(str(plateNo)+'_'+str(index))
#         display(merge_df)
#         break;
#     break;

_oneDay_df

Unnamed: 0,경기76자2005_0,경기76자2005_207,경기76자2005_380,경기76자2005_610,경기76자2006_0,경기76자2006_219,경기76자2006_408,경기76자2006_616,경기76자2007_0,경기76자2007_217,경기76자2007_424,경기76자2007_658,경기76자2009_0,경기76자2009_215,경기76자2009_418,경기76자2009_644,경기76자2010_0,경기76자2010_199,경기76자2010_402,경기76자2010_614
1,2020-11-18 06:00:03.455493+09:00,NaT,2020-11-18 13:59:36.714050+09:00,NaT,2020-11-18 07:00:08.388472+09:00,2020-11-18 11:00:15.837456+09:00,2020-11-18 15:09:35.915201+09:00,2020-11-18 19:59:17.891008+09:00,2020-11-18 07:30:12.183814+09:00,2020-11-18 11:29:27.178231+09:00,2020-11-18 15:44:36.830795+09:00,2020-11-18 20:45:14.051940+09:00,2020-11-18 08:30:19.844274+09:00,NaT,2020-11-18 16:54:39.692087+09:00,NaT,2020-11-18 05:27:03.350856+09:00,2020-11-18 09:30:29.574054+09:00,2020-11-18 13:33:15.044717+09:00,2020-11-18 18:07:33.815780+09:00
2,2020-11-18 06:00:03.455493+09:00,2020-11-18 10:00:35.549830+09:00,2020-11-18 14:01:33.982136+09:00,NaT,2020-11-18 07:00:08.388472+09:00,NaT,2020-11-18 15:10:34.469230+09:00,2020-11-18 20:01:13.958965+09:00,2020-11-18 07:30:12.183814+09:00,NaT,2020-11-18 15:45:47.865834+09:00,2020-11-18 20:45:14.051940+09:00,2020-11-18 08:30:19.844274+09:00,2020-11-18 12:30:48.725367+09:00,2020-11-18 16:55:38.126003+09:00,NaT,2020-11-18 05:27:03.350856+09:00,2020-11-18 09:30:29.574054+09:00,2020-11-18 13:33:15.044717+09:00,2020-11-18 18:07:33.815780+09:00
3,2020-11-18 06:01:01.822545+09:00,2020-11-18 10:00:35.549830+09:00,2020-11-18 14:01:33.982136+09:00,NaT,NaT,NaT,2020-11-18 15:10:34.469230+09:00,2020-11-18 20:01:13.958965+09:00,2020-11-18 07:31:10.317848+09:00,NaT,2020-11-18 15:45:47.865834+09:00,2020-11-18 20:46:12.426361+09:00,2020-11-18 08:31:18.328322+09:00,2020-11-18 12:30:48.725367+09:00,2020-11-18 16:55:38.126003+09:00,2020-11-18 22:06:26.154511+09:00,2020-11-18 05:28:01.364931+09:00,2020-11-18 09:31:28.008082+09:00,NaT,2020-11-18 18:08:32.259660+09:00
4,NaT,2020-11-18 10:01:33.813954+09:00,NaT,NaT,2020-11-18 07:01:06.292446+09:00,2020-11-18 11:01:13.981342+09:00,NaT,NaT,2020-11-18 07:31:10.317848+09:00,2020-11-18 11:31:23.736391+09:00,NaT,NaT,2020-11-18 08:31:18.328322+09:00,NaT,2020-11-18 16:56:36.160292+09:00,2020-11-18 22:06:26.154511+09:00,2020-11-18 05:28:59.708910+09:00,2020-11-18 09:31:28.008082+09:00,NaT,2020-11-18 18:08:32.259660+09:00
5,2020-11-18 06:02:00.166672+09:00,2020-11-18 10:01:33.813954+09:00,2020-11-18 14:02:32.316155+09:00,NaT,2020-11-18 07:01:06.292446+09:00,2020-11-18 11:01:13.981342+09:00,2020-11-18 15:12:31.147314+09:00,NaT,2020-11-18 07:33:06.396409+09:00,2020-11-18 11:31:23.736391+09:00,2020-11-18 15:46:45.679822+09:00,2020-11-18 20:47:10.540221+09:00,2020-11-18 08:33:14.356401+09:00,2020-11-18 12:31:46.999298+09:00,2020-11-18 16:56:36.160292+09:00,2020-11-18 22:07:28.834741+09:00,2020-11-18 05:28:59.708910+09:00,2020-11-18 09:32:26.222167+09:00,2020-11-18 13:34:13.108793+09:00,2020-11-18 18:10:28.757778+09:00
6,2020-11-18 06:02:00.166672+09:00,2020-11-18 10:02:32.057712+09:00,2020-11-18 14:02:32.316155+09:00,NaT,2020-11-18 07:02:04.536547+09:00,2020-11-18 11:02:12.115393+09:00,2020-11-18 15:12:31.147314+09:00,2020-11-18 20:02:12.443916+09:00,2020-11-18 07:34:04.549872+09:00,NaT,2020-11-18 15:46:45.679822+09:00,2020-11-18 20:47:10.540221+09:00,2020-11-18 08:33:14.356401+09:00,2020-11-18 12:31:46.999298+09:00,2020-11-18 16:57:34.525972+09:00,2020-11-18 22:09:30.632057+09:00,2020-11-18 05:29:58.242961+09:00,2020-11-18 09:33:24.496311+09:00,2020-11-18 13:34:13.108793+09:00,2020-11-18 18:11:27.001849+09:00
7,2020-11-18 06:02:58.070683+09:00,2020-11-18 10:04:28.375934+09:00,2020-11-18 14:03:30.440213+09:00,2020-11-18 18:43:27.364849+09:00,2020-11-18 07:03:02.850640+09:00,2020-11-18 11:03:14.919624+09:00,2020-11-18 15:13:29.551393+09:00,2020-11-18 20:02:12.443916+09:00,2020-11-18 07:35:02.783918+09:00,2020-11-18 11:33:20.184356+09:00,2020-11-18 15:47:44.093838+09:00,2020-11-18 20:49:08.908589+09:00,2020-11-18 08:34:12.780451+09:00,2020-11-18 12:33:43.397267+09:00,2020-11-18 16:59:31.082112+09:00,2020-11-18 22:09:30.632057+09:00,2020-11-18 05:31:54.721104+09:00,2020-11-18 09:33:24.496311+09:00,2020-11-18 13:35:11.342693+09:00,2020-11-18 18:11:27.001849+09:00
8,2020-11-18 06:03:55.734835+09:00,2020-11-18 10:05:26.230168+09:00,2020-11-18 14:08:21.850357+09:00,2020-11-18 18:43:27.364849+09:00,2020-11-18 07:03:02.850640+09:00,2020-11-18 11:06:10.101677+09:00,2020-11-18 15:15:25.889520+09:00,2020-11-18 20:04:09.091024+09:00,2020-11-18 07:35:02.783918+09:00,2020-11-18 11:33:20.184356+09:00,2020-11-18 15:51:36.599902+09:00,2020-11-18 20:49:08.908589+09:00,2020-11-18 08:38:05.576669+09:00,2020-11-18 12:33:43.397267+09:00,2020-11-18 16:59:31.082112+09:00,2020-11-18 22:11:27.070113+09:00,2020-11-18 05:31:54.721104+09:00,2020-11-18 09:34:22.680452+09:00,2020-11-18 13:36:09.856794+09:00,2020-11-18 18:12:25.085972+09:00
9,2020-11-18 06:05:52.492947+09:00,2020-11-18 10:08:20.902086+09:00,2020-11-18 14:08:21.850357+09:00,2020-11-18 18:45:23.352851+09:00,2020-11-18 07:05:57.222759+09:00,2020-11-18 11:09:06.043843+09:00,2020-11-18 15:19:18.625523+09:00,2020-11-18 20:05:19.535937+09:00,2020-11-18 07:37:56.566083+09:00,2020-11-18 11:35:16.402305+09:00,2020-11-18 15:54:31.072104+09:00,2020-11-18 20:50:07.482497+09:00,2020-11-18 08:44:54.165029+09:00,2020-11-18 12:35:39.835211+09:00,2020-11-18 17:00:29.085926+09:00,2020-11-18 22:13:23.538089+09:00,2020-11-18 05:32:52.945184+09:00,2020-11-18 09:37:17.532593+09:00,2020-11-18 13:40:03.422783+09:00,2020-11-18 18:15:19.437934+09:00
10,2020-11-18 06:06:51.006977+09:00,2020-11-18 10:08:20.902086+09:00,2020-11-18 14:10:18.638502+09:00,2020-11-18 18:45:23.352851+09:00,2020-11-18 07:06:55.576957+09:00,2020-11-18 11:10:04.284700+09:00,2020-11-18 15:19:18.625523+09:00,2020-11-18 20:05:19.535937+09:00,2020-11-18 07:38:54.970014+09:00,2020-11-18 11:37:12.050525+09:00,2020-11-18 15:55:29.156037+09:00,2020-11-18 20:52:04.160548+09:00,2020-11-18 08:46:50.813132+09:00,2020-11-18 12:36:38.369367+09:00,2020-11-18 17:01:27.079946+09:00,2020-11-18 22:13:23.538089+09:00,2020-11-18 05:33:50.929733+09:00,2020-11-18 09:37:17.532593+09:00,2020-11-18 13:41:01.196966+09:00,2020-11-18 18:15:19.437934+09:00


In [17]:
def get_full_data_index(_oneDay_df):
    '''
    함수 [get_full_data_index(df)]은 데이터프레임 안에서 결측치가 하나도 없는 행의 index를 반환합니다.
    시간표를 배차 시간순으로 정렬하기 위해 데이터프레임의 첫번째 행을 기준으로 열(axis=1)을 정렬합니다.
    하지만 데이터프레임의 첫번째 행을 기준으로 정렬을 하게되면 결측치로 인하여 정렬이 안되는 열이 발생합니다.
    결측치가 없는 행을 찾기 위해 제작한 함수 입니다.

    '''
    for i in range(len(_oneDay_df.columns)):
        for j in range(len(_oneDay_df)):
            if(_oneDay_df.iloc[j].isnull().sum() == i):
                return j
            
        print("모든 행에 결측치가 존재합니다.")

In [20]:
# 출차시간(1행)을 기준으로 column을 정렬합니다.
# oneDay_df = _oneDay_df.sort_values(by=1, axis=1)
lineIndex = get_full_data_index(_oneDay_df)+1
oneDay_df = _oneDay_df.sort_values(by=lineIndex, axis=1)


# datetime 객체를 string 객체로 변환합니다. 시간을 가지고 계산해야 하는 일은 이전에 실행하세요.
oneDayString_df = oneDay_df.astype('string')
for column in oneDayString_df.columns:
    oneDayString_df[column] = oneDayString_df[column].str.slice(start=10, stop=16)

# [column명 : 차량번호] station_df 와 oneDay_df를 합칩니다. datetime이 아닌 다른 타입의 오브젝트가 행에 붙게 되므로 지금부터는 편집이 어렵습니다.
_finalOneDay_df = pd.concat([station_df,oneDayString_df], axis=1)
_finalOneDay_df.set_index('stationSeq', drop=True, inplace=True)
_finalOneDay_df

Unnamed: 0_level_0,stationId,stationName,경기76자2010_0,경기76자2005_0,경기76자2006_0,경기76자2007_0,경기76자2009_0,경기76자2010_199,경기76자2005_207,경기76자2006_219,경기76자2007_217,경기76자2009_215,경기76자2010_402,경기76자2005_380,경기76자2006_408,경기76자2007_424,경기76자2009_418,경기76자2010_614,경기76자2005_610,경기76자2006_616,경기76자2007_658,경기76자2009_644
stationSeq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,229000746,장터고개.맥금동영업소,05:27,06:00,07:00,07:30,08:30,09:30,,11:00,11:29,,13:33,13:59,15:09,15:44,16:54,18:07,,19:59,20:45,
2,229000745,배무기,05:27,06:00,07:00,07:30,08:30,09:30,10:00,,,12:30,13:33,14:01,15:10,15:45,16:55,18:07,,20:01,20:45,
3,229000744,검산초등학교,05:28,06:01,,07:31,08:31,09:31,10:00,,,12:30,,14:01,15:10,15:45,16:55,18:08,,20:01,20:46,22:06
4,229000743,유승아파트,05:28,,07:01,07:31,08:31,09:31,10:01,11:01,11:31,,,,,,16:56,18:08,,,,22:06
5,229000742,성원아파트,05:28,06:02,07:01,07:33,08:33,09:32,10:01,11:01,11:31,12:31,13:34,14:02,15:12,15:46,16:56,18:10,,,20:47,22:07
6,229000741,대방아파트,05:29,06:02,07:02,07:34,08:33,09:33,10:02,11:02,,12:31,13:34,14:02,15:12,15:46,16:57,18:11,,20:02,20:47,22:09
7,229000740,풀무골,05:31,06:02,07:03,07:35,08:34,09:33,10:04,11:03,11:33,12:33,13:35,14:03,15:13,15:47,16:59,18:11,18:43,20:02,20:49,22:09
8,229000739,문산제일고,05:31,06:03,07:03,07:35,08:38,09:34,10:05,11:06,11:33,12:33,13:36,14:08,15:15,15:51,16:59,18:12,18:43,20:04,20:49,22:11
9,229000738,문산제일고삼거리,05:32,06:05,07:05,07:37,08:44,09:37,10:08,11:09,11:35,12:35,13:40,14:08,15:19,15:54,17:00,18:15,18:45,20:05,20:50,22:13
10,229000565,독점말.대방노블랜드2차아파트,05:33,06:06,07:06,07:38,08:46,09:37,10:08,11:10,11:37,12:36,13:41,14:10,15:19,15:55,17:01,18:15,18:45,20:05,20:52,22:13


In [14]:
# 정렬된 column에 출차 순서를 배정합니다. (1호차, 2호차, ...)
columnCount = len(oneDayString_df.columns)
oneDayString_df.columns = np.arange(1,columnCount+1)

# [column명 : n호차] station_df 와 oneDay_df를 합칩니다. datetime이 아닌 다른 타입의 오브젝트가 행에 붙게 되므로 지금부터는 편집이 어렵습니다.
finalOneDay_df = pd.concat([station_df,oneDayString_df], axis=1)
df = finalOneDay_df.set_index('stationSeq', drop=True)

## 일일 시간표 데이터 csv로 내보내기

In [15]:
rootPath = 'C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/DailyTimeTable/' + str(routeId)
createFolder(rootPath)
outputPath = rootPath + '/' + str(routeId) + '_' + str(year)[-2:] + '-' + str(month) + '-' + str(day) + '.csv'
df.to_csv(outputPath, encoding='ANSI')

# CSV 파일을 읽어서 EXCEL파일로 내보낸다 : 블로그 게시 목적

## 일일 시간표 데이터 csv 읽어오기

In [16]:
df = pd.read_csv(outputPath, encoding='ANSI')

## 노선번호 받아오기

In [17]:
KEY = 'yEaR%2F3MDedRSlVJL%2F2pxnVg0yre1N5VF3RZ%2FUAt56MJ7J2mNpfqhUvy05pXV0uhHTVY7DbyCR8xmMaDdYga67Q%3D%3D' # 종현
apiKey = unquote(KEY)

# 노선정보항목조회 busrouteservice/info
# 해당 노선에 대한 노선번호, 기점/종점 정류소, 첫차/막차시간, 배차간격, 운행업체 등의 운행계획 정보를 제공합니다.
url = 'http://openapi.gbis.go.kr/ws/rest/busrouteservice/info'
queryParams = '?' + urlencode({ quote_plus('serviceKey') : apiKey, quote_plus('routeId') : routeId })
# print(url+queryParams)

request = Request(url + queryParams)
request.get_method = lambda: 'GET'
oneLineXML = urlopen(request).read().decode('utf8')
# print(oneLineXML)

xtree = ET.fromstring(oneLineXML)
resultCode = int(xtree[1].find("resultCode").text)
msgBody = xtree[2]

busRouteInfoItem = msgBody[0]
i=0
for info in busRouteInfoItem:
    print(f"{i} {info.tag} : {info.text}")
    i+=1

0 companyId : 4103400
1 companyName : 신일여객
2 companyTel : 031-958-0135
3 districtCd : 2
4 downFirstTime : 06:20
5 downLastTime : 02:20
6 endStationId : 101000022
7 endStationName : 숭례문
8 peekAlloc : 105
9 regionName : 고양,서울,파주
10 routeId : 229000263
11 routeName : 9709
12 routeTypeCd : 11
13 routeTypeName : 직행좌석형시내버스
14 startMobileNo : 30095
15 startStationId : 229000746
16 startStationName : 장터고개.맥금동영업소
17 upFirstTime : 04:50
18 upLastTime : 00:50
19 nPeekAlloc : 0


In [18]:
routeName = busRouteInfoItem.find("routeName").text

## Excel로 내보내기

In [19]:
rootPath = 'C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/ExcelExport/'
excelFileName = str(routeName) + '.xlsx'
filePath = rootPath + excelFileName

createFolder(rootPath)
print(filePath)

C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/ExcelExport/9709.xlsx


### 배포용 DataFrame 수정

In [20]:
export_df = df.set_index('stationName').drop('stationId', axis=1)

### 내보내기

In [21]:
#-- write an object to an Excel sheet using pd.DataFrame.to_excel()
export_df.to_excel(filePath, # directory and file name to write
            sheet_name = '평일', 
            na_rep = 'NaN', 
            float_format = "%.2f", 
            header = True, 
            #columns = ["group", "value_1", "value_2"], # if header is False
            index = True, 
            index_label = "정류장", 
            startrow = 0, 
            startcol = 0, 
            engine = 'openpyxl', 
            freeze_panes = (1, 1)
            )

## Excel 접근하여 수정하기
https://naakjii.tistory.com/56

In [22]:
wb = openpyxl.load_workbook(filePath)
sheet = wb['평일']

In [23]:
sheet.max_column, sheet.max_row

(17, 145)

In [24]:
sheet.column_dimensions['A'].width = 14

In [25]:
for cell in sheet['A']:
    cell.alignment = openpyxl.styles.Alignment(horizontal='left', vertical='center')
#     cell.fill = openpyxl.styles.fills.PatternFill(patternType='solid', fgColor=openpyxl.styles.colors.Color(rgb='00FF00'))

In [26]:
wb.save(filePath)

In [27]:
wb.close()