## 라이브러리 불러오기

In [1]:
# API 호출
# ## https://data.go.kr/tcs/dss/selectApiDataDetailView.do?publicDataPk=15057440
from urllib.request import Request, urlopen
# Request https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.Request
# urlopen https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.urlopen
from urllib.parse import urlencode, quote_plus
# urlenocde https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.urlencode
# quote_plus https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.quote_plus
from urllib.parse import unquote
import requests

# 시간 생성
import time
import datetime
from pytz import timezone, utc

# 폴더 자동 생성
import os

# 코드 스케줄링
import threading
# import schedule

# XML 파싱
import xml.etree.ElementTree as ET

# 입력데이터 데이터프레임 작성
import pandas as pd
import numpy as np

# DataFrame 화면 출력                                                # DataFrame 화면 출력 설정이 변경되어 있습니다 #
from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows = None # default = 60

#EXCEL
import openpyxl

In [2]:
# 폴더 자동 생성 함수
# https://data-make.tistory.com/170
def createFolder(directory):
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError:
        print ('Error: Creating directory. ' +  directory)

## 기본값 설정하기

In [3]:
routeId = 204000060
year = 2020
month = 11
day = 20

## Key Unquote

In [4]:
KEY = 'yEaR%2F3MDedRSlVJL%2F2pxnVg0yre1N5VF3RZ%2FUAt56MJ7J2mNpfqhUvy05pXV0uhHTVY7DbyCR8xmMaDdYga67Q%3D%3D' # 종현
# apiKEY = requests.utils.unquote(KEY)
apiKey = unquote(KEY)
# print(apiKey)
KEYSW = 'M%2B4%2FqUiadT8X8PhgFjaQLDu%2BIOgPMURfGsOX%2FmVxwHQVJgnVR%2FMPjDYXkuQNwUFbZXlfnX5Lls3SUCiCLIFjgQ%3D%3D'
apiKeySW = unquote(KEYSW)

## API 호출

In [5]:
# 노선정보항목조회 busrouteservice/info
# routeId의 정류장 목록을 받아옵니다.
url = 'http://openapi.gbis.go.kr/ws/rest/busrouteservice/station'
queryParams = '?' + urlencode({ quote_plus('serviceKey') : apiKey, quote_plus('routeId') : routeId })
request = Request(url + queryParams)
request.get_method = lambda: 'GET'

# API를 호출하여 XML 형식으로 된 string 데이터를 변수 "oneLineXML" 에 저장합니다.
oneLineXML = urlopen(request).read().decode('utf8')
# oneLineXML

## XML 파싱

In [6]:
# xtree는 "openapi" 도메인 API 호출에서 "headerCd"에 상관없이 3개의 태그를 갖습니다.
# [comMsgHeader, msgHeader, msgBody]
xtree = ET.fromstring(oneLineXML)
# for branch in xtree:
#     print(branch)
msgBody = xtree[2]

tagList = ["stationSeq", "stationId", "stationName"]
stationList = []
for branch in msgBody:
    stationList.append([branch.find(tag).text for tag in tagList])

# station_df의 index가 0부터 시작하는 문제를 해결 하기 위해 rangeIndex를 지정하였습니다.
stationCount = len(stationList) # 뒤에서 다시 사용되므로 함수로 선언시 주의
rangeIndex = pd.RangeIndex(start=1, stop=stationCount+1)

station_df = pd.DataFrame(stationList, columns=tagList, index=rangeIndex)
station_df = station_df.astype({
    "stationSeq" : "int32",
    "stationId" : "int32", 
    "stationName" : "string"
    
})
# station_df

## 데이터 불러오기

In [7]:
# routeId 별 정류소 도착시간을 나타내는 2차원 배열을 생성 할 수 있습니다
# 마지막 정류소 번호가 필요합니다 : 경유정류소목록조회 API 서비스를 이용하여 가져옵니다.
# 중간 또는 처음과 끝에 발생하는 결측치를 채워야 합니다.


rootPath = 'C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/buslocationservice/'
inputPath = rootPath + str(routeId) + '/' + str(routeId) + '_' + str(year)[-2:] + '-' + str(month) + '-' + str(day) + '.txt'

itemTagList = ['endBus', 'lowPlate', 'plateNo', 'plateType', 'remainSeatCnt', 'routeId', 'stationId', 'stationSeq', 'Nan']
# itemTagList = ['endBus', 'plateNo', 'plateType', 'remainSeatCnt', 'routeId', 'stationId', 'stationSeq', 'Nan']
itemTagList.insert(0, 'DateTime')
_df = pd.read_csv(inputPath, sep=' ', skiprows=[0], names=itemTagList)
_df = _df.drop(["Nan"], axis=1)
_df = _df.sort_values(['plateNo'])
_df = _df.dropna()
# _df = _df.drop(_df[_df["endBus"]=="API"].index)

# string을 datetime객체로 변환하기
_df['DateTime'] = pd.to_datetime(_df['DateTime'])
# dateTimeObj = datetime.datetime.strptime(_df['DateTime'], '%Y-%m-%dT%H:%M:%S.%f%z')
# _df['DateTime'] = _df.DateTime.str.split('.').str[0]
# _df['DateTime'] = _df.DateTime.str.replace('T', ' ')

_df = _df.astype({
    'endBus': 'int32', 
    'lowPlate': 'int32', 
    'plateNo': 'string', 
    'plateType': 'int32',
    'remainSeatCnt': 'int32', 
    'routeId': 'int32', 
    'stationId': 'int32', 
    'stationSeq': 'int32'
})
# _df

In [8]:
# stationSeq 가 뒤바뀌는 사례도 발생합니다. 
# cf) routeId = 204000046, DateTime = 20-11-17, plateNo = 경기78아1147, index = 166
# _df[_df['plateNo']=='경기78아1147'].sort_values(by='DateTime').reset_index(drop=True).iloc[160:170] 

In [9]:
# 차량 운행기록을 "plateNo"를 기준으로 구분합니다.
plateNoList = _df.groupby(['plateNo']).size().index
plateNoList

Index(['경기70아6146', '경기70아6322', '경기70아8643', '경기70아8644', '경기70아8959'], dtype='object', name='plateNo')

In [10]:
# 차량 번호 한개에 대해서 검사를 실행합니다.
temp_df =_df[_df['plateNo']=='경기76자2005'].sort_values(by='DateTime').reset_index(drop=True)
tempIndexList = []
for i in range(1, len(temp_df)):
    if(temp_df.iloc[i-1]['stationSeq'] > temp_df.iloc[i]['stationSeq']):
            tempIndexList.append(i)
            print((temp_df.iloc[i-1]['stationSeq'] , temp_df.iloc[i]['stationSeq']))
            print(i)
            
    tempIndexList.append(len(temp_df))
display(temp_df)

Unnamed: 0,DateTime,endBus,lowPlate,plateNo,plateType,remainSeatCnt,routeId,stationId,stationSeq


In [11]:
# 정류장의 수 만큼 행을 갖는 빈 데이터프레임을 생성합니다. 변수 rangeIndex는 [XML파싱]에서 선언되어 있습니다.
_oneDay_df = pd.DataFrame(index = rangeIndex)

# 각 차량 번호를 기준으로 반복문을 수행합니다.
for plateNo in plateNoList:
    
    # 특정 차량번호와 같은 운행기록을 "DateTime을 기준으로 오름차순으로 정렬합니다."
    sameBusList = _df[_df['plateNo'] == plateNo].sort_values(['DateTime']).reset_index(drop=True)
#     display(sameBusList)
    
    
    # 버스는 뒤로가지 않으므로
    # "stationSeq"가 작아지는 인덱스 i의 리스트를 만듭니다.
    # API 호출 오류로 stationSeq가 작아지는 경우가 있습니다. 이 문제를 해결하기 위해 보정계수 diffK를 둡니다. 
    indexList = []
    diffK = 10 if ( True ) else 0
    for i in range(1,len(sameBusList)):
        if(sameBusList.iloc[i-1]['stationSeq'] > sameBusList.iloc[i]['stationSeq'] + diffK ):
            indexList.append(i)
#             print((sameBusList.iloc[i-1]['stationSeq'] , sameBusList.iloc[i]['stationSeq']))
            
    indexList.append(len(sameBusList))
#     print(indexList)
    
    # 첫번재 인덱스, 인덱스의 리스트, 마지막 인덱스를 조합하여 호차별로 운행기록을 구분합니다.
    index = 0
    tupleList = []
    for i in indexList:
        tupleList.append((index, i))
        small_df = sameBusList.iloc[index:i].groupby(by=['stationSeq']).min()
        
        # 정류소 데이터프레임에 합칩니다.
        merge_df = pd.merge(station_df, small_df, how='outer', on='stationId')
        # merge_df = merge_df.set_index(keys='stationSeq')
        
        if(1):
            # 1번 정류장에 대한 DateTime의 결측치가 많으면 1일 시간표에서 column 전체가 정렬이 안되므로 bfill방식으로 두번 결측치를 채웁니다.
            merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='bfill', limit=1)
#             merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='ffill', limit=1)
#             merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='bfill', limit=1)


        if(0):
            # 결측치를 채울 경우 문제가 되는 데이터가 잘 드러나지 않으므로 결측치는 가장 마지막에 채우세요.
            # if의 인수가 1이면 결측치를 처리합니다. 결측치 처리를 하지 않으려면 if의 인수를 0으로 수정하세요. 
            # df.interpolate(method='linear' or 'polynomial')
            # 시간에 대한 결측치 처리
            merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='backfill', limit=3)
            merge_df['DateTime'] = merge_df['DateTime'].interpolate(method='pad', limit=3)

            # 빈좌석에 대한 결측치 처리, limit 값이 다름에 주의
            merge_df['remainSeatCnt'] = merge_df['remainSeatCnt'].interpolate(method='linear', limit=2, limit_direction='both')

            merge_df['endBus'] = merge_df['endBus'].interpolate(method='pad', limit=3)        
            merge_df['endBus'] = merge_df['endBus'].interpolate(method='backfill', limit=3)

            merge_df['lowPlate'] = merge_df['lowPlate'].interpolate(method='pad', limit=3)        
            merge_df['lowPlate'] = merge_df['lowPlate'].interpolate(method='backfill', limit=3)

            merge_df['plateType'] = merge_df['plateType'].interpolate(method='pad', limit=3)        
            merge_df['plateType'] = merge_df['plateType'].interpolate(method='backfill', limit=3)

            merge_df['routeId'] = merge_df['routeId'].interpolate(method='pad', limit=3)        
            merge_df['routeId'] = merge_df['routeId'].interpolate(method='backfill', limit=3)

        _oneDay_df[str(plateNo)+'_'+str(index)] = merge_df['DateTime']
        index=i
#         print(str(plateNo)+'_'+str(index))
#         display(merge_df)
#         break;
#     break;

_oneDay_df

Unnamed: 0,경기70아6146_0,경기70아6146_208,경기70아6146_389,경기70아6146_607,경기70아6322_0,경기70아6322_173,경기70아6322_353,경기70아6322_566,경기70아8643_0,경기70아8643_181,경기70아8643_372,경기70아8643_589,경기70아8644_0,경기70아8644_142,경기70아8644_335,경기70아8644_525,경기70아8959_0,경기70아8959_187,경기70아8959_369,경기70아8959_592
1,2020-11-20 06:43:32.957561+09:00,NaT,2020-11-20 16:34:32.508001+09:00,NaT,2020-11-20 07:25:14.731551+09:00,2020-11-20 12:32:26.882194+09:00,2020-11-20 17:22:01.796342+09:00,NaT,2020-11-20 05:36:15.778381+09:00,2020-11-20 09:06:09.152343+09:00,2020-11-20 14:56:23.271390+09:00,2020-11-20 18:58:09.584268+09:00,NaT,2020-11-20 08:11:51.720831+09:00,2020-11-20 13:42:20.273261+09:00,2020-11-20 18:01:44.991796+09:00,2020-11-20 06:10:27.390314+09:00,2020-11-20 10:11:20.313162+09:00,2020-11-20 15:58:29.589389+09:00,2020-11-20 20:11:55.080743+09:00
2,2020-11-20 06:45:29.055828+09:00,NaT,2020-11-20 16:35:30.312022+09:00,NaT,2020-11-20 07:26:16.305860+09:00,2020-11-20 12:33:25.076198+09:00,2020-11-20 17:22:59.680478+09:00,NaT,NaT,2020-11-20 09:08:05.960550+09:00,2020-11-20 14:58:19.749555+09:00,2020-11-20 19:00:05.732238+09:00,NaT,2020-11-20 08:12:50.235045+09:00,2020-11-20 13:43:18.547264+09:00,2020-11-20 18:02:42.866011+09:00,2020-11-20 06:10:27.390314+09:00,2020-11-20 10:13:16.341467+09:00,2020-11-20 16:00:26.067649+09:00,2020-11-20 20:12:53.184830+09:00
3,2020-11-20 06:45:29.055828+09:00,2020-11-20 11:24:25.398498+09:00,2020-11-20 16:36:28.626066+09:00,NaT,2020-11-20 07:27:14.399809+09:00,2020-11-20 12:34:23.420405+09:00,2020-11-20 17:22:59.680478+09:00,NaT,2020-11-20 05:38:12.486355+09:00,2020-11-20 09:08:05.960550+09:00,2020-11-20 14:58:19.749555+09:00,2020-11-20 19:01:03.706202+09:00,NaT,2020-11-20 08:13:48.419396+09:00,2020-11-20 13:44:16.701570+09:00,2020-11-20 18:03:41.010004+09:00,2020-11-20 06:11:25.744306+09:00,2020-11-20 10:13:16.341467+09:00,2020-11-20 16:00:26.067649+09:00,2020-11-20 20:13:51.508795+09:00
4,2020-11-20 06:47:25.333852+09:00,2020-11-20 11:24:25.398498+09:00,2020-11-20 16:37:26.900134+09:00,NaT,2020-11-20 07:28:13.313896+09:00,2020-11-20 12:35:21.864503+09:00,2020-11-20 17:24:55.998596+09:00,NaT,2020-11-20 05:38:12.486355+09:00,2020-11-20 09:10:02.778814+09:00,2020-11-20 15:00:16.157772+09:00,2020-11-20 19:02:02.180154+09:00,NaT,2020-11-20 08:14:46.623562+09:00,2020-11-20 13:45:14.695223+09:00,2020-11-20 18:04:38.824098+09:00,2020-11-20 06:12:23.778115+09:00,2020-11-20 10:15:12.999749+09:00,2020-11-20 16:01:23.721502+09:00,2020-11-20 20:14:49.932733+09:00
5,2020-11-20 06:47:25.333852+09:00,2020-11-20 11:25:23.812471+09:00,2020-11-20 16:38:25.124126+09:00,2020-11-20 21:21:51.757721+09:00,2020-11-20 07:29:11.497952+09:00,2020-11-20 12:35:21.864503+09:00,2020-11-20 17:25:54.122703+09:00,NaT,2020-11-20 05:39:10.820366+09:00,2020-11-20 09:11:00.492888+09:00,2020-11-20 15:01:14.521707+09:00,2020-11-20 19:03:00.544160+09:00,NaT,2020-11-20 08:15:44.717689+09:00,2020-11-20 13:46:12.769196+09:00,2020-11-20 18:05:36.817995+09:00,2020-11-20 06:13:21.882304+09:00,2020-11-20 10:15:12.999749+09:00,2020-11-20 16:01:23.721502+09:00,2020-11-20 20:15:48.136788+09:00
6,2020-11-20 06:48:23.867885+09:00,2020-11-20 11:27:19.860451+09:00,2020-11-20 16:39:23.138310+09:00,2020-11-20 21:21:51.757721+09:00,2020-11-20 07:30:09.852052+09:00,2020-11-20 12:37:17.872591+09:00,2020-11-20 17:25:54.122703+09:00,NaT,2020-11-20 05:40:09.574495+09:00,2020-11-20 09:11:58.936906+09:00,2020-11-20 15:02:12.425791+09:00,2020-11-20 19:04:56.752145+09:00,NaT,2020-11-20 08:17:40.476012+09:00,2020-11-20 13:47:12.143569+09:00,2020-11-20 18:07:33.366142+09:00,2020-11-20 06:16:16.444665+09:00,2020-11-20 10:17:09.907939+09:00,2020-11-20 16:02:22.065725+09:00,2020-11-20 20:15:48.136788+09:00
7,2020-11-20 06:52:16.574104+09:00,2020-11-20 11:30:15.042558+09:00,2020-11-20 16:43:15.494320+09:00,2020-11-20 21:22:49.802759+09:00,2020-11-20 07:34:03.138212+09:00,2020-11-20 12:40:12.834762+09:00,2020-11-20 17:28:48.724817+09:00,NaT,2020-11-20 05:45:11.855312+09:00,2020-11-20 09:15:51.333406+09:00,2020-11-20 15:05:07.197699+09:00,2020-11-20 19:07:51.123890+09:00,NaT,2020-11-20 08:21:33.322721+09:00,2020-11-20 13:49:08.371304+09:00,2020-11-20 18:10:27.788135+09:00,2020-11-20 06:19:10.866508+09:00,2020-11-20 10:19:05.997078+09:00,2020-11-20 16:05:16.167636+09:00,2020-11-20 20:18:42.749025+09:00
8,2020-11-20 06:52:16.574104+09:00,2020-11-20 11:31:13.536567+09:00,2020-11-20 16:43:15.494320+09:00,2020-11-20 21:22:49.802759+09:00,2020-11-20 07:35:00.812511+09:00,2020-11-20 12:40:12.834762+09:00,2020-11-20 17:28:48.724817+09:00,NaT,2020-11-20 05:45:11.855312+09:00,2020-11-20 09:16:49.177475+09:00,2020-11-20 15:06:05.671551+09:00,2020-11-20 19:07:51.123890+09:00,NaT,2020-11-20 08:22:31.356928+09:00,2020-11-20 13:49:08.371304+09:00,2020-11-20 18:10:27.788135+09:00,2020-11-20 06:20:08.930596+09:00,2020-11-20 10:20:04.560308+09:00,2020-11-20 16:06:14.531617+09:00,2020-11-20 20:18:42.749025+09:00
9,2020-11-20 06:54:13.511890+09:00,2020-11-20 11:32:13.990719+09:00,2020-11-20 16:44:13.798366+09:00,2020-11-20 21:23:48.005829+09:00,2020-11-20 07:35:59.186705+09:00,2020-11-20 12:41:11.048934+09:00,2020-11-20 17:30:44.842912+09:00,NaT,2020-11-20 05:46:10.269334+09:00,2020-11-20 09:17:47.251734+09:00,2020-11-20 15:07:04.145549+09:00,2020-11-20 19:08:49.577710+09:00,NaT,2020-11-20 08:24:27.775224+09:00,2020-11-20 13:51:05.049751+09:00,2020-11-20 18:12:24.276298+09:00,2020-11-20 06:20:08.930596+09:00,2020-11-20 10:22:01.238227+09:00,2020-11-20 16:07:12.715435+09:00,NaT
10,2020-11-20 06:54:13.511890+09:00,2020-11-20 11:32:13.990719+09:00,2020-11-20 16:45:11.692572+09:00,2020-11-20 21:24:46.299790+09:00,2020-11-20 07:36:57.420836+09:00,2020-11-20 12:41:11.048934+09:00,2020-11-20 17:30:44.842912+09:00,NaT,2020-11-20 05:46:10.269334+09:00,2020-11-20 09:18:45.255758+09:00,2020-11-20 15:07:04.145549+09:00,2020-11-20 19:08:49.577710+09:00,NaT,2020-11-20 08:24:27.775224+09:00,2020-11-20 13:51:05.049751+09:00,2020-11-20 18:13:22.530319+09:00,2020-11-20 06:21:12.254848+09:00,2020-11-20 10:22:01.238227+09:00,2020-11-20 16:07:12.715435+09:00,2020-11-20 20:20:38.776876+09:00


In [12]:
def get_full_data_index(_oneDay_df):
    '''
    함수 [get_full_data_index(df)]은 데이터프레임 안에서 결측치가 하나도 없는 행의 index를 반환합니다.
    시간표를 배차 시간순으로 정렬하기 위해 데이터프레임의 첫번째 행을 기준으로 열(axis=1)을 정렬합니다.
    하지만 데이터프레임의 첫번째 행을 기준으로 정렬을 하게되면 결측치로 인하여 정렬이 안되는 열이 발생합니다.
    결측치가 없는 행을 찾기 위해 제작한 함수 입니다.

    '''
    for i in range(len(_oneDay_df.columns)):
        for j in range(len(_oneDay_df)):
            if(_oneDay_df.iloc[j].isnull().sum() == i):
                return j
            
        print("모든 행에 결측치가 존재합니다.")

In [13]:
# 출차시간(1행)을 기준으로 column을 정렬합니다.
# oneDay_df = _oneDay_df.sort_values(by=1, axis=1)
lineIndex = get_full_data_index(_oneDay_df)+1
oneDay_df = _oneDay_df.sort_values(by=lineIndex, axis=1)


# datetime 객체를 string 객체로 변환합니다. 시간을 가지고 계산해야 하는 일은 이전에 실행하세요.
oneDayString_df = oneDay_df.astype('string')
for column in oneDayString_df.columns:
    oneDayString_df[column] = oneDayString_df[column].str.slice(start=10, stop=16)

# [column명 : 차량번호] station_df 와 oneDay_df를 합칩니다. datetime이 아닌 다른 타입의 오브젝트가 행에 붙게 되므로 지금부터는 편집이 어렵습니다.
_finalOneDay_df = pd.concat([station_df,oneDayString_df], axis=1)
_finalOneDay_df.set_index('stationSeq', drop=True, inplace=True)
_finalOneDay_df

모든 행에 결측치가 존재합니다.


Unnamed: 0_level_0,stationId,stationName,경기70아8644_0,경기70아8643_0,경기70아8959_0,경기70아6146_0,경기70아6322_0,경기70아8644_142,경기70아8643_181,경기70아8959_187,경기70아6146_208,경기70아6322_173,경기70아8644_335,경기70아8643_372,경기70아8959_369,경기70아6146_389,경기70아6322_353,경기70아8644_525,경기70아8643_589,경기70아8959_592,경기70아6146_607,경기70아6322_566
stationSeq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,205000227,도촌동9단지앞,,05:36,06:10,06:43,07:25,08:11,09:06,10:11,,12:32,13:42,14:56,15:58,16:34,17:22,18:01,18:58,20:11,,
2,205000220,도촌7단지.8단지,,,06:10,06:45,07:26,08:12,09:08,10:13,,12:33,13:43,14:58,16:00,16:35,17:22,18:02,19:00,20:12,,
3,205000221,도촌초등학교.도촌종합사회복지관,,05:38,06:11,06:45,07:27,08:13,09:08,10:13,11:24,12:34,13:44,14:58,16:00,16:36,17:22,18:03,19:01,20:13,,
4,205000352,도촌4단지,,05:38,06:12,06:47,07:28,08:14,09:10,10:15,11:24,12:35,13:45,15:00,16:01,16:37,17:24,18:04,19:02,20:14,,
5,205000222,도촌3단지.스위첸파티오1단지,,05:39,06:13,06:47,07:29,08:15,09:11,10:15,11:25,12:35,13:46,15:01,16:01,16:38,17:25,18:05,19:03,20:15,21:21,
6,205000231,동강프라자앞,,05:40,06:16,06:48,07:30,08:17,09:11,10:17,11:27,12:37,13:47,15:02,16:02,16:39,17:25,18:07,19:04,20:15,21:21,
7,205000217,도촌1.2단지,,05:45,06:19,06:52,07:34,08:21,09:15,10:19,11:30,12:40,13:49,15:05,16:05,16:43,17:28,18:10,19:07,20:18,21:22,
8,206000391,동원.SK아파트,,05:45,06:20,06:52,07:35,08:22,09:16,10:20,11:31,12:40,13:49,15:06,16:06,16:43,17:28,18:10,19:07,20:18,21:22,
9,206000676,매화마을주공4단지,,05:46,06:20,06:54,07:35,08:24,09:17,10:22,11:32,12:41,13:51,15:07,16:07,16:44,17:30,18:12,19:08,,21:23,
10,206000144,성남금융고,,05:46,06:21,06:54,07:36,08:24,09:18,10:22,11:32,12:41,13:51,15:07,16:07,16:45,17:30,18:13,19:08,20:20,21:24,


In [14]:
# 정렬된 column에 출차 순서를 배정합니다. (1호차, 2호차, ...)
columnCount = len(oneDayString_df.columns)
oneDayString_df.columns = np.arange(1,columnCount+1)

# [column명 : n호차] station_df 와 oneDay_df를 합칩니다. datetime이 아닌 다른 타입의 오브젝트가 행에 붙게 되므로 지금부터는 편집이 어렵습니다.
finalOneDay_df = pd.concat([station_df,oneDayString_df], axis=1)
df = finalOneDay_df.set_index('stationSeq', drop=True)

## 일일 시간표 데이터 csv로 내보내기

In [15]:
rootPath = 'C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/DailyTimeTable/' + str(routeId)
createFolder(rootPath)
outputPath = rootPath + '/' + str(routeId) + '_' + str(year)[-2:] + '-' + str(month) + '-' + str(day) + '.csv'
df.to_csv(outputPath, encoding='ANSI')

# CSV 파일을 읽어서 EXCEL파일로 내보낸다 : 블로그 게시 목적

## 일일 시간표 데이터 csv 읽어오기

In [16]:
df = pd.read_csv(outputPath, encoding='ANSI')

## 노선번호 받아오기

In [17]:
KEY = 'yEaR%2F3MDedRSlVJL%2F2pxnVg0yre1N5VF3RZ%2FUAt56MJ7J2mNpfqhUvy05pXV0uhHTVY7DbyCR8xmMaDdYga67Q%3D%3D' # 종현
apiKey = unquote(KEY)

# 노선정보항목조회 busrouteservice/info
# 해당 노선에 대한 노선번호, 기점/종점 정류소, 첫차/막차시간, 배차간격, 운행업체 등의 운행계획 정보를 제공합니다.
url = 'http://openapi.gbis.go.kr/ws/rest/busrouteservice/info'
queryParams = '?' + urlencode({ quote_plus('serviceKey') : apiKey, quote_plus('routeId') : routeId })
# print(url+queryParams)

request = Request(url + queryParams)
request.get_method = lambda: 'GET'
oneLineXML = urlopen(request).read().decode('utf8')
# print(oneLineXML)

xtree = ET.fromstring(oneLineXML)
resultCode = int(xtree[1].find("resultCode").text)
msgBody = xtree[2]

busRouteInfoItem = msgBody[0]
i=0
for info in busRouteInfoItem:
    print(f"{i} {info.tag} : {info.text}")
    i+=1

0 companyId : 4100400
1 companyName : 대원버스
2 companyTel : 02-455-2114
3 districtCd : 2
4 downFirstTime : 06:10
5 downLastTime : 23:40
6 endStationId : 121001315
7 endStationName : 사당역(중)
8 peekAlloc : 20
9 regionName : 과천,서울,성남,안양,의왕
10 routeId : 204000060
11 routeName : 103
12 routeTypeCd : 13
13 routeTypeName : 일반형시내버스
14 startMobileNo : 6199
15 startStationId : 205000227
16 startStationName : 도촌동9단지앞
17 upFirstTime : 05:00
18 upLastTime : 22:20
19 nPeekAlloc : 30


In [18]:
routeName = busRouteInfoItem.find("routeName").text

## Excel로 내보내기

In [19]:
rootPath = 'C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/ExcelExport/'
excelFileName = str(routeName) + '.xlsx'
filePath = rootPath + excelFileName

createFolder(rootPath)
print(filePath)

C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/ExcelExport/103.xlsx


### 배포용 DataFrame 수정

In [20]:
export_df = df.set_index('stationName').drop('stationId', axis=1).drop('stationSeq', axis=1)

### 내보내기

In [21]:
#-- write an object to an Excel sheet using pd.DataFrame.to_excel()
export_df.to_excel(filePath, # directory and file name to write
            sheet_name = '평일', 
            na_rep = '', 
            float_format = "%.2f", 
            header = True, 
            #columns = ["group", "value_1", "value_2"], # if header is False
            index = True, 
            index_label = "정류장", 
            startrow = 0, 
            startcol = 0, 
            engine = 'openpyxl', 
            freeze_panes = (1, 1)
            )

## Excel 접근하여 수정하기
https://naakjii.tistory.com/56

In [22]:
wb = openpyxl.load_workbook(filePath)
sheet = wb['평일']

In [23]:
sheet.max_column, sheet.max_row

(21, 122)

In [24]:
sheet.column_dimensions['A'].width = 14

In [25]:
for cell in sheet['A']:
    cell.alignment = openpyxl.styles.Alignment(horizontal='left', vertical='center')
#     cell.fill = openpyxl.styles.fills.PatternFill(patternType='solid', fgColor=openpyxl.styles.colors.Color(rgb='00FF00'))

In [26]:
wb.save(filePath)

In [27]:
wb.close()