## 라이브러리 설치

In [1]:
# !pip3 install schedule
# !pip3 install pytz
# !pip3 install pandas

## 라이브러리 불러오기

In [97]:
# API 호출
# ## https://data.go.kr/tcs/dss/selectApiDataDetailView.do?publicDataPk=15057440
from urllib.request import Request, urlopen
# Request https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.Request
# urlopen https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.urlopen
from urllib.parse import urlencode, quote_plus
# urlencode https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.urlencode
# quote_plus https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.quote_plus
from urllib.parse import unquote
import requests

# 시간 생성
import time
import datetime
from pytz import timezone, utc

# 폴더 자동 생성
import os

# 코드 스케줄링
import threading
import schedule

import pandas as pd

# XML 파싱
import xml.etree.ElementTree as ET

## KEY unquote

In [3]:
# Service keys for the open API, stored percent-encoded and decoded with
# urllib.parse.unquote before use.
# NOTE(review): these keys are committed in plain text; consider loading them
# from an environment variable or an untracked config file instead.
KEY = 'yEaR%2F3MDedRSlVJL%2F2pxnVg0yre1N5VF3RZ%2FUAt56MJ7J2mNpfqhUvy05pXV0uhHTVY7DbyCR8xmMaDdYga67Q%3D%3D' # Jonghyun
# apiKEY = requests.utils.unquote(KEY)
apiKey = unquote(KEY)
# print(apiKey)
# Second key; apiKeySW is not referenced anywhere else in the visible part of this file.
KEYSW = 'M%2B4%2FqUiadT8X8PhgFjaQLDu%2BIOgPMURfGsOX%2FmVxwHQVJgnVR%2FMPjDYXkuQNwUFbZXlfnX5Lls3SUCiCLIFjgQ%3D%3D'
apiKeySW = unquote(KEYSW)

## @현재 시간 생성 함수

In [4]:
def getNowTime():
    """Return the current time as a timezone-aware datetime in Asia/Seoul.

    Returns:
        datetime.datetime: the current instant expressed in the
        ``Asia/Seoul`` pytz timezone.
    """
    # datetime.utcnow() is deprecated since Python 3.12 and yields a naive
    # value that then has to be localized by hand. Asking for "now" directly
    # in the target zone produces the same instant in a single step.
    # TODO (original author): file paths derived from this value change once
    # the clock passes midnight; that still has to be solved by the callers.
    return datetime.datetime.now(timezone('Asia/Seoul'))

## routeId의 첫차시간과 막차시간을 datetime으로 저장합니다.

In [80]:
def makeInfoFile(routeId, infoRootPath='C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/BusInfo/'):
    """Fetch the route information for ``routeId`` and cache it as a text file.

    Calls the ``busrouteservice/info`` API and writes ``<routeId>.txt`` into
    ``infoRootPath``. The first line is ``INFOFILE <routeId>``; every following
    line is ``<tag> <text>`` for one child element of the returned route item.

    The response is fetched and validated *before* the file is opened, so a
    failed call no longer leaves an empty, truncated file behind.

    Args:
        routeId: route identifier sent to the API and used as the file name.
        infoRootPath: directory (with trailing slash) the file is written to.
            Defaults to the path the original code had hard-coded.

    Raises:
        RuntimeError: if the API reports a non-zero ``resultCode``.
        urllib.error.URLError: if the HTTP request fails.
        OSError: if the directory or file cannot be created.
    """
    # Route info lookup: busrouteservice/info
    url = 'http://openapi.gbis.go.kr/ws/rest/busrouteservice/info'
    # urlencode already percent-encodes keys and values.
    queryParams = '?' + urlencode({'serviceKey': apiKey, 'routeId': routeId})

    request = Request(url + queryParams, method='GET')
    with urlopen(request) as response:
        oneLineXML = response.read().decode('utf8')

    xtree = ET.fromstring(oneLineXML)
    # The result code is read from the root's second child; the route item is
    # the first child of the root's third child.
    resultCode = int(xtree[1].find("resultCode").text)
    if resultCode != 0:
        raise RuntimeError(
            f"busrouteservice/info returned resultCode {resultCode} for routeId {routeId}"
        )
    busRouteInfoItem = xtree[2][0]

    # NOTE(review): the file is space-separated, so a value that itself
    # contains a space would yield extra columns for a reader using sep=' '.
    lines = [f"INFOFILE {routeId}\n"]
    # Empty XML elements have text None; write them as an empty value.
    lines.extend(f"{info.tag} {info.text or ''}\n" for info in busRouteInfoItem)

    os.makedirs(infoRootPath, exist_ok=True)
    infoFilePath = infoRootPath + str(routeId) + '.txt'
    with open(infoFilePath, 'w', encoding='utf-8', newline='\n') as infoFile:
        infoFile.writelines(lines)

In [81]:
def readInfoCSV(routeId, infoRootPath='C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/BusInfo/'):
    """Load the cached route-info file for ``routeId`` as a DataFrame.

    The file ``<infoRootPath><routeId>.txt`` is parsed as space-separated
    text. Its first line is used as the header, and the column named
    ``INFOFILE`` becomes the index.

    Args:
        routeId: route identifier; ``str(routeId)`` is the file's base name.
        infoRootPath: directory (with trailing slash) holding the info files.
            Defaults to the path the original code had hard-coded.

    Returns:
        pandas.DataFrame: indexed by the values of the ``INFOFILE`` column.

    Raises:
        FileNotFoundError: if the info file does not exist.
    """
    infoFilePath = infoRootPath + str(routeId) + '.txt'
    return pd.read_csv(infoFilePath, sep=' ', index_col='INFOFILE')

In [82]:
def getBusTime(routeId):
    """Return today's service window for ``routeId`` as two aware datetimes.

    Reads the ``downFirstTime`` and ``downLastTime`` rows from the cached
    route-info file, creating the file through ``makeInfoFile`` if it cannot
    be read. Both times are combined with the current Asia/Seoul date.

    Known issues recorded by the original author:
      * ISSUE1: the last-bus arrival time differs considerably from reality.
      * ISSUE2: when the service runs past midnight a day has to be added.
        Rule used here: if the last time is not later than the first time,
        the end date is the next day; otherwise the same date is used.

    Args:
        routeId: route identifier passed to ``readInfoCSV``/``makeInfoFile``.

    Returns:
        tuple[datetime.datetime, datetime.datetime]: ``(start, end)``, both
        carrying a fixed +09:00 offset.
    """
    try:
        info_df = readInfoCSV(routeId)
    except Exception:
        # The cache is missing or unreadable: rebuild it once and retry.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        makeInfoFile(routeId)
        info_df = readInfoCSV(routeId)

    # NOTE(review): the original local was named upFirstTime but read the
    # 'downFirstTime' row. Confirm which row is intended; the row is kept.
    firstTime = info_df.loc['downFirstTime'].values[0]
    lastTime = info_df.loc['downLastTime'].values[0]

    firstDay = getNowTime()
    # "HH:MM" -> HHMM integers, so the comparison is chronological.
    if int(firstTime.replace(':', '')) < int(lastTime.replace(':', '')):
        lastDay = firstDay
    else:
        lastDay = firstDay + datetime.timedelta(days=1)

    # isoformat()[:11] is "YYYY-MM-DDT"; the schedule time is appended with
    # zero seconds and the fixed +09:00 offset.
    stampFormat = '%Y-%m-%dT%H:%M:%S.%f%z'
    startDate = firstDay.isoformat()[:11] + firstTime + ':00.000000+09:00'
    endDate = lastDay.isoformat()[:11] + lastTime + ':00.000000+09:00'

    startDatetime = datetime.datetime.strptime(startDate, stampFormat)
    endDatetime = datetime.datetime.strptime(endDate, stampFormat)
    return (startDatetime, endDatetime)

In [89]:
# Show the service window of one sample route, one datetime per line.
startDatetime, endDatetime = getBusTime(222000074)
print(f"{startDatetime}\n{endDatetime}")

2020-11-19 06:20:00+09:00
2020-11-20 00:25:00+09:00


## @routeIdList 작성 선행

In [8]:
# Route IDs whose bus locations are collected. The commented-out rows inside
# the literal are IDs that are currently excluded.
# NOTE(review): 200000145 appears twice among the active entries, so that
# route is processed twice wherever this list is iterated; confirm whether
# the duplicate is intended.
routeIdList = \
[200000193, 204000046,
 204000059, 204000065, 232000072, 229000072,
 222000088, 200000262, 234001511, 200000109,
 227000039, 233000140, 232000007, 234000013, 233000270, 219000027,
 204000056, 234001290, 236000050, 234001138, 234000065, 233000031,
 228000393, 234000027, 232000090, 234001251, 232000073, 228000395,
 232000005, 233000135, 233000125, 234001244, 216000026, 233000266,
 234000042, 216000043, 216000061, 234000130, 213000019, 234001694,
 233000258, 234000075, 234000884, 227000038, 228000184, 232000047,
 200000149, 200000145, 213000015, 228000181, 233000131, 228000262,
 234000310, 224000047, 234001271, 200000145, 224000050, 224000019,
 222000074, 204000024, 236000176, 229000111, 228000263, 222000078,
 234000882, 235000085, 234000069, 228000177, 234000079, 221000003,
 229000263, 229000266, 204000060,
#  229000112, 227000040, 234000873, 228000388, 228000176, 204000070,
#  234000051, 234000136, 222000073, 233000136, 234000873, 234001163,
#  234000886, 234000015, 229000023, 234000873, 
#  234000148, 234000002, 229000028, 229000028, 222000076, 222000075,
#  205000002, 233000265, 233000139, 227000019, 200000175, 228000175,
#  228000182, 200000108, 207000070, 234000309, 234000313, 200000112,
#  200000205, 222000107, 234000016, 200000110, 204000041, 228000389,
#  234001204, 234001516, 200000104, 222000049, 236000149, 200000119,
#  204000082, 221000033, 228000394, 234000031, 229000061, 222000169,
#  234000050, 204000057, 234000324, 234000878, 235000092, 200000120,
#  213000024, 234000011, 200000115, 229000097, 218000007, 233000032,
#  232000103, 222000090, 218000010, 206000007, 204000081, 232000092,
#  232000092, 219000016, 219000006, 200000150, 222000084, 222000046,
#  219000013, 229000247, 222000137, 233000142,
#  234001609, 234001203, 233000269
]

In [9]:
# Number of active (non-commented) entries in routeIdList, duplicates included.
len(routeIdList)

73

## @filePathList 생성

In [10]:
# 폴더 자동 생성 함수
# https://data-make.tistory.com/170
def createFolder(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    This is best-effort: a failure is reported on stdout and not raised, so
    callers must be prepared for the directory to still be missing.

    Args:
        directory: path of the folder to create.
    """
    try:
        # exist_ok=True replaces the earlier "check, then create" sequence,
        # which could fail if the folder appeared between the two steps.
        # An existing path that is not a directory is now reported as an error.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print ('Error: Creating directory. ' +  directory)

In [11]:
##########################################################################
## These paths depend on the machine running the code. Edit before use. ###
##########################################################################

fileDevicepath = 'C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA'
# fileDevicepath = 'C:/Users/Administrator/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA'    # path to select when running on AWS

# Folder that separates outputs by feature. Always check it before recording real data.
fileAPIName = 'buslocationservice'                                                            # folder name to select when running on AWS
# fileAPIName = 'testFolder1'

# Root folder.
fileRootPath = '/Master/dataAPI/'

In [12]:
def makeFilePathList():
    """Build one data-file path per entry of ``routeIdList``.

    Each path has the form
    ``<fileDevicepath><fileRootPath><fileAPIName>/<routeId>/<routeId>_<YY-MM-DD>.txt``.
    The per-route folder is created as a side effect.

    Returns:
        list[str]: file paths in the same order as ``routeIdList``.
    """
    fileExtension = '.txt'
    # Take the date stamp once. Calling getNowTime() per route could give
    # different dates to different routes if the loop runs across midnight.
    # isoformat()[2:10] is the "YY-MM-DD" part.
    fileDateTime = getNowTime().isoformat()[2:10]

    filePathList = []
    for routeId in routeIdList:
        fileRouteId = str(routeId)

        # Folder that holds every daily file of this route.
        folderPath = fileDevicepath + fileRootPath + fileAPIName + '/' + fileRouteId
        createFolder(folderPath)

        fileName = fileRouteId + '_' + fileDateTime + fileExtension
        filePathList.append(folderPath + '/' + fileName)

    return filePathList

In [13]:
def newFile(routeIdList, filePathList):
    """Create (or truncate) every data file and write its header line.

    The header line is ``<routeId> <current ISO timestamp>``.

    Args:
        routeIdList: route identifiers, paired positionally with the paths.
        filePathList: target file paths; existing files are overwritten.
    """
    for routeId, filePath in zip(routeIdList, filePathList):
        # The with-block closes each file deterministically; previously the
        # handles were never closed explicitly.
        with open(filePath, 'w', encoding='utf-8', newline='\n') as textFile:
            textFile.write(f"{routeId} {getNowTime().isoformat()}\n")

In [14]:
# Build the per-route data-file paths, then show how many there are and the first one.
filePathList = makeFilePathList()
print(f"{len(filePathList)} ", end='')
print(filePathList[0])

73 C:/Users/jongh/OneDrive/School/석사/1-2/자료구조/프로젝트/5.DATA/Master/dataAPI/buslocationservice/200000193/200000193_20-11-19.txt


## @로그파일을 생성한다

In [15]:
def makeLogFile():
    """Return today's log-file path and make sure its folder exists.

    The folder is ``<fileDevicepath><fileRootPath><fileAPIName>/_log`` and
    the file name is ``<YY-MM-DD>_log.txt``.

    Returns:
        str: path of the log file (the file itself is not created here).
    """
    logFolder = ''.join((fileDevicepath, fileRootPath, fileAPIName, '/_log'))
    # isoformat()[2:10] is the "YY-MM-DD" part of the current timestamp.
    dateStamp = getNowTime().isoformat()[2:10]

    # Create the folder that will hold the log file.
    createFolder(logFolder)

    return logFolder + '/' + dateStamp + '_log.txt'


logPath = makeLogFile()

In [16]:
def newLogFile(logPath):
    """Create (or truncate) the log file and write the current timestamp.

    Args:
        logPath: path of the log file; an existing file is overwritten.
    """
    # The with-block closes the file deterministically; previously the handle
    # was never closed explicitly.
    with open(logPath, 'w', encoding='utf-8', newline='\n') as logFile:
        logFile.write(getNowTime().isoformat())
        logFile.write("\n")

In [17]:
# Create/truncate the log file at logPath and write the current timestamp as its first line.
newLogFile(logPath)

## switchList 생성함수

In [125]:
# Rebinds routeIdList to a single route (replacing the longer list defined
# earlier in the file) and creates one zero-initialised switch per route.
routeIdList = [204000046]
switchList = [0] * len(routeIdList)
switchList

[0]

In [128]:
def testFunction(routeIdList):
    """Look up the service window of every route and print a blank line each.

    Scratch function: the computed window is not used yet.

    Args:
        routeIdList: route identifiers passed one by one to ``getBusTime``.
    """
    for busRoute in routeIdList:
        startDatetime, endDatetime = getBusTime(busRoute)
        print()


testFunction(routeIdList)

## "openapi.gbis.go.kr" 도메인 API 호출

In [112]:
def _logIndexError(logFile, routeId, errorCode, oneLineXML):
    """Report a response that lacks an expected section, in the log and on stdout."""
    logFile.write(getNowTime().isoformat())
    logFile.write(' ')
    logFile.write(str(routeId))
    logFile.write(f" Index Error {errorCode}\n")
    # Keep the raw response so the malformed payload can be inspected later.
    logFile.write(oneLineXML)
    logFile.write('\n')
    print(getNowTime(), routeId, end=' ')
    print(f"Index Error {errorCode}")


def openAPICall(routeIdList, filePathList, timeout=30):
    """Poll the bus-location API once per route and append the rows to its file.

    For every ``(routeId, filePath)`` pair the ``buslocationservice`` endpoint
    is queried. Each child of the response body becomes one line
    ``<ISO timestamp> <field> <field> ... `` appended to ``filePath``.

    Error handling:
      * IE01: the response has no header section (fewer than two root
        children). It is logged with the raw XML and the route is skipped.
      * Non-zero ``resultCode``: the route is skipped silently (the original
        had the logging for this case commented out).
      * IE02: the response has no body section. It is logged with the raw XML
        and the route is skipped.
      * Network and XML parsing errors propagate to the caller.

    Changes from the earlier version: every file is closed through a
    with-block (only the last data file used to be closed, and an empty route
    list raised NameError); requests carry a timeout so a stalled connection
    cannot block forever; empty XML elements are written as empty fields
    instead of raising TypeError; the error tags now match the documented
    codes ("IE01"/"IE02" instead of "EC01"/"IE2"). Data files are now opened
    only for routes whose response passed validation.

    Args:
        routeIdList: route identifiers to query.
        filePathList: data-file paths paired positionally with the routes.
        timeout: per-request timeout in seconds, passed to ``urlopen``.
    """
    ############################################################
    url = 'http://openapi.gbis.go.kr/ws/rest/buslocationservice'# <<<<<<<<<<<<< check the domain
    ############################################################
    pastTime = getNowTime()

    with open(logPath, 'a', encoding='utf-8', newline='\n') as logFile:
        print(pastTime.isoformat(),'\t API [ buslocationservice ] 호출을 [ 시작 ] 하였습니다.')

        for routeId, filePath in zip(routeIdList, filePathList):
            # Call the API; the body is an XML document returned as one string.
            queryParams = '?' + urlencode({'serviceKey': apiKey, 'routeId': routeId})
            request = Request(url + queryParams, method='GET')
            with urlopen(request, timeout=timeout) as response:
                oneLineXML = response.read().decode('utf8')

            # Expected root children: [comMsgHeader, msgHeader, msgBody].
            xtree = ET.fromstring(oneLineXML)

            # msgHeader : [queryTime, resultCode, resultMessage]
            if len(xtree) < 2:
                _logIndexError(logFile, routeId, 'IE01', oneLineXML)
                continue
            msgHeader = xtree[1]

            # API call failure (CF01): skipped without logging.
            resultCode = int(msgHeader[1].text)
            if resultCode != 0:
                continue

            # msgBody : one child element per reported bus.
            if len(xtree) < 3:
                _logIndexError(logFile, routeId, 'IE02', oneLineXML)
                continue
            msgBody = xtree[2]

            # Item tags noted by the original author: endBus, lowPlate,
            # plateNo, plateType, remainSeatCnt, routeId, stationId, stationSeq.
            with open(filePath, 'a', encoding='utf-8', newline='\n') as textFile:
                for busLocationList in msgBody:
                    textFile.write(getNowTime().isoformat())
                    textFile.write(' ')
                    for busLocation in busLocationList:
                        # Elements without text have .text None.
                        textFile.write(busLocation.text or '')
                        textFile.write(' ')
                    textFile.write("\n")

        currentTime = getNowTime()
        print(currentTime.isoformat(),'\t API [ buslocationservice ] 호출을 [ 완료 ] 하였습니다. 소요시간: ', currentTime - pastTime)

In [113]:
# Run a single collection pass by hand, outside the scheduler.
openAPICall(routeIdList, filePathList)

0
2020-11-19T21:29:16.032409+09:00 	 API [ buslocationservice ] 호출을 [ 시작 ] 하였습니다.
2020-11-19T21:29:22.845499+09:00 	 API [ buslocationservice ] 호출을 [ 완료 ] 하였습니다. 소요시간:  0:00:06.813090


## @Schedule 모듈 사용을 위한 API 호출 생성 함수 생성

In [20]:
####################################
newFile(routeIdList, filePathList)##   <<<  Resets the data files. Use with caution!
newLogFile(logPath)               ##   <<<  Resets the log file. Use with caution!
####################################

## @최종코드

In [None]:
# Final collection loop: run openAPICall every 52 seconds until interrupted.
# NOTE(review): filePathList and logPath are computed before this loop and
# nothing in this cell refreshes them, so after midnight data presumably keeps
# going to the previous day's files. Confirm and refresh them if needed.
schedule.clear()
schedule.every(52).seconds.do(lambda: openAPICall(routeIdList, filePathList))
print(getNowTime(), "\t프로세스를 시작합니다.")

while True:
    try:
        schedule.run_pending()
        time.sleep(1)
    except Exception as err:
        # Catch Exception, not everything: a bare except also swallowed
        # KeyboardInterrupt, so the loop could not be stopped from the
        # keyboard or by a notebook interrupt.
        # Every failure keeps the historical "Timeout Error TE01" tag; the
        # actual exception is appended so other causes can be told apart.
        with open(logPath, 'a', encoding = 'utf-8', newline = '\n') as logFile:
            logFile.write(getNowTime().isoformat())
            logFile.write(' ')
            logFile.write(" Timeout Error TE01 " + repr(err) + "\n")
        # Echo the same error on the console.
        print(getNowTime(), end=' ')
        print("Timeout Error TE01", repr(err))

2020-11-19 20:34:33.489147+09:00 	프로세스를 시작합니다.
