In [1]:
# API 호출
# ## https://data.go.kr/tcs/dss/selectApiDataDetailView.do?publicDataPk=15057440
from urllib.request import Request, urlopen
# Request https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.Request
# urlopen https://docs.python.org/ko/3/library/urllib.request.html#urllib.request.urlopen
from urllib.parse import urlencode, quote_plus
# urlencode https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.urlencode
# quote_plus https://docs.python.org/ko/3/library/urllib.parse.html#urllib.parse.quote_plus
from urllib.parse import unquote
import requests

# 시간 생성
import time
import datetime
from pytz import timezone, utc

# 폴더 자동 생성
import os

# 코드 스케줄링
import threading
import schedule

import pandas as pd

# XML 파싱
import xml.etree.ElementTree as ET

In [2]:
def getOnlyDirectory(index):
    """Return the directory path of one output sub-category.

    The path is built by plain string concatenation of the current working
    directory, the global ``category`` prefix and ``subCategoryDict[index]``.

    Args:
        index: Key into the global ``subCategoryDict``.

    Returns:
        str: The concatenated directory path.

    Raises:
        KeyError: If ``index`` is not a key of ``subCategoryDict``.
    """
    global subCategoryDict, category
    return "".join((os.getcwd(), category, subCategoryDict[index]))

In [3]:
def getDirectory(index, routeId):
    """Return the directory path of one route inside an output sub-category.

    Same layout as the category-level directory (working directory + global
    ``category`` + ``subCategoryDict[index]``) with ``str(routeId)`` and a
    trailing ``'/'`` appended.

    Args:
        index: Key into the global ``subCategoryDict``.
        routeId: Route identifier; converted with ``str()``.

    Returns:
        str: The concatenated directory path, ending in ``'/'``.

    Raises:
        KeyError: If ``index`` is not a key of ``subCategoryDict``.
    """
    global subCategoryDict, category
    pieces = [os.getcwd(), category, subCategoryDict[index], str(routeId), '/']
    return "".join(pieces)

In [4]:
def getToday():
    """Return the collector's current day as a ``YYYY-MM-DD`` string.

    Reads the datetime stored under ``scheduleDict['today']``.
    """
    today = scheduleDict['today']
    return today.strftime('%Y-%m-%d')

In [5]:
def makeTextFile(subcategory, routeId, Dict):
    """Make sure the route's file for its current day exists and record its path.

    The file lives in ``getDirectory(subcategory, routeId)`` and is named after
    ``scheduleDict[routeId]['today']`` (``YYYY-MM-DD.txt``). A new file gets a
    single header line ``"<routeId> <creation time in ISO format>"``; an
    existing file is left untouched.

    Args:
        subcategory: Key into ``subCategoryDict`` selecting the output folder.
        routeId: Route identifier; also the key written into ``Dict``.
        Dict: Mapping that is updated in place with ``Dict[routeId] = path``.

    Returns:
        The same ``Dict`` object, after the update.
    """
    folderPath = getDirectory(subcategory, routeId)
    todayDate = scheduleDict[routeId]['today'].strftime("%Y-%m-%d")
    filePath = folderPath + todayDate + '.txt'

    if not os.path.isfile(filePath):
        createFolder(folderPath)
        # ``with`` guarantees the handle is closed even if a write fails.
        with open(filePath, 'w', encoding='utf-8', newline='\n') as file:
            file.write(f"{routeId} {getNowTime().isoformat()}\n")

    Dict[routeId] = filePath
    return Dict

In [6]:
def makeOnlyTextFile(Dict, subcategory):
    """Make sure the category-level file for the current day exists.

    The file lives in ``getOnlyDirectory(subcategory)`` and is named after
    ``scheduleDict['today']`` (``YYYY-MM-DD.txt``). A new file gets one header
    line holding its creation time in ISO format; an existing file is left
    untouched. The folder, date and file path are printed for inspection.

    Args:
        Dict: Mapping that is updated in place with ``Dict['today'] = path``.
        subcategory: Key into ``subCategoryDict`` selecting the output folder.

    Returns:
        The same ``Dict`` object, after the update.
    """
    folderPath = getOnlyDirectory(subcategory)
    print(f"folderPath:\t{folderPath}")
    todayDate = scheduleDict['today'].strftime("%Y-%m-%d")
    print(f"todayDate:\t{todayDate}")
    filePath = folderPath + todayDate + '.txt'
    print(f"filePath:\t{filePath}")

    if not os.path.isfile(filePath):
        createFolder(folderPath)
        # ``with`` guarantees the handle is closed even if the write fails.
        with open(filePath, 'a', encoding='utf-8', newline='\n') as file:
            file.write(getNowTime().isoformat() + '\n')

    Dict['today'] = filePath
    return Dict

In [7]:
# 폴더 자동 생성 함수
# https://data-make.tistory.com/170
def createFolder(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Best-effort helper: an ``OSError`` is reported on stdout instead of being
    raised, so callers continue and fail later when they try to use the path.

    Args:
        directory: Path of the folder to create.
    """
    try:
        # exist_ok avoids the check-then-create race of testing
        # os.path.exists() first.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print ('Error: Creating directory. ' +  directory)

In [8]:
# 테스트 타임 생성기
# https://qastack.kr/programming/993358/creating-a-range-of-dates-in-python
def getSampleTime(count):
    """Return ``count`` sample datetimes spaced one hour apart.

    The first element is the current local time (``datetime.today()``); each
    following element is one hour later than the previous one.

    Args:
        count: Number of datetimes to generate.

    Returns:
        list[datetime.datetime]: ``count`` naive datetimes; empty when
        ``count`` is 0 or negative.
    """
    baseTime = datetime.datetime.today()
    return [baseTime + datetime.timedelta(hours=offset) for offset in range(count)]

In [9]:
def getRouteIdList():
    """Read the route ids to collect from ``routeIdList.txt``.

    The file is looked up in ``getOnlyDirectory('rid')`` and is expected to
    hold one integer route id per line. Blank lines are skipped.

    When the file does not exist, the folder is created (so the file can be
    dropped in), a notice is printed and an empty list is returned. The
    previous implementation tried to open the missing file for reading, which
    could only fail with ``FileNotFoundError``.

    Returns:
        list[int]: Route ids in file order (duplicates are kept).

    Raises:
        ValueError: If a non-blank line is not an integer.
    """
    folderPath = getOnlyDirectory('rid')
    filePath = folderPath + 'routeIdList.txt'

    if not os.path.isfile(filePath):
        createFolder(folderPath)
        print('Error: route id list not found. ' + filePath)
        return []

    with open(filePath, 'r', encoding='utf-8', newline='\n') as routeIdFile:
        # int() tolerates the trailing newline / carriage return itself.
        return [int(line) for line in routeIdFile if line.strip()]

In [10]:
def openAPICall(routeId):
    """Call the GBIS bus-location service for one route and parse the reply.

    Sends a GET request to ``buslocationservice`` with the global ``apiKey``
    and ``routeId`` as query parameters, and increments the global
    ``callCount`` just before the request is sent (so failed calls are
    counted too).

    Args:
        routeId: Route identifier passed as the ``routeId`` query parameter.

    Returns:
        xml.etree.ElementTree.Element: Root element of the XML response.
        Callers in this file read the header as child index 1 and the body as
        child index 2.

    Raises:
        urllib.error.URLError: If the request fails.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    global callCount
    url = 'http://openapi.gbis.go.kr/ws/rest/buslocationservice'

    # urlencode() already escapes keys and values; no quote_plus needed.
    queryParams = '?' + urlencode({'serviceKey': apiKey, 'routeId': routeId})
    request = Request(url + queryParams, method='GET')
    callCount += 1

    # Close the HTTP response deterministically instead of leaving it to the GC.
    with urlopen(request) as response:
        oneLineXML = response.read().decode('utf8')

    return ET.fromstring(oneLineXML)

In [11]:
def getNowTime():
    """Return the current time as a timezone-aware datetime in Asia/Seoul.

    Uses an aware "now" in UTC instead of the deprecated naive
    ``datetime.utcnow()``, then converts it to the Seoul zone.
    """
    return datetime.datetime.now(utc).astimezone(timezone('Asia/Seoul'))

In [12]:
def readInfoCSV(routeId):
    """Load a route's info file into a DataFrame.

    The file is ``<getDirectory('info', routeId)><routeId>.txt``: a
    space-separated table whose first column is named ``INFOFILE`` and is used
    as the index.

    Args:
        routeId: Route identifier.

    Returns:
        pandas.DataFrame: The parsed file, indexed by the ``INFOFILE`` column.
    """
    infoFilePath = "".join([getDirectory('info', routeId), str(routeId), '.txt'])
    return pd.read_csv(infoFilePath, sep=' ', index_col='INFOFILE')

In [13]:
def makeInfoFile(routeId):
    """Fetch a route's info from the GBIS API and write it to its info file.

    Calls ``busrouteservice/info`` for ``routeId`` and writes
    ``<getDirectory('info', routeId)><routeId>.txt`` with a header line
    ``INFOFILE <routeId>`` followed by one ``<tag> <text>`` line per child of
    the first item in the response body.

    The API is queried before the file is opened, so a failed request or an
    empty reply no longer leaves a truncated info file behind.

    Args:
        routeId: Route identifier.

    Raises:
        urllib.error.URLError: If the request fails.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
        ValueError: If the response has no body item to write.
    """
    directory = getDirectory('info', routeId)
    infoFilePath = directory + str(routeId) + '.txt'

    # Route info lookup: busrouteservice/info
    url = 'http://openapi.gbis.go.kr/ws/rest/busrouteservice/info'
    queryParams = '?' + urlencode({'serviceKey': apiKey, 'routeId': routeId})
    request = Request(url + queryParams, method='GET')
    with urlopen(request) as response:
        oneLineXML = response.read().decode('utf8')

    xtree = ET.fromstring(oneLineXML)
    # The body is read as the third child of the root and the route item as
    # its first child.
    if len(xtree) < 3 or len(xtree[2]) == 0:
        raise ValueError(f"busrouteservice/info returned no route info for routeId {routeId}")
    busRouteInfoItem = xtree[2][0]

    createFolder(directory)
    with open(infoFilePath, 'w', encoding='utf-8', newline='\n') as infoFile:
        infoFile.write(f"INFOFILE {routeId}\n")
        for info in busRouteInfoItem:
            # Empty elements have text None; write an empty value instead of crashing.
            infoFile.write(f"{info.tag} {info.text or ''}\n")

In [14]:
def getBusTime(routeId):
    """Build today's collection window for one route from its info file.

    Reads ``upFirstTime`` and ``upLastTime`` (``HH:MM`` strings) from the
    route's info file, creating the file through ``makeInfoFile`` when it
    cannot be read. The window is expressed as +09:00 datetimes:

    * ``start``: today's first-bus time minus a 10-minute buffer.
    * ``end``: the last-bus time, today when it is later than the first-bus
      time, otherwise tomorrow (the service runs past midnight).
    * ``today``: today's midnight.

    Known issue noted by the original author: the last-bus time taken from the
    info file differs considerably from the real end of service.

    Args:
        routeId: Route identifier.

    Returns:
        dict: ``{routeId: {'start': datetime, 'end': datetime, 'today': datetime}}``.
    """
    try:
        info_df = readInfoCSV(routeId)
    except Exception:
        # Missing or unreadable info file: fetch it once and retry. Unlike a
        # bare ``except``, this does not swallow KeyboardInterrupt/SystemExit.
        makeInfoFile(routeId)
        info_df = readInfoCSV(routeId)

    upFirstTime = info_df.loc['upFirstTime'].values[0]
    # NOTE(review): the variable is named downLastTime but reads 'upLastTime';
    # confirm which field is intended.
    downLastTime = info_df.loc['upLastTime'].values[0]

    # Read the clock once so start/end/today cannot straddle midnight.
    now = getNowTime()
    if int(upFirstTime.replace(':', '')) < int(downLastTime.replace(':', '')):
        endDay = now
    else:
        endDay = now + datetime.timedelta(days=1)

    isoFormat = '%Y-%m-%dT%H:%M:%S.%f%z'
    todayPrefix = now.isoformat()[:11]  # 'YYYY-MM-DDT'
    startDate = todayPrefix + upFirstTime + ':00.000000+09:00'
    endDate = endDay.isoformat()[:11] + downLastTime + ':00.000000+09:00'
    todayDate = todayPrefix + '00:00:00.000000+09:00'

    # 10-minute buffer before the first bus.
    startDatetime = datetime.datetime.strptime(startDate, isoFormat) - datetime.timedelta(minutes=10)
    endDatetime = datetime.datetime.strptime(endDate, isoFormat)
    todayDatetime = datetime.datetime.strptime(todayDate, isoFormat)

    return {routeId: {'start': startDatetime, 'end': endDatetime, 'today': todayDatetime}}

In [15]:
def makeScheduleDict(routeIdList):
    """Create or extend the global schedule dictionary.

    Sets ``scheduleDict['today']`` to today's midnight (+09:00) and adds a
    ``getBusTime`` entry for every route id that is not present yet. Routes
    that already have an entry are left unchanged.

    Args:
        routeIdList: Iterable of route ids.

    Returns:
        dict: The global ``scheduleDict``.
    """
    global scheduleDict
    try:
        scheduleDict
    except NameError:
        # First call in a fresh kernel: the global does not exist yet.
        scheduleDict = {}

    todayTime = getNowTime().isoformat()[:11] + '00:00:00.000000+09:00'
    scheduleDict['today'] = datetime.datetime.strptime(todayTime, '%Y-%m-%dT%H:%M:%S.%f%z')

    for routeId in routeIdList:
        if routeId not in scheduleDict:
            # Merge in place instead of rebuilding the whole dict per route.
            scheduleDict.update(getBusTime(routeId))
    return scheduleDict

In [16]:
def makeSwitchDict(routeIdList):
    """Create or extend the global per-route on/off switch dictionary.

    A route that has no switch yet is switched on only when the current time
    lies inside its window, ``start <= now < end``; otherwise it starts off.
    Routes that already have a switch keep their current value.

    Args:
        routeIdList: Iterable of route ids; each must have an entry in the
            global ``scheduleDict``.

    Returns:
        dict: The global ``switchDict`` mapping route id to ``bool``.
    """
    global scheduleDict, switchDict
    try:
        switchDict
    except NameError:
        # First call in a fresh kernel: the global does not exist yet.
        switchDict = {}

    curTime = getNowTime()
    for routeId in routeIdList:
        if routeId in switchDict:
            continue
        window = scheduleDict[routeId]
        # NOTE(review): for a route whose window is already over, the original
        # evaluated ``start + 1 day`` / ``end + 1 day`` and discarded the
        # results, so the schedule was never shifted here. Those no-op
        # statements are dropped; turning them into ``+=`` without also moving
        # 'today' would put the route's files on the wrong date. Rolling the
        # schedule forward is left to updateRouteId().
        switchDict[routeId] = window['start'] <= curTime < window['end']
    return switchDict

In [17]:
def makeTxtFileDict(routeIdList, subCategory):
    """Build a route-id -> file-path mapping for one output sub-category.

    Calls ``makeTextFile`` once per route id, threading the mapping through
    each call.

    Args:
        routeIdList: Iterable of route ids.
        subCategory: Key into ``subCategoryDict`` selecting the output folder.

    Returns:
        dict: The mapping returned by the last ``makeTextFile`` call, or an
        empty dict when ``routeIdList`` is empty.
    """
    pathsByRoute = {}
    for rid in routeIdList:
        pathsByRoute = makeTextFile(subCategory, rid, pathsByRoute)
    return pathsByRoute

In [18]:
def updateRouteId(routeId):
    """Roll one route forward to its next service day.

    Adds one day to the route's ``start``, ``end`` and ``today`` entries in
    the global ``scheduleDict``, then refreshes the route's data file and log
    file entries through ``makeTextFile``.

    Args:
        routeId: Route identifier; must have an entry in ``scheduleDict``.
    """
    global scheduleDict, dttFileDict, logFileDict
    oneDay = datetime.timedelta(days=1)
    routeSchedule = scheduleDict[routeId]
    for field in ('start', 'end', 'today'):
        routeSchedule[field] += oneDay
    dttFileDict = makeTextFile('dtt', routeId, dttFileDict)
    logFileDict = makeTextFile('dtt-log', routeId, logFileDict)

In [19]:
def getSwitch(routeId):
    """Advance one route's collection state machine and fetch data if it is on.

    States, as written to the log files:

    * State #0 - switch off and the first-bus time has not come: nothing to do.
    * S#1 - switch off and the first-bus time has passed: turn the switch on.
    * State ERR - switch on, but the API reply has no readable result code.
    * S#2-1 - switch on, before the last-bus time, non-zero result code.
    * S#2-2 - switch on, before the last-bus time, result code 0: data.
    * S#3 - after the last-bus time, non-zero result code: turn the switch
      off and roll the route to the next day.
    * S#4 - after the last-bus time but result code 0 (the original comments
      read this as "still in service"): data.
    * S#5 - like S#4, but the next day's first-bus time has also passed: roll
      the route to the next day, then fall through to S#4.

    Both log files are opened in a ``with`` block so they are closed on every
    return path.

    Args:
        routeId: Route identifier; must have entries in ``switchDict``,
            ``scheduleDict`` and ``logFileDict``.

    Returns:
        ``False`` when there is nothing to record, ``None`` when the reply
        could not be interpreted, otherwise the parsed XML root element.
    """
    global scheduleDict, switchDict, dttFileDict, logFileDict
    nowTime = getNowTime()

    def stateLine(state):
        # One shared format for every timestamped state entry.
        return f'\t\t {nowTime.strftime("%H:%M")}\t{state}\t{routeId}\n'

    with open(logFileDict['today'], "a", encoding='utf-8') as logFile, \
            open(logFileDict[routeId], "a", encoding='utf-8') as rlogFile:

        if not switchDict[routeId]:
            if nowTime > scheduleDict[routeId]['start']:
                logFile.write(stateLine('S#1'))
                rlogFile.write(stateLine('S#1'))
                switchDict[routeId] = True
                return False

            rlogFile.write('State #0\t')
            return False

        # Switch is on: query the API.
        xtree = openAPICall(routeId)
        try:
            resultCode = int(xtree.find('msgHeader').find('resultCode').text)
        except (AttributeError, TypeError, ValueError):
            # Header or result code missing / not a number.
            rlogFile.write('State ERR\t')
            return None

        if nowTime > scheduleDict[routeId]['end']:
            if resultCode:
                logFile.write(stateLine('S#3'))
                rlogFile.write(stateLine('S#3'))
                switchDict[routeId] = False
                updateRouteId(routeId)
                return False

            if getNowTime() > scheduleDict[routeId]['start'] + datetime.timedelta(days=1):
                logFile.write(stateLine('S#5'))
                rlogFile.write(stateLine('S#5'))
                updateRouteId(routeId)

            rlogFile.write(stateLine('S#4'))
            return xtree

        if resultCode:
            logFile.write(stateLine('S#2-1'))
            rlogFile.write(stateLine('S#2-1'))
            return False

        rlogFile.write(stateLine('S#2-2'))
        return xtree

In [20]:
def parser(routeId):
    """Poll one route and append its bus locations to the route's data file.

    Calls ``getSwitch(routeId)``. When it returns a parsed response, every
    child of the response body (read as the third child of the root) becomes
    one line in the data file: a ``YYYY-MM-DDTHH:MM:SS`` timestamp followed by
    the text of each field, all separated and terminated by a single space.
    Nothing is written when ``getSwitch`` returns ``False`` or ``None``.

    The data file is opened before ``getSwitch`` runs, so a response that
    makes ``getSwitch`` roll the route to the next day is still written to the
    file that was current when polling started.

    Args:
        routeId: Route identifier; must have an entry in ``dttFileDict``.
    """
    global scheduleDict, switchDict, dttFileDict, logFileDict
    with open(dttFileDict[routeId], "a", encoding='utf-8', newline='\n') as dttFile:
        xtree = getSwitch(routeId)

        # Test explicitly for "no data" instead of relying on Element
        # truthiness, and skip responses too short to contain a body.
        if xtree is None or xtree is False or len(xtree) < 3:
            return

        msgBody = xtree[2]
        for busLocationList in msgBody:
            # strftime keeps the width fixed even when microsecond == 0, where
            # slicing isoformat() would cut into the date.
            dttFile.write(getNowTime().strftime('%Y-%m-%dT%H:%M:%S'))
            dttFile.write(' ')
            for busLocation in busLocationList:
                # Empty elements have text None; keep the column, write nothing.
                dttFile.write(busLocation.text or '')
                dttFile.write(' ')
            dttFile.write("\n")

In [21]:
def parsingRouteIdList(routeIdList):
    """Run one polling cycle over all routes and pad it to about a minute.

    Calls ``parser`` for every route id, then sleeps for whatever is left of
    a 59-second budget. When the cycle overran the budget, no sleep happens
    and a warning line is appended to the main log.

    Args:
        routeIdList: Iterable of route ids to poll.

    Returns:
        tuple: ``(duration, oneCycleCallCount)`` - the seconds slept and how
        much the global ``callCount`` grew during this cycle.
    """
    global callCount
    tempCallCount = callCount
    pastTime = getNowTime()

    routeId = None  # keeps the warning below valid for an empty list
    for routeId in routeIdList:
        parser(routeId)

    # total_seconds() does not wrap around the way .seconds does.
    duration = 59 - int((getNowTime() - pastTime).total_seconds())
    if duration < 0:
        duration = 0
        with open(logFileDict['today'], "a", encoding='utf-8') as logFile:
            # Trailing newline keeps the next log entry on its own line.
            logFile.write(f"{getNowTime()}\t{routeId}\tduration<0 : 노선 수 조정이 필요합니다.\n")
    time.sleep(duration)

    return (duration, callCount - tempCallCount)

In [22]:
def updateToday():
    """Roll the collector over to a new day once midnight has passed.

    Does nothing while the current time is still before
    ``scheduleDict['today'] + 1 day``. Otherwise it resets ``callCount``,
    advances ``scheduleDict['today']``, re-reads the route id list and
    rebuilds the schedule, switch and file dictionaries, logging each step to
    the main log file that was current when the function was entered.

    ``routeIdList``, ``switchDict`` and ``dttFileDict`` are declared global so
    the refreshed values reach the rest of the notebook. Previously they were
    assigned to locals and discarded.
    """
    global scheduleDict, callCount, logFileDict, switchDict, dttFileDict, routeIdList

    tomorrowDate = scheduleDict['today'] + datetime.timedelta(days=1)
    if getNowTime() < tomorrowDate:
        return

    with open(logFileDict['today'], "a", encoding='utf-8') as logFile:
        callCount = 0
        scheduleDict['today'] += datetime.timedelta(days=1)
        logFile.write(f"scheduleDict['today']를 업데이트 하였습니다: {scheduleDict['today']}\n")
        print(f"scheduleDict['today']:\t{scheduleDict['today']}")

        routeIdList = getRouteIdList()
        # Route whose entries are echoed to the log as a spot check. Lookups
        # use .get() so a list without this route does not abort the rollover.
        testRouteId = 204000046
        firstRouteId = routeIdList[0] if routeIdList else None
        logFile.write(f"routeIdList를 업데이트 하였습니다: {firstRouteId}\n")

        scheduleDict = makeScheduleDict(routeIdList)
        logFile.write(f"scheduleDict를 업데이트 하였습니다: {scheduleDict.get(testRouteId)}\n")

        switchDict = makeSwitchDict(routeIdList)
        logFile.write(f"switchDict 업데이트 하였습니다: {switchDict.get(testRouteId)}\n")

        dttFileDict = makeTxtFileDict(routeIdList, 'dtt')
        logFile.write(f"dttFileDict[testRouteId]를 업데이트 하였습니다: {dttFileDict.get(testRouteId)}\n")

        logFileDict = makeTxtFileDict(routeIdList, 'dtt-log')
        logFile.write(f"logFileDict[routeId]를 업데이트 하였습니다: {logFileDict.get(testRouteId)}\n")

        logFileDict = makeOnlyTextFile(logFileDict, 'main-log')
        logFile.write(f"logFileDict-main-log를 업데이트 하였습니다: {logFileDict['today']}\n")

In [23]:
# Folder fragment (relative to the category root) for each kind of output.
subCategoryDict = dict([
    ('dtt', '/daily-time-table/'),          # collected bus locations
    ('dtt-log', '/log/daily-time-table/'),  # per-route state logs
    ('rid', '/route-id-list/'),             # list of route ids to collect
    ('main-log', '/log/main/'),             # main loop log
    ('info', '/BusInfo/'),                  # cached route info files
])

In [24]:
# Service key for the GBIS open API, stored URL-encoded and decoded into apiKey.
# FIXME(security): a credential is committed in this notebook. The
# GBIS_SERVICE_KEY environment variable now takes precedence; the embedded
# value is kept only as a fallback so existing runs keep working. Revoke it
# and remove the literal once the environment variable is in place.
KEY = os.environ.get('GBIS_SERVICE_KEY') or 'yEaR%2F3MDedRSlVJL%2F2pxnVg0yre1N5VF3RZ%2FUAt56MJ7J2mNpfqhUvy05pXV0uhHTVY7DbyCR8xmMaDdYga67Q%3D%3D'
apiKey = unquote(KEY)
# Running count of bus-location API calls (incremented by openAPICall).
callCount = 0

In [25]:
# Ask whether this is a trial run; any non-zero integer counts as "test".
isTest = int(input("테스트 버전 입니까?... True[1] OR False[0] : "))

# Root folder for all outputs: '/TEST' for trial runs, '/DATA' otherwise.
category = '/TEST' if isTest else '/DATA'

테스트 버전 입니까?... True[1] OR False[0] :  0


In [26]:
# Load the route ids to collect from the route-id list file and echo them.
routeIdList = getRouteIdList()
print(f"{routeIdList} 생성을 완료하였습니다.")

[200000104, 200000108, 200000109, 200000119, 200000120, 200000145, 200000175, 200000193, 200000205, 204000046, 204000056, 204000057, 204000059, 204000065, 204000081, 204000082, 205000002, 213000019, 213000024, 216000026, 218000010, 218000011, 219000006, 221000003, 221000033, 222000046, 222000075, 222000076, 222000078, 222000107, 222000137, 222000169, 227000019, 227000038, 227000039, 228000176, 228000177, 228000181, 228000263, 228000388, 228000389, 229000023, 229000028, 229000028, 229000072, 229000097, 229000111, 229000112, 232000047, 232000072, 232000072, 232000073, 232000081, 232000092, 232000098, 233000031, 233000125, 233000131, 233000135, 233000142, 233000258, 233000265, 234000002, 234000011, 234000013, 234000031, 234000042, 234000050, 234000075, 234000079, 234000130, 234000310, 234000313, 234000324, 234000878, 234000882, 234000884, 234001203, 234001204, 234001244, 234001511, 234001516, 234001609, 236000050, 229000266, 229000263] 생성을 완료하였습니다.


In [27]:
# Build the schedule: today's midnight plus a start/end/today window per route.
scheduleDict = makeScheduleDict(routeIdList)

In [28]:
# Initialise each route's on/off switch from the current time and its window.
switchDict = makeSwitchDict(routeIdList)

In [29]:
# Create (if needed) each route's data file and map route id -> file path.
dttFileDict = makeTxtFileDict(routeIdList, 'dtt')

In [30]:
# Create (if needed) each route's log file and map route id -> file path.
logFileDict = makeTxtFileDict(routeIdList, 'dtt-log')

In [31]:
# Add the main log file for the current day under the 'today' key.
logFileDict = makeOnlyTextFile(logFileDict, 'main-log')

folderPath:	/home/ubuntu/Data-Structure-Project/AWS/DATA/log/main/
todayDate:	2020-12-29
filePath:	/home/ubuntu/Data-Structure-Project/AWS/DATA/log/main/2020-12-29.txt


In [32]:
# 오류 노선ID 확인용
TEST_ROUTEID = 204000056
print(f"getNowTime:\t{getNowTime()}", f"start:\t{scheduleDict[TEST_ROUTEID]['start']}", f"end:\t{scheduleDict[TEST_ROUTEID]['end']}", f"today:\t{scheduleDict[TEST_ROUTEID]['today']}", sep='\n')
print(f"scheduleDict:\t{scheduleDict[TEST_ROUTEID]}")
print(f"switchDict:\t{switchDict[TEST_ROUTEID]}")
print(f"dttFileDict:\t{dttFileDict[TEST_ROUTEID]}")
print(f"logFileDict:\t{logFileDict[TEST_ROUTEID]}")
print(f"다음날 첫차:\t{scheduleDict[TEST_ROUTEID]['start'] + datetime.timedelta(days=1)}")
print(f"logFileDict['today']:\t{logFileDict['today']}")
print(f"scheduleDict['today']:\t{scheduleDict['today']}")

getNowTime:	2020-12-29 03:24:48.794413+09:00
start:	2020-12-29 04:50:00+09:00
end:	2020-12-29 21:50:00+09:00
today:	2020-12-29 00:00:00+09:00
scheduleDict:	{'start': datetime.datetime(2020, 12, 29, 4, 50, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400))), 'end': datetime.datetime(2020, 12, 29, 21, 50, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400))), 'today': datetime.datetime(2020, 12, 29, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400)))}
switchDict:	False
dttFileDict:	/home/ubuntu/Data-Structure-Project/AWS/DATA/daily-time-table/204000056/2020-12-29.txt
logFileDict:	/home/ubuntu/Data-Structure-Project/AWS/DATA/log/daily-time-table/204000056/2020-12-29.txt
다음날 첫차:	2020-12-30 04:50:00+09:00
logFileDict['today']:	/home/ubuntu/Data-Structure-Project/AWS/DATA/log/main/2020-12-29.txt
scheduleDict['today']:	2020-12-29 00:00:00+09:00


In [None]:
# Main collection loop: runs until the kernel is interrupted. Each pass rolls
# the day over if needed, polls every route once (parsingRouteIdList pads the
# pass to about a minute) and records a one-line summary.
while True:
    pastTime = getNowTime()
    updateToday()
    (duration, oneCycleCallCount) = parsingRouteIdList(routeIdList)
    strTime = getNowTime().strftime("%y/%m/%d %H:%M")
    # Build the summary once so the console and the log show the same elapsed time.
    cycleSummary = f"{strTime}\t호출:{oneCycleCallCount:3d} 누적호출:{callCount:6d} 소요시간: {str(getNowTime() - pastTime)[2:-4]}"
    print(cycleSummary)
    # The log files are created as UTF-8; append with the same encoding.
    with open(logFileDict['today'], "a", encoding='utf-8') as logFile:
        logFile.write(cycleSummary + "\n")