In [None]:
!pip install tika

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import requests,lxml.html,re,os,sys,copy
import pandas as pd
from IPython.display import display
from functools import reduce
import pathlib
from tika import parser as tikaparser
from google.colab import data_table
data_table.enable_dataframe_formatter()

In [None]:
class enrollmentCrawler:
  def __init__(self):
    self.regexPatternForEnrollment = {
      'schoolAndYr': '([^\d]+)\s*(\d+)\s*學年度.+選結果公告',
      'singleDepartmentsInHtml': '(<H3><a name=\"\d+\">(.|\n)*?)(?=(<H3>|<\/BODY>))',
      'futureStudentRough': '(\[備\s?\d+\])?(\d{4,})([^\-]+){0,}$', #[\u4E00-\u9FFF\uF900-\uFAFF○Ｏ] 比對會失敗，稍晚檢查
      'departmentRoughName': '(\d+)-+(.+)',
      'enrollmentsRoughIdentity': '(.+(生|⽣|民|⺠))((正|備)取).*合計\s*(\d+)\s*名',
    }
  def swapPositionForElements(self, srcobj, indexa, indexb):
    if isinstance(srcobj, pd.DataFrame):
      rowa, rowb = srcobj.iloc[indexa], srcobj.iloc[indexb].copy()
      srcobj.iloc[indexb] = rowa
      srcobj.iloc[indexa] = rowb
      return srcobj
    if isinstance(srcobj, pd.core.series.Series):
      rowa, rowb = srcobj.iloc[indexa], copy.deepcopy(srcobj.iloc[indexb])
      srcobj.iloc[indexb] = rowa
      srcobj.iloc[indexa] = rowb
      return srcobj
    if isinstance(srcobj, list):
      elemb = copy.deepcopy(srcobj[indexb])
      srcobj[indexb] = srcobj[indexa]
      srcobj[indexa] = elemb
    return srcobj
  def requesturl_get_ret(self, url, params=None, encoding='utf-8', **kwargs):
    """Issue an HTTP GET and return the ``requests`` response object.

    Args:
      url: address to fetch.
      params: optional query parameters forwarded to ``requests.get``.
      encoding: when not None, assigned to ``response.encoding`` so that
        ``response.text`` is decoded with it. Pass None to keep whatever
        ``requests`` determined.
      **kwargs: forwarded to ``requests.get``. A ``timeout`` of 30 seconds is
        applied unless the caller supplies one.

    Returns:
      The response object returned by ``requests.get``.
    """
    # Without a timeout requests waits indefinitely on an unresponsive server.
    kwargs.setdefault('timeout', 30)
    r = requests.get(url, params=params, **kwargs)
    if encoding is not None:
      r.encoding = encoding
    return r
  def detectEncodingFromHtml(self, srchtml, replace_with_encoding='UTF-8'):
    pattern = '(<meta\shttp-equiv=\"Content-Type\".+text/html;\s?charset=)(.+)(\"[\s\/]*>)'
    if replace_with_encoding is not None:
      replacedHtml = re.sub(pattern, '\\1{}\\3'.format(replace_with_encoding), srchtml)
      return replacedHtml
    else:
      matchres = re.search(pattern, srchtml)
      return matchres.group(2)
  def filterOutEmptyElements(self, srcList):
    return [element for element in srcList if element not in ['\u3000','',' ','  ',None]]
  def extractSchoolAndSchoolYearFromRegexGroup(self, schoolAndYrRegexGroup):
    return schoolAndYrRegexGroup.group(1), schoolAndYrRegexGroup.group(2)
  def cleanEnrollmentHtmlToRecords(self, srcHtml):
    """Turn one admission-result HTML page into a list of raw record dicts.

    Args:
      srcHtml: the page, either as a string or as an object exposing the HTML
        through a ``text`` attribute (such as a ``requests`` response).

    Returns:
      A list with one dict per applicant text node, with the keys ``school``,
      ``schoolyr``, ``departmentRoughName``, ``enrollmentsRoughIdentity`` and
      ``futureStudentRough``. All values are still unparsed strings.

    Raises:
      IndexError: when the page title, a department heading or a table header
        is missing.
      AttributeError: when the title does not match the school/year pattern.
    """
    # Only a missing attribute falls back to the raw value; other errors raised
    # while reading ``text`` are no longer silently swallowed.
    srcHtml = getattr(srcHtml, 'text', srcHtml)
    enrollmentsList = []
    htmlParsedNodes = lxml.html.fromstring(srcHtml)
    title = htmlParsedNodes.xpath('/html/body/center/h2/text()')[0]
    schoolAndYr = re.search(self.regexPatternForEnrollment['schoolAndYr'], title)
    school, schoolyr = self.extractSchoolAndSchoolYearFromRegexGroup(schoolAndYr)
    # findall returns one tuple per section; element 0 is the whole section.
    singleDepartments = [
      found[0]
      for found in re.findall(self.regexPatternForEnrollment['singleDepartmentsInHtml'], srcHtml)
    ]
    for singleDepartment in singleDepartments:
      # Sections without any table carry no applicants.
      if re.search('table', singleDepartment) is None:
        continue
      parsedSingleDepartment = lxml.html.fromstring(singleDepartment)
      departmentRoughName = parsedSingleDepartment.xpath('//h3//text()')[0]
      for enrollmentsTypeTable in parsedSingleDepartment.xpath('//table[@class="enterTable"]'):
        enrollmentsRoughIdentity = enrollmentsTypeTable.xpath('./thead//text()')[0]
        # NOTE(review): the original queried only the misspelt tag "tboy",
        # presumably because the source pages emit it that way - confirm.
        # The standard "tbody" is accepted as well so well-formed pages are not
        # silently skipped.
        futureStudents = enrollmentsTypeTable.xpath('./tbody//text() | ./tboy//text()')
        futureStudents = self.filterOutEmptyElements(futureStudents)
        for futureStudent in futureStudents:
          enrollmentsList.append({
            'school': school,
            'schoolyr': schoolyr,
            'departmentRoughName': departmentRoughName,
            'enrollmentsRoughIdentity': enrollmentsRoughIdentity,
            'futureStudentRough': futureStudent,
          })
    return enrollmentsList
  def appendStudentDepartmentIdentityInf(self, srcdf, srcRegexInf):
    src_target_relations = {
      'futureStudentRank':['futureStudentsRegexGroups',1],
      'futureStudentID':['futureStudentsRegexGroups',2],
      'futureStudentName':['futureStudentsRegexGroups',3],
      'departmentID':['departmentRegexGroups',1],
      'departmentName':['departmentRegexGroups',2],
      'enrollmentIdentity':['identityRegexGroups',1],
      'enrollmentIdentityPriority':['identityRegexGroups',3],
    }
    for key,value in src_target_relations.items():
      try:
        srcdf[key] = srcRegexInf[value[0]].apply(lambda fstug:fstug.group(value[1]))
      except Exception as e:
        print(f'error in handling {key} and {value}')
        for emi,elem in enumerate(srcRegexInf[value[0]]):
          print(srcdf.iloc[emi])
          print(elem[emi])
        raise(e)
    return srcdf
  def generateRegexGroups(self, eDF):
    """
    input: 要比對regex pattern已取出學生、系所、入學身分別資訊的df，依據futureStudentRough departmentRoughName enrollmentsRoughIdentity 或 texts欄位
    output: 比對結果
    """
    defaultTargetColumn = 'texts'
    returnRegexGroups = {}
    for srccolumn in ['futureStudentRough','departmentRoughName','enrollmentsRoughIdentity']:
      targetcolumn = srccolumn+'RegexGroups'
      checkcolumn = srccolumn if srccolumn in eDF.columns else defaultTargetColumn
      try:
        returnRegexGroups[targetcolumn] = eDF[checkcolumn].apply(lambda department: re.search(self.regexPatternForEnrollment[srccolumn],department))
      except Exception as e:
        for rowi,ridentity in enumerate(eDF[checkcolumn].tolist()):
          print(f'rid is {ridentity}')
          display(eDF.loc[rowi,:])
          re.search(self.regexPatternForEnrollment[checkcolumn],ridentity)
        raise(e)
    return returnRegexGroups.values()
  def cleanEnrollmentDF(self, eDF):
    """
    input: 有考生簡略資訊的df
    """
    eDF = eDF.reset_index(drop=True)
    for rowi,regexGroup in enumerate(eDF['futureStudentsRegexGroups']):
      try:
        displaystr = ('group1 is {}, group2 is {}, group3 is {}'.format(
          regexGroup.group(1),
          regexGroup.group(2),
          regexGroup.group(3)
        ))
      except Exception as e:
        print('error at {} for {}, dropped.'.format(e, eDF.iloc[rowi,:]))
        raise(e)
    
    eDF = self.appendStudentDepartmentIdentityInf(eDF, eDF)
    eDF = eDF.loc[:,['school','schoolyr','futureStudentID','futureStudentName','departmentID','departmentName','enrollmentIdentity','enrollmentIdentityPriority','futureStudentRank']]
    return eDF
  def enrollmentWebPageToDF(self, srcurl):
    if not isinstance(srcurl, list):
      srcUrls = [srcurl]
    else:
      srcUrls = srcurl
    srcHtmls = [self.requesturl_get_ret(url, encoding=None) for url in srcUrls]
    srcEncodings = [self.detectEncodingFromHtml(srcHtml.text,replace_with_encoding=None) for srcHtml in srcHtmls]
    for i,encoding in enumerate(srcEncodings):
      srcHtmls[i].encoding = encoding
    enrollmentsDF = [self.cleanEnrollmentHtmlToRecords(srcHtml) for srcHtml in srcHtmls]
    enrollmentsDF = reduce(lambda x,y: x+y, enrollmentsDF)
    enrollmentsDF = pd.DataFrame.from_records(enrollmentsDF)
    
    enrollmentsDF['futureStudentsRegexGroups'], enrollmentsDF['departmentRegexGroups'], enrollmentsDF['identityRegexGroups'] = self.generateRegexGroups(enrollmentsDF)
    enrollmentsDF = self.cleanEnrollmentDF(enrollmentsDF)
    return enrollmentsDF
  ######################
  ## PDFs
  ######################
  def extract_texts_in_pdf(self, targetPdfFile):#, furtherprocess=True
    """Extract the text of a PDF with Tika and return it line by line.

    Args:
      targetPdfFile: path of the PDF; converted with ``str`` before use.

    Returns:
      A list of strings, one per line of the extracted text. The list is empty
      when the parser reports no content.
    """
    parsed = tikaparser.from_file(str(targetPdfFile))
    # The parser result may carry no text at all (e.g. for image-only PDFs);
    # guard against calling split on None.
    content = parsed.get('content') or ''
    if not content:
      return []
    return content.split("\n")
  def generateNecessaryInfDfFromPdf(self, pdffile):
    """
    輸出 粗糙的入學資訊
    """
    textsInPdfraw = self.extract_texts_in_pdf(pdffile)
    textsInPdfraw = self.filterOutEmptyElements(textsInPdfraw)
    textsInPdfraw = [text.strip() for text in textsInPdfraw]
    textsInPdfraw = [text for text in textsInPdfraw if re.search('file:\/\/\/', text) is None]
    textsInPdfraw = [text for text in textsInPdfraw if re.search('^\d+\/\d+$', text) is None]
    textsInPdfraw = [text for text in textsInPdfraw if re.search('^第\d頁，共\d頁$', text) is None]
    textsInPdfraw = self.filterOutEmptyElements(textsInPdfraw)

    textsInPdf = []
    for text in textsInPdfraw:
      if re.search(self.regexPatternForEnrollment['futureStudentRough'],text) is None:
        textsInPdf.append(text)
      else:
        textsInPdf.extend( self.filterOutEmptyElements( re.split(r'\s?((?:\[備[\s\d ]+\])?(?:\d+[\s ]?[^\s]+))', text) ) )
    tempDfOfText = pd.DataFrame({'texts':textsInPdf})
    
    tempDfOfText['futureStudentsRegexGroups'], tempDfOfText['departmentRegexGroups'], tempDfOfText['identityRegexGroups'] = self.generateRegexGroups(tempDfOfText)
    schoolAndYrRgexGroups = tempDfOfText['texts'].apply(lambda text: re.search(self.regexPatternForEnrollment['schoolAndYr'],text))
    school, schoolyr = self.extractSchoolAndSchoolYearFromRegexGroup(schoolAndYrRgexGroups[pd.Series.notnull(schoolAndYrRgexGroups)].iloc[0])
    tempDfOfText['enrollmentInfIsDepartmentRoughName'] = tempDfOfText['departmentRegexGroups'].apply(lambda dep: dep is not None )
    tempDfOfText['enrollmentsInfIsRoughIdentity'] = tempDfOfText['identityRegexGroups'].apply(lambda identity: identity is not None )
    tempDfOfText['enrollmentsInfIsFutureStudentRough'] = tempDfOfText['futureStudentsRegexGroups'].apply(lambda fstud: fstud is not None )
    #處理pdfminer把置中文字和靠左文字順序顛倒的問題
    #例如 212011--航空光機電系(電機與電⼦群電機類) 中 一般生正取 合計 1 名 出問題 解決方式是把每一個比對到的置中文字區塊往前移
    #for roughtIdentityPos in tempDfOfText[tempDfOfText['enrollmentsInfIsRoughIdentity']==True].index:
    #  tempDfOfText = swapPositionForElements(tempDfOfText, roughtIdentityPos, roughtIdentityPos-1)
    
    enrollmentsInfIdentityGroups = tempDfOfText.loc[:,['texts','enrollmentsInfIsRoughIdentity','identityRegexGroups']]
    enrollmentsInfIdentityGroups['enrollmentsInfIdentityAmount'] = tempDfOfText['identityRegexGroups'].apply(lambda res: None if res is None else res.group(5))#.astype('int32')
    enrollmentsInfIdentityGroups['enrollmentsInfIdentityType'] = tempDfOfText['identityRegexGroups'].apply(lambda res: None if res is None else res.group(1))
    enrollmentsInfIdentityGroups['enrollmentsInfIdentityQualifiedType'] = tempDfOfText['identityRegexGroups'].apply(lambda res: None if res is None else res.group(3))
    enrollmentsInfIdentityGroups = enrollmentsInfIdentityGroups.loc[enrollmentsInfIdentityGroups['enrollmentsInfIsRoughIdentity']==True,:].drop(columns=['enrollmentsInfIsRoughIdentity']).reset_index(drop=True)

    idxForDepartmentOffset = tempDfOfText['enrollmentInfIsDepartmentRoughName'][tempDfOfText['enrollmentInfIsDepartmentRoughName']==True].index.tolist()
    idxForDepartmentEnd = idxForDepartmentOffset[1:]+[tempDfOfText.shape[0]-1]
    tempDFsOfEnrollmentByDepartment = [tempDfOfText[idxForDepartmentOffset[i]:idxForDepartmentEnd[i]] for i,pos in enumerate(idxForDepartmentOffset)]
    return {
      'tempDfOfText':tempDfOfText,
      'enrollmentsInfIdentityGroupsNoDuplicates':enrollmentsInfIdentityGroups.drop_duplicates().reset_index(drop=True),
      'tempDFsOfEnrollmentByDepartment':tempDFsOfEnrollmentByDepartment,
      'school':school,
      'schoolyr':schoolyr,
    }
  def tempDepartmentsEnrollmentDfClean(self, inputParamDict):
    """
    輸入：pdf中的各個粗糙的純文字學系招生身分別原始表格文字清單 身分別資訊
    把每個表格加上入學身分+系所資訊
    """
    srcdfs = list(inputParamDict['tempDFsOfEnrollmentByDepartment']) if not isinstance(inputParamDict['tempDFsOfEnrollmentByDepartment'], list) else inputParamDict['tempDFsOfEnrollmentByDepartment']
    returndfs = []
    for srcdf_i,srcdf in enumerate(srcdfs):
      tempSingleDepartmentEnrollmentDf = srcdf.reset_index(drop=True)#[tempDFofEnrollmentByDepartment[0]['enrollmentsInfIsRoughIdentity']==True]
      if len(tempSingleDepartmentEnrollmentDf.index)==0:
        continue
      try:
        tempSingleDepartmentEnrollmentDf['departmentRoughName'] = tempSingleDepartmentEnrollmentDf.loc[0,'texts']
        tempSingleDepartmentEnrollmentDf['departmentRegexGroups'] = tempSingleDepartmentEnrollmentDf.loc[0,'departmentRegexGroups']
      except Exception as e:
        print(f'error at {srcdf_i}')
        display(tempSingleDepartmentEnrollmentDf)
        raise(e)
        sys.exit()
      #debug用
      #if re.search('212001',tempSingleDepartmentEnrollmentDf.loc[0,'texts'])==None:
      #  continue

      tempSingleDepartmentEnrollmentIdentities = tempSingleDepartmentEnrollmentDf[tempSingleDepartmentEnrollmentDf['enrollmentsInfIsRoughIdentity']==True].loc[:,['texts']]
      tempSingleDepartmentEnrollmentIdentities = pd.merge(
          left=tempSingleDepartmentEnrollmentIdentities,
          right=inputParamDict['enrollmentsInfIdentityGroupsNoDuplicates'])

      #解決如212006--資訊工程系遊戲設計與競技組(電機與電子群資電類)中 一般生正取 重複出現的問題
      tempSingleDepartmentEnrollmentIdentities['duplicated'] = tempSingleDepartmentEnrollmentIdentities.duplicated(subset=['enrollmentsInfIdentityType','enrollmentsInfIdentityQualifiedType'])
      duplicatedIdentities = tempSingleDepartmentEnrollmentIdentities[tempSingleDepartmentEnrollmentIdentities['duplicated']].loc[:,'texts']
      tempSingleDepartmentEnrollmentDf = tempSingleDepartmentEnrollmentDf[~tempSingleDepartmentEnrollmentDf['texts'].isin(duplicatedIdentities)]

      tempSingleDepartmentEnrollmentDf = tempSingleDepartmentEnrollmentDf.loc[tempSingleDepartmentEnrollmentDf['enrollmentsInfIsFutureStudentRough']==True].reset_index(drop=True)
      tempSingleDepartmentEnrollmentIdentities = tempSingleDepartmentEnrollmentIdentities[~tempSingleDepartmentEnrollmentIdentities['duplicated']]
      cumulative_row_i = 0
      tempSingleDepartmentEnrollmentDf['enrollmentsRoughIdentity'] = None
      for tempSingleDepartmentEnrollmentIdentities_i in tempSingleDepartmentEnrollmentIdentities.index:
        addnum = int(tempSingleDepartmentEnrollmentIdentities['enrollmentsInfIdentityAmount'][tempSingleDepartmentEnrollmentIdentities_i])
        identityType = tempSingleDepartmentEnrollmentIdentities['texts'][tempSingleDepartmentEnrollmentIdentities_i]
        identityRegexGroup = tempSingleDepartmentEnrollmentIdentities['identityRegexGroups'][tempSingleDepartmentEnrollmentIdentities_i]
        try:
          tempSingleDepartmentEnrollmentDf.loc[cumulative_row_i:cumulative_row_i+addnum, 'enrollmentsRoughIdentity'] = identityType
          tempSingleDepartmentEnrollmentDf.loc[cumulative_row_i:cumulative_row_i+addnum, 'identityRegexGroups'] = identityRegexGroup
          cumulative_row_i = cumulative_row_i+addnum
        except Exception as e:
          display('error at addnum {} identityType {}'.format(addnum, identityType))
          display(tempSingleDepartmentEnrollmentIdentities)
          raise(e)
      tempSingleDepartmentEnrollmentDf = tempSingleDepartmentEnrollmentDf.drop(columns=[col for col in tempSingleDepartmentEnrollmentDf.columns if re.search('InfIs', col) is not None])
      tempSingleDepartmentEnrollmentDf = tempSingleDepartmentEnrollmentDf.rename(columns={'texts':'futureStudentRough'})
      tempSingleDepartmentEnrollmentDf['school'], tempSingleDepartmentEnrollmentDf['schoolyr'] = inputParamDict['school'], inputParamDict['schoolyr']
          
      returndfs.append(tempSingleDepartmentEnrollmentDf)
    return pd.concat(returndfs).reset_index(drop=True)

  def generateEnrollmentDfFromPdf(self, pdffiles):
    if not isinstance(pdffiles, list):
      pdffiles = [pdffiles]
    overallResDF = []
    for pdffile in pdffiles:
      resDF = self.generateNecessaryInfDfFromPdf(pdffile)
      resDF = self.tempDepartmentsEnrollmentDfClean(resDF)
      resDF = self.cleanEnrollmentDF(resDF)
      overallResDF.append(resDF)
    overallResDF = pd.concat(overallResDF).reset_index(drop=True)
    return overallResDF

# 測試從榜單PDF檔案中清理成dataframe並存成crawledData.csv

In [None]:
# Sample admission-list PDFs, looked up relative to the current working directory.
testInputPdfFiles = [
  pathlib.Path(pdfName).resolve()
  for pdfName in ("1 萬能科大.pdf", "2 淡江大學.pdf", "3 義守大學.pdf")
]
crawlerInstance = enrollmentCrawler()
crawledData = crawlerInstance.generateEnrollmentDfFromPdf(testInputPdfFiles)
display(crawledData)
# The DataFrame index is written as the first CSV column.
crawledData.to_csv('crawledData.csv')

Unnamed: 0,school,schoolyr,futureStudentID,futureStudentName,departmentID,departmentName,enrollmentIdentity,enrollmentIdentityPriority,futureStudentRank
0,萬能科技⼤學,111,52010383,翁Ｏ媛,212001,室內設計與營建科技系室內設計與管理組(機械群),⼀般⽣,正取,
1,萬能科技⼤學,111,56010171,劉Ｏ誠,212001,室內設計與營建科技系室內設計與管理組(機械群),⼀般⽣,正取,
2,萬能科技⼤學,111,56010235,孫Ｏ祥,212001,室內設計與營建科技系室內設計與管理組(機械群),⼀般⽣,正取,
3,萬能科技⼤學,111,52010275,徐Ｏ哲,212001,室內設計與營建科技系室內設計與管理組(機械群),⼀般⽣,正取,
4,萬能科技⼤學,111,57010135,郭Ｏ誠,212001,室內設計與營建科技系室內設計與管理組(機械群),⼀般⽣,正取,
...,...,...,...,...,...,...,...,...,...
743,義守大學,111,65170237,陳Ｏ詮,814014,廚藝學系(餐旅群),一般生,備取,[備 3]
744,義守大學,111,64200001,吳Ｏ俞,814015,大眾傳播學系(藝術群影視類),一般生,正取,
745,義守大學,111,67200025,林Ｏ晨,814015,大眾傳播學系(藝術群影視類),一般生,正取,
746,義守大學,111,70200180,秦ＯＯ,814015,大眾傳播學系(藝術群影視類),一般生,正取,


# 測試從榜單網頁中清理成dataframe並存成crawledData.csv

In [None]:
# Admission-result web pages to crawl, one entry per school.
srcUrls = [
  'https://recruit.ctu.edu.tw/var/file/44/1044/img/1110627001.html',
  'https://rd.asia.edu.tw/uploads/archive_file_multiple/file/62b940418199fb55206131b4/index.html',
  'https://www.cyut.edu.tw/~recruit/Interviewstudent/index.html',
]
crawlerInstance = enrollmentCrawler()
crawledData = crawlerInstance.enrollmentWebPageToDF(srcUrls)
display(crawledData)
# Writes to the same file name as the PDF test, replacing its output.
crawledData.to_csv('crawledData.csv')

Unnamed: 0,school,schoolyr,futureStudentRank,futureStudentID,futureStudentName,departmentID,departmentName,enrollmentIdentity,enrollmentIdentityPriority
0,建國科技大學,111,,61010512,張Ｏ銓,213001,機械工程系(機械群),一般生,正取
1,建國科技大學,111,,61010492,高Ｏ程,213001,機械工程系(機械群),一般生,正取
2,建國科技大學,111,,65010094,許Ｏ婷,213001,機械工程系(機械群),一般生,正取
3,建國科技大學,111,,65010022,林Ｏ紘,213001,機械工程系(機械群),一般生,正取
4,建國科技大學,111,,65010153,詹Ｏ竣,213001,機械工程系(機械群),一般生,正取
...,...,...,...,...,...,...,...,...,...
3755,朝陽科技大學,111,[備4],71540005,洪Ｏ嶸,201058,飛行與民航人員技術系(商業與管理群),一般生,備取
3756,朝陽科技大學,111,[備5],66090416,楊Ｏ諭,201058,飛行與民航人員技術系(商業與管理群),一般生,備取
3757,朝陽科技大學,111,[備6],61530133,洪Ｏ惟,201058,飛行與民航人員技術系(商業與管理群),一般生,備取
3758,朝陽科技大學,111,[備2],63090145,楊Ｏ嘉,201058,飛行與民航人員技術系(商業與管理群),一般生,備取


### 其他測試Debug

In [None]:
# Earlier attempts at the applicant split pattern, superseded by the one below:
#   r'\s{1,2}((\[備\d+\])?\d+\s?[\u4E00-\u9FFF\uF900-\uFAFF○Ｏ]+)'
#   r'\s{1,2}(\[備\s?\d+\])?(\d{4,})([^\-]+){0,}'
splitPattern = r'\s?((?:\[備[\s\d ]+\])?(?:\d+[\s ]?[^\s]+))'
# One sample per layout seen in the lists: spaced id/name pairs, unspaced
# pairs, and waiting-list entries with and without a space inside the rank.
for sampleLine in (
  '70090589 戴Ｏ駿 52540098 吳Ｏ蓉 70091178 曾Ｏ鈺 75090151 馬Ｏ爾 69090113 魏Ｏ臻',
  '64070099黃Ｏ禎 52070065吳Ｏ梃',
  '[備2]52070134劉Ｏ珆 [備3]52070154林Ｏ柔 [備4]52070077彭Ｏ忻',
  '[備 1]52070427 秦Ｏ妡 [備 2]68070143 林Ｏ均',
):
  display(re.split(splitPattern, sampleLine))

['',
 '70090589 戴Ｏ駿',
 '',
 '52540098 吳Ｏ蓉',
 '',
 '70091178 曾Ｏ鈺',
 '',
 '75090151 馬Ｏ爾',
 '',
 '69090113 魏Ｏ臻',
 '']

['', '64070099黃Ｏ禎', '', '52070065吳Ｏ梃', '']

['', '[備2]52070134劉Ｏ珆', '', '[備3]52070154林Ｏ柔', '', '[備4]52070077彭Ｏ忻', '']

['', '[備 1]52070427 秦Ｏ妡', '', '[備 2]68070143 林Ｏ均', '']