In [None]:
import requests
import json
import time
import copy
import re
import math
import unicodedata
from bs4 import BeautifulSoup as bs
import tqdm as tqdm

In [None]:
def web_request(method_name, url, dict_data, is_urlencoded=True, timeout=30):
    """Send a GET or POST request and return the outcome as a dict.

    Args:
        method_name: 'GET' or 'POST' (case-insensitive).
        url: Target URL.
        dict_data: Query parameters for GET, or the request body for POST.
        is_urlencoded: POST only. When exactly ``True`` the body is sent
            form-urlencoded with a browser User-Agent; otherwise it is
            serialized with ``json.dumps`` and sent as application/json.
        timeout: Seconds handed to ``requests`` as the request timeout, so a
            stalled server cannot block the caller forever.

    Returns:
        A dict holding 'status_code', 'ok', 'encoding' and 'Content-Type',
        merged with the decoded JSON object when the response is JSON, or
        with a 'text' entry otherwise. A JSON payload that is not an object
        (for example a list) is returned under the 'json' key.

    Raises:
        ValueError: If method_name is neither GET nor POST.
    """
    method_name = method_name.upper()  # normalize so 'get' / 'post' also work
    if method_name not in ('GET', 'POST'):
        raise ValueError('method_name is GET or POST plz...')

    if method_name == 'GET':
        response = requests.get(url=url, params=dict_data, timeout=timeout)
    elif is_urlencoded is True:
        response = requests.post(
            url=url,
            data=dict_data,
            headers={'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.70'},
            timeout=timeout,
        )
    else:
        response = requests.post(
            url=url,
            data=json.dumps(dict_data),
            headers={'Content-Type': 'application/json'},
            timeout=timeout,
        )

    # .get() instead of indexing: a response without a Content-Type header
    # used to raise KeyError here.
    content_type = response.headers.get('Content-Type')
    dict_meta = {'status_code': response.status_code, 'ok': response.ok, 'encoding': response.encoding,
                 'Content-Type': content_type}

    if 'json' in (content_type or ''):
        payload = response.json()
        if isinstance(payload, dict):
            # Payload keys are merged last, so they win over the meta keys.
            return {**dict_meta, **payload}
        # A non-object payload cannot be merged with ``**``.
        return {**dict_meta, 'json': payload}
    return {**dict_meta, 'text': response.text}

In [None]:
def 우리동네키움센터전체목록만들기(req_method, url, body):
    """Crawl every page of a facility list and return one dict per facility.

    The first page is requested with ``body`` as given; its 'p.board_info'
    text supplies the total record count, from which the number of pages is
    derived (15 records per page). Pages 2..N are then requested with
    'q_currPage' set to the page number.

    Args:
        req_method: HTTP method name passed through to ``web_request``.
        url: List page URL.
        body: Form fields for the list request. Must contain 'q_svcClCode'.
            The mapping is copied, so the caller's object is not modified.

    Returns:
        A list of dicts. Each has the keys '센터명', '주소', '전화번호',
        '이용료', '태그' (as many as the entry provides spans for) plus
        'q_svcClCode', 'q_fclty', 'q_fcltyId', 'ext_linkpage' and '_id'.
    """
    # Imported here because the file-level ``import tqdm as tqdm`` binds the
    # module itself, which is not callable; calling it raised TypeError.
    from tqdm import tqdm as progress_bar

    rows_per_page = 15  # page size the total page count is derived from

    def htmlparse(list_html, labels):
        """Turn the list entries of one parsed page into facility dicts."""
        parsed = []
        for item in list_html.select('div.board_box_ty02 li'):
            # Skip entries without an info paragraph. Previously the record
            # was still built for them, which raised NameError on the first
            # entry or appended the previous facility a second time.
            if item.select_one('p.box_info_p') is None:
                continue

            # The anchor's onclick is a call whose arguments carry the ids:
            # keep the text between the outer parentheses, drop the double
            # quotes and split on commas.
            onclick = item.select_one('a')['onclick']
            detail_link = onclick[onclick.index('(')+1:onclick.rindex(')')].replace('"', '').split(',')

            list_values = []
            for inside_item in item.select('span'):
                label_tag = inside_item.find('em')
                if label_tag is None:
                    # Spans without an <em> label: 0 center name, 4 tags.
                    list_values.append(inside_item.text.replace('\n', ','))
                else:
                    # Labelled spans: 1 address, 2 phone number, 3 usage fee.
                    # The <em> label is removed so only the value remains.
                    # NOTE: NFKD normalization was tried on this value but
                    # rendered addresses oddly, so the text is kept as-is.
                    label_tag.decompose()
                    list_values.append(inside_item.text.strip().replace('\t', '').replace('\n', ''))

            센터정보 = dict(zip(labels, list_values))
            센터정보['q_svcClCode'] = body['q_svcClCode']
            센터정보['q_fclty'] = detail_link[0]
            센터정보['q_fcltyId'] = detail_link[1]
            센터정보['ext_linkpage'] = detail_link[3]
            센터정보['_id'] = 'ObjectID()'
            parsed.append(센터정보)
        return parsed

    # Work on a copy so setting q_currPage below does not leak to the caller.
    page_body = dict(body)
    list_labels = ['센터명', '주소', '전화번호', '이용료', '태그']

    first_res = web_request(req_method, url, page_body)
    first_html = bs(first_res['text'], "html.parser")

    # Keep the text from '전체' up to '건' and strip every non-digit from it
    # to obtain the total record count.
    total_cnt = first_html.select_one('p.board_info').text
    total_cnt = total_cnt[total_cnt.index('전체'):total_cnt.index('건')]
    total_cnt = re.sub(r'[^0-9]', '', total_cnt)
    total_pages = math.ceil(int(total_cnt) / rows_per_page)

    센터정보리스트 = htmlparse(first_html, list_labels)
    print('1페이지 센터정보생성')

    for page_no in progress_bar(range(2, total_pages + 1)):
        page_body['q_currPage'] = page_no
        res = web_request(req_method, url, page_body)
        html = bs(res['text'], "html.parser")
        센터정보리스트.extend(htmlparse(html, list_labels))
        print(str(page_no)+'페이지 센터정보생성')

    return 센터정보리스트

In [None]:
def 상세화면조회함수(svcClCode, fcltyId, fclty):
    """Request a facility detail page and return two values from its map link.

    The service code decides which form field carries ``fcltyId``:
    codes '1006' and '1008' send no id at all; '2001', '2002', '2003' and
    '1007' send it as 'q_clturEventEtcSn' ('2001' and '2002' also set
    'q_gubun'); every other code sends it as 'q_fcltyId'.

    Args:
        svcClCode: Service classification code (compared against strings).
        fcltyId: Facility or event identifier.
        fclty: Value sent as 'q_fclty'.

    Returns:
        The first two comma-separated arguments of the onclick call on
        'div.center_infoBox a.center_info_btn02', with single quotes removed
        (presumably latitude and longitude -- confirm against the page).
    """
    q_fcltyId = q_clturEventEtcSn = q_gubun = ''

    if svcClCode not in ('1006', '1008'):
        if svcClCode in ('2001', '2002', '2003', '1007'):
            q_clturEventEtcSn = fcltyId
            if svcClCode == '2001':
                q_gubun = '1001'
            elif svcClCode == '2002':
                q_gubun = '1002'
        else:
            q_fcltyId = fcltyId

    form_fields = [
        ('q_hiddenVal', 1),
        ('q_fcltyId', q_fcltyId),
        ('q_fclty', fclty),
        ('q_clturEventEtcSn', q_clturEventEtcSn),
        ('q_gubun', q_gubun),
    ]
    detail_page = bs(web_request('POST', DETAIL_PAGE, form_fields)['text'], "html.parser")

    onclick = detail_page.select_one('div.center_infoBox a.center_info_btn02')['onclick']
    # Slice from just after '(' to one character before the last ')'.
    args_text = onclick[onclick.index('(') + 1:onclick.rindex(')') - 1]
    return args_text.replace("'", '').split(',')[:2]

In [None]:
# Endpoint that serves the paged facility list.
LIST_PAGE = (
    'https://icare.seoul.go.kr/icare/user/fcltyInfoManage/'
    'BD_selectFcltyInfoManageList.do'
)
# Endpoint that serves a single facility's detail page.
DETAIL_PAGE = (
    'https://icare.seoul.go.kr/icare/user/fcltyInfoManage/'
    'BD_selectFcltyInfoManage.do'
)
# HTTP method name used for the list requests.
REQ_POST = 'POST'

In [None]:
# Build one list-request form per service tab found on the list page.
탭목록 = []
list_res = web_request(REQ_POST, LIST_PAGE, {'q_hiddenVal': '1', 'q_searchAt': 'Y'})
list_html = bs(list_res['text'], "html.parser")
for item in list_html.select('div.mt_10 li'):
    # The '전체' tab is not turned into a request form.
    if item.text != '전체':
        # Take the text between '(' and ')' of the onclick call, minus one
        # character at each end; the result is used as the tab's code.
        cd_val = item.select_one('a')['onclick']
        cd_val = cd_val[cd_val.index('(') + 2:cd_val.index(')') - 1]
        temp_dict = dict(
            label=item.text,
            q_hiddenVal='1',
            q_searchAt='Y',
            q_currPage='1',
            q_rowPerPage='15',
            q_fcltyClCode='1005',
            q_tabVal=cd_val,
            q_svcClCode=cd_val,
        )
        # Tab '1008' sends '2002,2003' as q_moreSvc; other tabs send their own code.
        temp_dict['q_moreSvc'] = '2002,2003' if cd_val == '1008' else cd_val
        탭목록.append(temp_dict)

{'label': '우리동네 키움센터', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1001', 'q_svcClCode': '1001', 'q_moreSvc': '1001'}
{'label': '지역 아동센터', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1003', 'q_svcClCode': '1003', 'q_moreSvc': '1003'}
{'label': '초등돌봄교실', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1005', 'q_svcClCode': '1005', 'q_moreSvc': '1005'}
{'label': '공동육아방', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1002', 'q_svcClCode': '1002', 'q_moreSvc': '1002'}
{'label': '공동육아 나눔터', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1004', 'q_svcClCode': '1004', 'q_moreSvc': '1004'}
{'label': '방과후 아카데미', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1010', 'q_svcClCode': '1010', 'q_moreSvc': '1010'}
{'label': '영.유아 보육시설', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1006', 'q_svcClCode': '1006', 'q_moreSvc': '1006'}
{'label': '교육', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1008', 'q_svcClCode': '1008', 'q_moreSvc': '2002,2003'}
{'label': '지역보육정보', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1007', 'q_svcClCode': '1007', 'q_moreSvc': '1007'}

유아교육프로그램
#q_clturEvent: 2003
나이로 영유아
#q_ageSeCode: 1001
{'label': '공동육아방', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1002', 'q_svcClCode': '1002', 'q_moreSvc': '1002'}
{'label': '공동육아 나눔터', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1004', 'q_svcClCode': '1004', 'q_moreSvc': '1004'}
{'label': '영.유아 보육시설', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1006', 'q_svcClCode': '1006', 'q_moreSvc': '1006'}
{'label': '지역보육정보', 'q_hiddenVal': '1', 'q_searchAt': 'Y', 'q_currPage': '1', 'q_rowPerPage': '15', 'q_fcltyClCode': '1005', 'q_tabVal': '1007', 'q_svcClCode': '1007', 'q_moreSvc': '1007'}

In [None]:
# List-request form with q_tabVal / q_svcClCode / q_moreSvc '1002' and
# q_ageSeCode '1001'.
body = {
    'q_hiddenVal': '1',
    'q_searchAt': 'Y',
    'q_currPage': '1',
    'q_rowPerPage': '15',
    'q_fcltyClCode': '1005',
    'q_tabVal': '1002',
    'q_svcClCode': '1002',
    'q_moreSvc': '1002',
    'q_ageSeCode': '1001',
}

In [None]:
# Fetch the list page for ``body`` and parse the returned HTML.
first_res = web_request(method_name=REQ_POST, url=LIST_PAGE, dict_data=body)
first_html = bs(first_res['text'], features="html.parser")

In [None]:
# 사이트 링크가 존재하며 예약페이지 따로 있음
<a href="#" onclick='fnView("1002","NW200403","2호점 공릉2동 도담도담나눔터(공동육아방)","https://www.nwscc.or.kr/php/index.php?pno=03080402");'>
<span class="box_title">2호점 공릉2동 도담도담나눔터(공동육아방)</span>
<p class="box_info_p">
<span class="box_txt">
<em>주소</em><b>서울특별시 노원구 화랑로51길 78</b>
</span>
<span class="box_txt">
<em>연락처</em>

In [None]:
def 공동육아방상세화면(svcClCode, fcltyId, fclty):
    """Request a facility detail page and return it as parsed HTML.

    Which form field carries ``fcltyId`` depends on ``svcClCode``:
    '1006' and '1008' send no id; '2001' and '2002' send it as
    'q_clturEventEtcSn' together with 'q_gubun' '1001' / '1002';
    '2003' and '1007' send it as 'q_clturEventEtcSn' only; any other code
    sends it as 'q_fcltyId'.

    Args:
        svcClCode: Service classification code (compared against strings).
        fcltyId: Facility or event identifier.
        fclty: Value sent as 'q_fclty'.

    Returns:
        The BeautifulSoup document parsed from the detail page response.
    """
    # Insertion order here is the order in which the fields are posted.
    form = {
        'q_hiddenVal': 1,
        'q_fcltyId': '',
        'q_fclty': fclty,
        'q_clturEventEtcSn': '',
        'q_gubun': '',
    }

    if svcClCode in ('1006', '1008'):
        pass  # these codes post no identifier
    elif svcClCode == '2001':
        form.update(q_clturEventEtcSn=fcltyId, q_gubun='1001')
    elif svcClCode == '2002':
        form.update(q_clturEventEtcSn=fcltyId, q_gubun='1002')
    elif svcClCode in ('2003', '1007'):
        form['q_clturEventEtcSn'] = fcltyId
    else:
        form['q_fcltyId'] = fcltyId

    response = web_request('POST', DETAIL_PAGE, list(form.items()))
    # TODO: decide what to parse out of the page from here on.
    #   - usage hours: the <li> texts of the second '.center_i_con ul'
    #   - coordinates: the onclick of 'div.center_infoBox a.center_info_btn02',
    #     e.g. "mapPopup('37.62607','127.08812','2호점 공릉2동 도담도담나눔터(공동육아방)');"
    #     (store the coordinates as numbers)
    return bs(response['text'], "html.parser")

In [None]:
# Collect the button link, usage hours and map coordinates from ``result``
# (assumed to be a parsed detail page created in another cell -- confirm).
reserve_anchor = result.select_one('.center_i_btn a')
result_temp = {reserve_anchor.text: reserve_anchor['href'], '이용시간': ''}

# One line per <li> of the second '.center_i_con ul', each ending in '\n'.
hour_lines = []
for item in result.select('.center_i_con ul')[1].select('li'):
    hour_lines.append(item.text.strip().replace('\n', '') + '\n')
result_temp['이용시간'] = ''.join(hour_lines)

# Arguments of the onclick call: slice from just after '(' to one character
# before the last ')', drop single quotes, split on commas.
onclick_js = result.select_one('div.center_infoBox a.center_info_btn02')['onclick']
axis_info = onclick_js[onclick_js.index('(') + 1:onclick_js.rindex(')') - 1]
axis_info = axis_info.replace("'", '').split(',')
result_temp['위도'] = float(axis_info[0])
result_temp['경도'] = float(axis_info[1])

In [None]:
{'예약 신청': 'https://www.nwscc.or.kr/php/index.php?pno=03080402',
 '이용시간': '1타임09:30 ~ 11:30\n2타임13:00 ~ 15:00\n3타임15:30 ~ 17:30\n',
 '위도': 37.62607,
 '경도': 127.08812}