In [7]:
import requests as rq
import json
import pandas as pd
from tqdm import tqdm
import time
import pickle
import numpy as np
from datetime import datetime

## Get Data

### Get Room List From Subway

In [8]:
def get(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up; forwarded to
        ``requests.get``. Without a timeout a stalled connection would block
        the calling loop indefinitely.

    Returns
    -------
    object or None
        The parsed JSON payload when the response status is 200. ``None`` when
        the status is anything else or when the request itself fails; in both
        cases the URL and the reason are printed.
    """
    try:
        res = rq.get(url, timeout=timeout)
    except rq.RequestException as err:
        # Transport failures follow the same "report and return None" policy
        # as non-200 responses, so one bad request does not abort a long crawl.
        print(url, err)
        return None
    if res.status_code == 200:
        return json.loads(res.text)
    print(url, res.status_code)
    return None

In [9]:
def get_room_ids(subway_info):
    """Collect the distinct listings returned for one subway station.

    Parameters
    ----------
    subway_info : dict
        Station record; its ``'id'`` is used to build the request URL and its
        ``'name'`` is copied onto every returned row.

    Returns
    -------
    list of dict
        One ``{'item_id', 'section_type', 'name'}`` dict per distinct item id,
        in the order the items first appear in the response. Empty when the
        request fails or the response carries no ``'list_items'``.
    """
    subway_id = subway_info['id']
    subway_name = subway_info['name']
    room_list_json = get(f"https://apis.zigbang.com/v3/items/ad/{subway_id}")
    if not room_list_json:
        # get() returns None for failed requests; treat that as "no rooms"
        # instead of failing on None['list_items'].
        return []

    room_ids = []
    visited = set()  # item ids already emitted; set gives O(1) membership checks
    for entry in room_list_json.get('list_items') or []:
        simple_item = entry.get('simple_item')
        if not simple_item:
            # Entries without a 'simple_item' payload carry no room id.
            continue
        item_id = simple_item['item_id']
        if item_id in visited:
            continue
        visited.add(item_id)
        room_ids.append({
            'item_id': item_id,
            'section_type': entry['section_type'],
            'name': subway_name,
        })
    return room_ids

In [10]:
# Date stamp (two-digit year, month, day) embedded in every output file name.
today = f"{datetime.now():%y%m%d}"

In [11]:
# Download the full station list, then keep only the stations whose
# 'local1' field is Seoul; the cell displays how many remain.
url_subway_all = 'https://apis.zigbang.com/property/biglab/subway/all'
subway_ids = get(url_subway_all)
subway_seoul_ids = list(
    filter(lambda station: station['local1'] == '서울특별시', subway_ids)
)
len(subway_seoul_ids)

296

In [None]:
# Visit each Seoul station in turn: log its name, pool the listings found for
# it, and pause two seconds before the next request.
room_ids = []
for station in subway_seoul_ids:
    print(station['name'])
    room_ids += get_room_ids(station)
    time.sleep(2)

서울역
시청역
종각역
종로3가역
종로5가역
동대문역
신설동역
제기동역
청량리역
동묘앞역
을지로입구역
을지로3가역
을지로4가역
동대문역사문화공원역
신당역
상왕십리역
왕십리역
한양대역
뚝섬역
성수역
건대입구역
구의역
강변역
잠실나루역
잠실역
잠실새내역
종합운동장역
삼성역
선릉역
역삼역
강남역
교대역
서초역
방배역
사당역
낙성대역
서울대입구역
봉천역
신림역
신대방역
구로디지털단지역
대림역
신도림역
문래역
영등포구청역
당산역
합정역
홍대입구역
신촌역
이대역
아현역
충정로역
용답역
신답역
도림천역
양천구청역
신정네거리역
용두역
까치산역
구파발역
연신내역
불광역
녹번역
홍제역
무악재역
독립문역
경복궁역
안국역
충무로역
동대입구역
약수역
금호역
옥수역


In [None]:
# Turn the collected listing rows into a table and save it under a
# date-stamped file name.
room_id_df = pd.DataFrame(data=room_ids)
room_id_df.to_csv(path_or_buf=f'data_subway/room_ids_{today}.csv', index=False)

In [None]:
# Split every station record into two tables:
#   * subway_info      - the station's own fields, without the nested 'lines'
#   * subway_line_info - one (station name, line short name) row per line
station_records = [
    {key: value for key, value in station.items() if key != 'lines'}
    for station in subway_seoul_ids
]
line_records = [
    {'name': station['name'], 'line': served_line['short_name']}
    for station in subway_seoul_ids
    for served_line in station['lines']
]

subway_info = pd.DataFrame(station_records)
subway_line_info = pd.DataFrame(line_records)

In [None]:
# Save both station tables next to the room id file, using the same date stamp.
subway_info.to_csv(
    path_or_buf=f'data_subway/subway_info_{today}.csv',
    index=False,
)
subway_line_info.to_csv(
    path_or_buf=f'data_subway/subway_line_info_{today}.csv',
    index=False,
)

### Get Room Info By Subway

In [None]:
class Subway:
    """Lookup helper over a station-to-line table.

    Parameters
    ----------
    subway_line_info : pandas.DataFrame
        Table with at least a ``'name'`` column (station name) and a ``'line'``
        column (line label), one row per station/line pair.
    """

    def __init__(self, subway_line_info):
        self.subway_line_info = subway_line_info

    def get_subway_from_line(self, line):
        """Return the names of the stations served by ``line``.

        Parameters
        ----------
        line : int or str
            Line label, e.g. ``1`` or ``'1'``.

        Returns
        -------
        numpy.ndarray
            Values of the ``'name'`` column for the matching rows; empty when
            no row matches.
        """
        # Compare both sides as text: the column may hold strings (built from
        # the API payload) or integers (e.g. inferred by read_csv), and the
        # argument may be a str, a Python int or a numpy integer.
        line_labels = self.subway_line_info['line'].astype(str)
        matches = line_labels == str(line)
        return self.subway_line_info.loc[matches, 'name'].values
    

In [None]:
# Number of distinct line labels (a missing label, if any, counts as one value).
subway_line_info['line'].nunique(dropna=False)

In [None]:
# Example lookup: keep only the listings whose station is served by line 1.
sw = Subway(subway_line_info)
line_1 = sw.get_subway_from_line(line=1)
room_1 = room_id_df.loc[room_id_df['name'].isin(line_1)]

### Get Room Info for All Rooms

In [None]:
# Reload the three tables from the date-stamped CSVs written earlier in this
# notebook. The file names embed `today`, so only files saved with the same
# date stamp are found.
room_id_df = pd.read_csv(
    filepath_or_buffer=f'data_subway/room_ids_{today}.csv',
)
subway_info = pd.read_csv(
    filepath_or_buffer=f'data_subway/subway_info_{today}.csv',
)
subway_line_info = pd.read_csv(
    filepath_or_buffer=f'data_subway/subway_line_info_{today}.csv',
)

In [None]:
# Distinct listing ids, in order of first appearance; the same listing can be
# returned for more than one station.
ids = pd.unique(room_id_df['item_id'])

In [None]:
# Optional resume settings (disabled). Presumably meant to skip the ids whose
# details were already downloaded in an earlier run — TODO confirm.
# start = 26000  # number of records already saved so far
# ids_ = ids[start:]

In [None]:
# Download the detail record of every listing id, checkpointing to disk in
# batches so an interrupted run loses at most one batch.

# `start` is the number of ids already scraped in a previous run. Its only
# assignment in this notebook sits in the commented-out resume cell, so fall
# back to a fresh run (0) when it has not been defined.
try:
    start
except NameError:
    start = 0

room_info_list = []
# Wrap the sequence itself (not enumerate(...)) so tqdm knows the total and can
# show a real progress bar; skip the ids that were already handled.
for idx, room_id in enumerate(tqdm(ids[start:])):
    room_info_url = f"https://apis.zigbang.com/v2/items/{room_id}"
    room_info = get(room_info_url)
    room_info_list.append(room_info)
    # Random 0.2-0.7 s pause between requests.
    time.sleep(np.random.random()*0.5 + 0.2)
    if idx % 500 == 499:
        # The file name carries the running count of processed ids.
        with open(f'data_room/room_info_{start+idx+1}_{today}.pkl', 'wb') as f:
            pickle.dump(room_info_list[idx-499:idx+1], f)

# Write the final partial batch too; without this the last
# len(ids) % 500 records would never reach disk.
leftover = len(room_info_list) % 500
if leftover:
    with open(f'data_room/room_info_{start+len(room_info_list)}_{today}.pkl', 'wb') as f:
        pickle.dump(room_info_list[-leftover:], f)

## Trash

In [44]:
# Scratch requests against the listing endpoint for subway id 17:
# req1 passes explicit query filters, req2 uses the bare URL.
req1 = rq.get(
    url="https://apis.zigbang.com/v3/items/ad/17?deposit_s=0&detail=false&domain=zigbang&floor=1~%7Crooftop%7Csemibase&radius=1&rent_s=0&sales_type="
)
req2 = rq.get(url="https://apis.zigbang.com/v3/items/ad/17")