### Zigbang 원룸 매물 데이터 수집

In [1]:
import requests
import pandas as pd

#### Process
1. 동이름으로 위도 경도 구하기
2. 위도 경도로 geohash 알아내기
3. geohash로 매물 아이디 가져오기
4. 매물 아이디로 매물 정보 가져오기

#### 1. 동이름으로 위도 경도 구하기

In [19]:
addr = '망원동'
url = 'https://apis.zigbang.com/v2/search'
# Let requests build the query string so `addr` is always URL-encoded,
# whatever characters it contains.
response = requests.get(
    url,
    params={'leaseYn': 'N', 'q': addr, 'serviceType': '원룸'},
    timeout=10,  # do not let a stalled connection hang the kernel
)
response.raise_for_status()  # fail on HTTP errors before trying to parse JSON

# The first search hit supplies the coordinates used by the geohash step.
data = response.json()['items'][0]
lat, lng = data['lat'], data['lng']
lat, lng

(37.556785583496094, 126.9013442993164)

#### 2. 위도 경도로 geohash 알아내기

In [24]:
!pip install geohash2

Collecting geohash2
  Downloading geohash2-1.1.tar.gz (15 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: geohash2
  Building wheel for geohash2 (setup.py): started
  Building wheel for geohash2 (setup.py): finished with status 'done'
  Created wheel for geohash2: filename=geohash2-1.1-py3-none-any.whl size=15556 sha256=aedc37f67e866c2b009b5987438ab5c24081bd8ef54f3ef5fa027afb8f2bba64
  Stored in directory: c:\users\user\appdata\local\pip\cache\wheels\00\d5\b6\3fbe4088f7912982f596eaddfd593d16096468a2f13e470ae7
Successfully built geohash2
Installing collected packages: geohash2
Successfully installed geohash2-1.1


In [26]:
import geohash2

# Number of geohash characters to generate for the coordinates from step 1.
GEOHASH_PRECISION = 5

geohash = geohash2.encode(lat, lng, precision=GEOHASH_PRECISION)
geohash

'wydjx'

#### 3. geohash로 매물 아이디 가져오기

In [34]:
# The query string is unchanged; it is only split across lines for readability.
url = (
    'https://apis.zigbang.com/v2/items/oneroom'
    f'?geohash={geohash}&depositMin=0&rentMin=0'
    '&salesTypes[0]=전세&salesTypes[1]=월세'
    '&domain=zigbang&checkAnyItemWithoutFilter=true'
)
response = requests.get(url, timeout=10)  # bounded wait instead of hanging forever
response.raise_for_status()  # fail on HTTP errors instead of parsing an error body later
response

<Response [200]>

In [42]:
# Collect the id of every listing in the response.
# The loop variable is named `item` so that it does not overwrite the `data`
# variable created in step 1 (a `for` loop variable stays bound after the loop).
item_ids = []
for item in response.json()['items']:
    item_ids.append(item['itemId'])
len(item_ids), item_ids[:5]

(381, [42196796, 42078100, 42149571, 42184803, 42204685])

In [40]:
# Same id list as the explicit loop, built with a list comprehension.
listings = response.json()['items']
item_ids = [listing['itemId'] for listing in listings]
len(item_ids), item_ids[:5]

(381, [42196796, 42078100, 42149571, 42184803, 42204685])

#### 4. 매물 아이디로 매물 정보 가져오기

In [50]:
url = 'https://apis.zigbang.com/v2/items/list'
# The second positional argument of requests.post is `data` (the request
# body), not URL query parameters, so the dict is named and passed accordingly.
payload = {
    'domain': 'zigbang',
    'item_ids': item_ids,
}
response = requests.post(url, data=payload, timeout=10)
response.raise_for_status()  # fail on HTTP errors before the JSON is used
response

<Response [200]>

In [70]:
data = response.json()['items']
columns = ['item_id', 'sales_title', 'deposit', 'rent', 'size_m2', 'floor',
           'building_floor', 'title', 'manage_cost', 'address1']

listings = pd.DataFrame(data)
# Keep only the rows whose address mentions `addr`.
# regex=False: `addr` is a literal place name, not a regular expression.
# na=False: rows with a missing address are excluded instead of yielding NaN in the mask.
in_addr = listings['address1'].str.contains(addr, regex=False, na=False)
df = listings.loc[in_addr, columns].reset_index(drop=True)
df.tail(3)

Unnamed: 0,item_id,sales_title,deposit,rent,size_m2,floor,building_floor,title,manage_cost,address1
18,42159695,월세,2000,75,16.1,2,4,신축 . 5천 . 65만원까지 조절 가능,7,서울시 마포구 망원동
19,42039500,월세,30000,20,29.07,2,6,청년버팀목대출 보증보험가입 가능 투룸 반전세,5,서울시 마포구 망원동
20,42232358,월세,300,73,17.0,3,3,"v망원한강코앞,화이트톤방,깔끔한 화장실,채광,조용한주택가",8,서울시 마포구 망원동


In [72]:
# Show up to 40 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 40)

In [74]:
# List the column labels of the current `df`.
df.columns

Index(['item_id', 'sales_title', 'deposit', 'rent', 'size_m2', 'floor',
       'building_floor', 'title', 'manage_cost', 'address1'],
      dtype='object')

In [83]:
# Reusable version of steps 1-4.
def oneroom(addr, timeout=10, max_items=900):
    """Return Zigbang one-room listings whose address contains ``addr``.

    The function searches for ``addr`` to obtain coordinates, encodes them as
    a precision-5 geohash, fetches the listing ids for that geohash, fetches
    the listing details, and keeps the rows whose ``address1`` contains
    ``addr``.

    Parameters
    ----------
    addr : str
        Place name to search for (e.g. a dong name). It is matched literally
        against ``address1``, not as a regular expression.
    timeout : float, default 10
        Timeout in seconds passed to every HTTP request.
    max_items : int, default 900
        Maximum number of listing ids sent to the details endpoint.

    Returns
    -------
    pandas.DataFrame
        Columns ``item_id, sales_title, deposit, rent, size_m2, floor,
        building_floor, title, manage_cost, address1`` with a fresh 0..n-1
        index. Empty (same columns) when no listing is returned.

    Raises
    ------
    IndexError
        If the search endpoint returns no result for ``addr``.
    requests.HTTPError
        If any of the HTTP requests returns an error status.
    """
    columns = ['item_id', 'sales_title', 'deposit', 'rent', 'size_m2', 'floor',
               'building_floor', 'title', 'manage_cost', 'address1']

    # 1. Address -> coordinates. requests builds the query string, so `addr`
    #    is URL-encoded whatever characters it contains.
    response = requests.get(
        'https://apis.zigbang.com/v2/search',
        params={'leaseYn': 'N', 'q': addr, 'serviceType': '원룸'},
        timeout=timeout,
    )
    response.raise_for_status()
    hits = response.json().get('items') or []
    if not hits:
        # Same exception type as indexing an empty list, but with a useful message.
        raise IndexError(f'no Zigbang search result for address {addr!r}')
    lat, lng = hits[0]['lat'], hits[0]['lng']

    # 2. Coordinates -> geohash.
    geohash = geohash2.encode(lat, lng, precision=5)

    # 3. Geohash -> listing ids.
    url = (
        'https://apis.zigbang.com/v2/items/oneroom'
        f'?geohash={geohash}&depositMin=0&rentMin=0'
        '&salesTypes[0]=전세&salesTypes[1]=월세'
        '&domain=zigbang&checkAnyItemWithoutFilter=true'
    )
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    item_ids = [item['itemId'] for item in (response.json().get('items') or [])]
    if not item_ids:
        return pd.DataFrame(columns=columns)

    # 4. Listing ids -> listing details (ids are sent as the form body).
    response = requests.post(
        'https://apis.zigbang.com/v2/items/list',
        data={'domain': 'zigbang', 'item_ids': item_ids[:max_items]},
        timeout=timeout,
    )
    response.raise_for_status()
    listings = pd.DataFrame(response.json().get('items') or [])
    if 'address1' not in listings.columns:
        # No detail rows came back; return an empty frame with the usual columns.
        return pd.DataFrame(columns=columns)

    # regex=False: match `addr` literally; na=False: drop rows with no address.
    in_addr = listings['address1'].str.contains(addr, regex=False, na=False)
    return listings.loc[in_addr, columns].reset_index(drop=True)

In [85]:
# Run the packaged pipeline for another place name and show the last two rows.
df = oneroom(addr='개포동')
df.tail(n=2)

Unnamed: 0,item_id,sales_title,deposit,rent,size_m2,floor,building_floor,title,manage_cost,address1
51,41718888,전세,33000,0,37.79,3,4,"깔끔하고 심플한 투룸, 주차 가능, 풀옵션 완비",20,서울시 강남구 개포동
52,40818078,전세,38000,0,71.64,3,3,"깨끗하고 넓은 공간 쓰리룸, 즉시 입주 가능, 주차 가능",0,서울시 강남구 개포동


In [None]:
# Scratch frame with one string-valued `id` column; note that this rebinds `df`.
df = pd.DataFrame({'id': ['4', '5', '6']})