# 직방 데이터 크롤링 

In [93]:
import requests
import pandas as pd
import urllib
from urllib.parse import urlencode
from pandas.io.json import json_normalize
from scipy.spatial import distance

In [5]:
def get_addr_info(keyword="망원동"):
    """Look up the coordinates of a place name via the Zigbang search API.

    Args:
        keyword: Place name to search for (for example a neighbourhood name).

    Returns:
        A ``(lat, lng)`` tuple taken from the first search hit.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        ValueError: If the search returns no items for ``keyword``.
    """
    # Passing the keyword through ``params`` lets requests percent-encode it,
    # so keywords containing "&", "#" or spaces cannot corrupt the query.
    response = requests.get(
        "https://apis.zigbang.com/search/",
        params={"q": keyword},
        timeout=10,  # never hang forever on an unresponsive server
    )
    response.raise_for_status()

    items = response.json().get("items") or []
    if not items:
        raise ValueError("no Zigbang search result for {!r}".format(keyword))

    first_hit = items[0]
    return first_hit["lat"], first_hit["lng"]

In [6]:
def get_ids(lat, lng):
    """Fetch the ids of Zigbang listings in a box centred on a coordinate.

    The bounding box extends 0.01 degrees from ``(lat, lng)`` in each
    direction, which is the span the request has always actually sent.

    Args:
        lat: Latitude of the box centre.
        lng: Longitude of the box centre.

    Returns:
        A list with the ``item_id`` of every entry in the response's
        ``list_items``.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    half_span = 0.01
    params = {
        "lat_south": lat - half_span,
        "lat_north": lat + half_span,
        "lng_west": lng - half_span,
        "lng_east": lng + half_span,
        "room": "[01,02,03,04,05]",
    }
    # requests url-encodes ``params`` the same way ``urlencode`` does, so the
    # resulting URL is identical to the previously hand-assembled one.
    response = requests.get(
        "https://api.zigbang.com/v3/items2", params=params, timeout=10
    )
    response.raise_for_status()

    items = response.json()["list_items"]
    return [item["simple_item"]["item_id"] for item in items]

In [91]:
def make_target(target="홍익대학교", api_key=None):
    """Geocode a place name with the Google Geocoding API.

    Args:
        target: Free-text address or place name to geocode.
        api_key: Google Maps API key. When omitted, the
            ``GOOGLE_MAPS_API_KEY`` environment variable is used, falling
            back to the key that was historically embedded in this function.

    Returns:
        A ``(lat, lng)`` tuple for the first geocoding result.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        ValueError: If the response contains no results for ``target``.
    """
    import os

    if api_key is None:
        # NOTE(review): the fallback below is a credential committed to source
        # control. It is kept only so existing callers keep working; prefer
        # the environment variable and rotate this key.
        api_key = os.environ.get(
            "GOOGLE_MAPS_API_KEY", "AIzaSyD7jDQUoMG1bS8SvukFrySE7aKhzSxSDts"
        )

    # ``params`` makes requests handle the percent-encoding of the address.
    response = requests.get(
        "https://maps.googleapis.com/maps/api/geocode/json",
        params={"address": target, "key": api_key},
        timeout=10,
    )
    response.raise_for_status()

    geo_info = response.json()
    results = geo_info.get("results") or []
    if not results:
        # Surface the API's own status field instead of a bare IndexError.
        raise ValueError(
            "geocoding {!r} returned no results (status: {})".format(
                target, geo_info.get("status")
            )
        )

    location = results[0]["geometry"]["location"]
    return location["lat"], location["lng"]

In [124]:
def get_items(ids):
    """Download listing details for the given ids as a trimmed DataFrame.

    Args:
        ids: List of Zigbang item ids. It is rendered into the query string
            as ``[id1,id2,...]``.

    Returns:
        A DataFrame holding only the columns of interest (rent, deposit,
        address, agent contact, ...) plus ``lat`` / ``lng`` float columns
        parsed from the ``random_location`` field. When ``ids`` is empty or
        the API returns no items, an empty DataFrame with those columns is
        returned.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    filter_columns = [
        "rent", "deposit", "floor", "size", "address1", "address2",
        "lat", "lng", "options", "manage_cost", "parking", "elevator",
        "movein_date", "agent_name", "agent_phone", "agent_email",
    ]

    # Nothing to look up: skip the request and keep the output schema stable.
    if not ids:
        return pd.DataFrame(columns=filter_columns)

    url = "https://api.zigbang.com/v3/items?detail=true&item_ids={}".format(
        str(ids).replace(" ", "")
    )
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    datas = [entry["item"] for entry in response.json()["items"]]
    if not datas:
        return pd.DataFrame(columns=filter_columns)

    # ``pd.json_normalize`` is the supported location of this helper; the
    # ``pandas.io.json`` import path is deprecated.
    result_df = pd.json_normalize(datas)

    # Split the "lat,lng" string into two float columns in a single pass.
    coords = result_df["random_location"].str.split(",", expand=True)
    result_df["lat"] = coords[0].astype(float)
    result_df["lng"] = coords[1].astype(float)

    # Return an independent copy so callers can add columns without
    # triggering pandas' SettingWithCopyWarning.
    return result_df[filter_columns].copy()

In [140]:
def main(addr="망원동", target="홍익대학교", top_n=30):
    """Return the listings around ``addr`` that lie closest to ``target``.

    Args:
        addr: Place name whose surroundings are searched for listings.
        target: Place name the listings are ranked against.
        top_n: Maximum number of listings to return (default 30).

    Returns:
        A DataFrame of at most ``top_n`` listings sorted by ascending
        ``distance``. ``distance`` is the Euclidean distance between the
        (lat, lng) pairs in degrees, multiplied by 10000; it is a ranking
        score, not a distance in metres.
    """
    lat, lng = get_addr_info(addr)
    ids = get_ids(lat, lng)
    result_df = get_items(ids)
    target_lat, target_lng = make_target(target)

    # Euclidean distance from the target to every listing. Distinct loop
    # names keep the address coordinates above from being overwritten.
    target_point = (target_lat, target_lng)
    distances = [
        distance.euclidean(target_point, (item_lat, item_lng)) * 10000
        for item_lat, item_lng in zip(result_df["lat"], result_df["lng"])
    ]

    # ``assign`` builds a new frame instead of writing into a possible slice.
    ranked = (
        result_df.assign(distance=distances)
        .sort_values(by=["distance"])
        .reset_index(drop=True)
    )
    return ranked.head(top_n)

In [141]:
# Collect the listings around Sangsu-dong, ranked against the default target.
df = main(addr="상수동")

In [143]:
# Persist the result table as CSV, leaving out the row index.
df.to_csv(path_or_buf="zigbang_crawling.csv", index=False)