In [None]:
# Create one Naver real-estate crawler object per Seoul district.
from modules import NaverRealEstate, set_conditon_params

locations = [
    '마포구', '서대문구', '용산구', '중구', '종로구', '성동구', '동대문구',
    '광진구', '송파구', '강남구', '서초구', '동작구', '관악구', '구로구', '영등포구',
]

# Keyed by district name; each crawler receives the name prefixed with "서울시 ".
real_estate_objects = {
    district: NaverRealEstate(location=f"서울시 {district}")
    for district in locations
}

# Search conditions shared by every district.
params = set_conditon_params(
    wprcRange=(1, 2000),
    population=3,
    spc_per_person=3,
    fee_per_person=15,
    rletTpCd="OR:SG:SMS",
    tradTpCd="B2:B3",
)

In [None]:
# Web crawling: run the crawl for every district with the shared search
# conditions, then persist each result through save_to_excel.
for crawler in real_estate_objects.values():
    crawler.crawl_data(params)
    crawler.save_to_excel("main")

In [None]:
# Read the saved listing Excel files back into each crawler object.
for crawler in real_estate_objects.values():
    crawler.load_from_excel("main")

In [None]:
import pandas as pd

# Keep only listings whose exclusive area meets the minimum from the search conditions.
for obj in real_estate_objects.values():
    obj.df = obj.df[obj.df['전용면적'] >= params['spcMin']]


def _duplicate_rate(removed, total):
    """Return removed / total as a percentage, or 0.0 when total is 0."""
    # A region can end up with zero listings after the area filter above;
    # dividing by that total would raise ZeroDivisionError.
    return removed / total * 100 if total else 0.0


# Remove duplicate listings and record per-region statistics.
duplicates_data = []

for location, obj in real_estate_objects.items():
    total = obj.df.shape[0]
    # The row count is re-read afterwards, so this relies on
    # drop_df_duplicates() updating obj.df.
    obj.drop_df_duplicates()
    unique = obj.df.shape[0]
    removed = total - unique
    duplicates_data.append(
        [location, total, removed, unique, _duplicate_rate(removed, total)]
    )

duplicates_columns = ['지역', '전체매물', '중복제거', '최종매물', '중복률']
duplicates = pd.DataFrame(duplicates_data, columns=duplicates_columns)

# Summary row. The duplicate rate is computed from the summed counts so that
# it is the overall rate rather than an unweighted mean of per-region rates.
total_listings = duplicates['전체매물'].sum()
total_removed = duplicates['중복제거'].sum()
duplicates.loc[len(duplicates)] = [
    '계',
    total_listings,
    total_removed,
    duplicates['최종매물'].sum(),
    _duplicate_rate(total_removed, total_listings),
]

print(duplicates)

# Re-order the dictionary by listing count, largest first.
real_estate_objects = dict(
    sorted(
        real_estate_objects.items(),
        key=lambda item: item[1].df.shape[0],
        reverse=True,
    )
)

In [None]:
# Visualize the number of above-ground vs. basement listings.
import matplotlib.pyplot as plt

# Set the font family matplotlib uses for all text in the figures.
# NOTE(review): 'AppleGothic' is presumably chosen so the Korean labels render,
# and is presumably only available on macOS — confirm for other platforms.
plt.rcParams['font.family'] = 'AppleGothic'

def classify_floor(row):
    """Classify a floor label as basement ('지하') or above ground ('지상').

    Args:
        row: A single floor value (this function is applied element-wise to
            the '층 수' column). Non-string values such as NaN, None or
            integers are accepted and converted with ``str`` first.

    Returns:
        '지하' when the text form of the value starts with 'B' (ignoring
        surrounding whitespace), otherwise '지상'.
    """
    # str() keeps missing or numeric cells from raising AttributeError.
    floor_text = str(row).strip()
    return '지하' if floor_text.startswith('B') else '지상'
    
# Group each region's listings by the label classify_floor assigns to '층 수'.
floor = {
    region: estate.df.groupby(estate.df['층 수'].apply(classify_floor))
    for region, estate in real_estate_objects.items()
}

# Region names in the dictionary's current order; this is the x-axis order.
locations = list(real_estate_objects)

# Compute the group sizes once per region; a missing label counts as 0.
floor_sizes = [floor[region].size() for region in locations]
ground_counts = [sizes.get('지상', 0) for sizes in floor_sizes]
basement_counts = [sizes.get('지하', 0) for sizes in floor_sizes]

bar_width = 0.5
index = range(len(locations))

# Stacked bars: basement counts sit on top of the above-ground counts.
plt.figure(figsize=(12, 6))
bar1 = plt.bar(index, ground_counts, width=bar_width, label='지상', color='skyblue')
bar2 = plt.bar(
    index,
    basement_counts,
    width=bar_width,
    label='지하',
    color='salmon',
    bottom=ground_counts,
)

plt.title('지상/지하 매물 개수', fontsize=20)
plt.xlabel('지역', fontsize=15)
plt.ylabel('매물 개수', fontsize=12)
plt.xticks(index, locations, fontsize=10)
plt.legend(fontsize=15)

plt.tight_layout()
plt.show()

In [None]:
# Visualize the number of listings per property type.
import numpy as np

property_type = {
    region: estate.df.groupby('매물유형')
    for region, estate in real_estate_objects.items()
}

# Compute the group sizes once per region; a missing type counts as 0.
type_sizes = [property_type[region].size() for region in locations]
one_room_counts = [sizes.get('원룸', 0) for sizes in type_sizes]
office_counts = [sizes.get('사무실', 0) for sizes in type_sizes]
store_counts = [sizes.get('상가', 0) for sizes in type_sizes]

bar_width = 0.5
index = range(len(locations))

# Stacked bars in the order 원룸 -> 사무실 -> 상가 from bottom to top.
plt.figure(figsize=(12, 6))
bar1 = plt.bar(index, one_room_counts, width=bar_width, label='원룸', color='skyblue')
bar2 = plt.bar(
    index,
    office_counts,
    width=bar_width,
    label='사무실',
    color='salmon',
    bottom=one_room_counts,
)
# The third layer starts at the element-wise sum of the two layers below it.
store_bottom = np.add(one_room_counts, office_counts)
bar3 = plt.bar(
    index,
    store_counts,
    width=bar_width,
    label='상가',
    color='lightgreen',
    bottom=store_bottom,
)

plt.title('매물 유형별 개수', fontsize=20)
plt.xlabel('지역', fontsize=15)
plt.ylabel('매물 개수', fontsize=12)
plt.xticks(index, locations, fontsize=10)
plt.legend(fontsize=15)

plt.tight_layout()
plt.show()

In [None]:
# Visualize the deposit distribution: one overall histogram plus one per region.
import matplotlib.gridspec as gridspec

# NOTE(review): the deposit is read from the '가격' column here, while the map
# cell's popup reads row['보증금'] — confirm which column holds the deposit.
deposit = {location: obj.df['가격'] for location, obj in real_estate_objects.items()}
# 'total' holds one series per region, so the overall histogram draws one
# dataset per region. The right-hand side is evaluated before the key is added.
deposit['total'] = list(deposit.values())

# Size the grid from the number of regions instead of hard-coding 7 rows, so
# more than 15 regions do not index past the end of the GridSpec.
grid_columns = 3
region_rows = (len(locations) + grid_columns - 1) // grid_columns  # ceiling division

fig = plt.figure(figsize=(8, 12))
gs = gridspec.GridSpec(2 + region_rows, grid_columns)

# The overall histogram spans the first two rows across every column.
# Axes methods are used so the handle keeps referring to the Axes.
ax_deposit_total = fig.add_subplot(gs[0:2, :])
ax_deposit_total.hist(deposit['total'], bins=10)
ax_deposit_total.set_xlabel('보증금 (만원)')
ax_deposit_total.set_ylabel('매물 개수')
ax_deposit_total.set_title('전체 매물 보증금 분포')

# One small histogram per region, filled row by row below the overall plot.
for i, location in enumerate(locations):
    ax_deposit = fig.add_subplot(gs[2 + i // grid_columns, i % grid_columns])
    ax_deposit.hist(deposit[location], bins=10)
    # Shared x-range so the regions are directly comparable.
    ax_deposit.set_xlim(0, params['wprcMax'])
    ax_deposit.set_title(location)

plt.tight_layout()
plt.show()

In [None]:
# Visualize the monthly-rent distribution: one overall histogram plus one per region.
rent = {location: obj.df['월세'] for location, obj in real_estate_objects.items()}
# 'total' holds one series per region, so the overall histogram draws one
# dataset per region. The right-hand side is evaluated before the key is added.
rent['total'] = list(rent.values())

# Size the grid from the number of regions instead of hard-coding 7 rows, so
# more than 15 regions do not index past the end of the GridSpec.
grid_columns = 3
region_rows = (len(locations) + grid_columns - 1) // grid_columns  # ceiling division

fig = plt.figure(figsize=(8, 12))
gs = gridspec.GridSpec(2 + region_rows, grid_columns)

# The overall histogram spans the first two rows across every column.
# Axes methods are used so the handle keeps referring to the Axes.
ax_rent_total = fig.add_subplot(gs[0:2, :])
ax_rent_total.hist(rent['total'], bins=10)
ax_rent_total.set_xlabel('월세 (만원)')
ax_rent_total.set_ylabel('매물 개수')
ax_rent_total.set_title('전체 매물 월세 분포')

# One small histogram per region, filled row by row below the overall plot.
for i, location in enumerate(locations):
    ax_rent = fig.add_subplot(gs[2 + i // grid_columns, i % grid_columns])
    ax_rent.hist(rent[location], bins=10)
    # Shared x-range so the regions are directly comparable.
    ax_rent.set_xlim(0, params['rprcMax'])
    ax_rent.set_title(location)

plt.tight_layout()
plt.show()

In [None]:
# Mark every listing on a folium map.
import os
import folium
import base64
from branca.colormap import LinearColormap

# All regions' listings combined into one frame.
df = pd.concat([data.df for data in real_estate_objects.values()])

m = folium.Map(location=[37.5312135, 127.0016985], zoom_start=12)

# Color scale spanning the observed monthly-rent range.
min_rent = df['월세'].min()
max_rent = df['월세'].max()

colormap = LinearColormap(['blue', 'green', 'yellow', 'red'], vmin=min_rent, vmax=max_rent)

# Marker shape (an SVG file under icons_dir) for each property type.
icons_dir = 'icon'
marker_shapes = {
    '원룸': 'circle.svg',
    '사무실': 'triangle.svg',
    '상가': 'square.svg',
}

# SVG text keyed by file name, so each icon file is read from disk at most once.
svg_templates = {}


def _load_svg_template(file_name):
    """Return the SVG text of file_name under icons_dir, reading it only once."""
    if file_name not in svg_templates:
        svg_path = os.path.join(icons_dir, file_name)
        with open(svg_path, 'r', encoding='utf-8') as svg_file:
            svg_templates[file_name] = svg_file.read()
    return svg_templates[file_name]


# The row label is unused; `_` avoids rebinding the `index` name used by the
# plotting cells.
for _, row in df.iterrows():
    # NOTE(review): the popup reads the deposit from '보증금', while the deposit
    # histogram cell reads '가격' — confirm which column holds the deposit.
    popup_content = f"보증금: {row['보증금']} | 월세: {row['월세']} | <a href=\"https://fin.land.naver.com/articles/{row['상품번호']}\">링크</a>"

    marker_shape = marker_shapes.get(row['매물유형'])

    if marker_shape is None:
        # Unknown property type: use folium's default marker rather than
        # passing None to os.path.join.
        icon = None
    else:
        # Substitute the rent-based color into the template's %COLOR% placeholder.
        svg_code = _load_svg_template(marker_shape).replace('%COLOR%', colormap(row['월세']))
        # Embed the colored SVG as a base64 data URI.
        encoded = base64.b64encode(svg_code.encode('utf-8')).decode('utf-8')
        icon = folium.CustomIcon(icon_image=f"data:image/svg+xml;base64,{encoded}", icon_size=(25, 25))

    folium.Marker(
        location=[row['위도'], row['경도']],
        popup=folium.Popup(popup_content, max_width=300),
        icon=icon,
    ).add_to(m)

# Add the rent color scale to the map as a legend.
colormap.add_to(m)

m.save('data/main/map.html')
# Bare expression left last so the notebook displays the map as the cell output.
m