# 한국 부동산 시장 분석 및 투자 전략 수립

## 데이터 전처리

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from IPython.display import display

# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Plot styling: 'Malgun Gothic' is presumably chosen so Korean labels render
# (TODO confirm the font is installed on the target machine); disabling
# 'axes.unicode_minus' makes matplotlib draw minus signs with an ASCII hyphen.
plt.rcParams.update({
    'font.family': 'Malgun Gothic',
    'axes.unicode_minus': False,
})

# Load the raw sales and rent (jeonse / monthly) transaction exports.
# Both files are decoded as EUC-KR.
df_sales_raw, df_rents_raw = (
    pd.read_csv(csv_path, encoding='EUC-KR')
    for csv_path in (
        './data/아파트(매매)_실거래가.csv',
        './data/아파트(전월세)_실거래가.csv',
    )
)

In [3]:
# Tidy the raw sales export into an analysis-friendly table.

# Split '시군구' once on single spaces; token 1 is used as the district (구)
# and token 2 as the neighbourhood (동).
sales_region = df_sales_raw['시군구'].str.split(' ')

# Contract date = '계약년월' followed by the zero-padded '계약일'.
# The format is pinned to YYYYMMDD so parsing does not depend on inference.
sales_contract_date = pd.to_datetime(
    df_sales_raw['계약년월'].astype(str)
    + df_sales_raw['계약일'].astype(str).str.zfill(2),
    format='%Y%m%d',
)

# Price is given in units of 10,000 won with thousands separators.
# Convert to int64 explicitly: where the default integer is 32-bit
# (e.g. Windows with NumPy < 2), multiplying by 10000 silently wraps around
# for amounts above about 2.1 billion won.
sales_price_won = (
    df_sales_raw['거래금액(만원)'].str.replace(',', '').astype('int64') * 10000
)

df_sales = pd.DataFrame({
    '구': sales_region.str[1],
    '동': sales_region.str[2],
    '전용면적': df_sales_raw['전용면적(㎡)'],
    '계약일': sales_contract_date,
    '건축년도': df_sales_raw['건축년도'],
    '구분': '매매',
    '거래금액': sales_price_won,
    '매수자': df_sales_raw['매수자'],
    '매도자': df_sales_raw['매도자'],
})

# Remove cancelled transactions: keep only rows whose cancellation-date
# column holds the '-' placeholder. The mask comes from the raw frame, which
# shares its index with df_sales, so the helper column never has to be copied
# into the cleaned table.
valid_mask = df_sales_raw['해제사유발생일'] == '-'
df_sales = df_sales[valid_mask]

df_sales.to_csv('./data/sales_clean.csv', index=False)
display(df_sales.head())

Unnamed: 0,구,동,전용면적,계약일,건축년도,구분,거래금액,매수자,매도자
0,성동구,상왕십리동,55.2,2025-06-30,2014,매매,1270000000,개인,개인
1,용산구,산천동,114.48,2025-06-30,2001,매매,1620000000,개인,개인
2,광진구,구의동,59.98,2025-06-30,2018,매매,1340000000,개인,개인
4,용산구,도원동,114.99,2025-06-30,2001,매매,1700000000,개인,개인
5,중구,신당동,59.94,2025-06-30,2002,매매,1200000000,개인,개인


In [4]:
# Tidy the raw rent export (jeonse and monthly rent) into one table.

# Split '시군구' once on single spaces; token 1 is used as the district (구)
# and token 2 as the neighbourhood (동).
rents_region = df_rents_raw['시군구'].str.split(' ')

# Contract date = '계약년월' followed by the zero-padded '계약일'.
# The format is pinned to YYYYMMDD so parsing does not depend on inference.
rents_contract_date = pd.to_datetime(
    df_rents_raw['계약년월'].astype(str)
    + df_rents_raw['계약일'].astype(str).str.zfill(2),
    format='%Y%m%d',
)


def _manwon_to_won(column):
    """Convert a comma-formatted column in units of 10,000 won to int64 won.

    int64 is requested explicitly: where the default integer is 32-bit
    (e.g. Windows with NumPy < 2), multiplying by 10000 silently wraps around
    for amounts above about 2.1 billion won.
    """
    return column.str.replace(',', '').astype('int64') * 10000


df_rents = pd.DataFrame({
    '구': rents_region.str[1],
    '동': rents_region.str[2],
    '전용면적': df_rents_raw['전용면적(㎡)'],
    '계약일': rents_contract_date,
    '건축년도': df_rents_raw['건축년도'],
    '구분': df_rents_raw['전월세구분'],
    '보증금': _manwon_to_won(df_rents_raw['보증금(만원)']),
    '월세금': _manwon_to_won(df_rents_raw['월세금(만원)']),
    '계약구분': df_rents_raw['계약구분'],
})

# Split by contract type. .copy() makes each subset an independent frame so
# later column assignments do not act on a slice of df_rents.
df_monthly = df_rents[df_rents['구분'] == '월세'].copy()
df_long = df_rents[df_rents['구분'] == '전세'].copy()

df_monthly.to_csv('./data/monthly_clean.csv', index=False)
display(df_monthly.head())
df_long.to_csv('./data/long_clean.csv', index=False)
display(df_long.head())

Unnamed: 0,구,동,전용면적,계약일,건축년도,구분,보증금,월세금,계약구분
0,강동구,상일동,84.923,2025-06-30,2019,월세,400000000,1000000,갱신
1,강동구,상일동,39.82,2025-06-30,2021,월세,16870000,310000,-
3,강동구,상일동,59.97,2025-06-30,2019,월세,204440000,290000,-
5,강동구,상일동,39.82,2025-06-30,2021,월세,64780000,110000,갱신
7,강동구,성내동,16.09,2025-06-30,2023,월세,52400000,470000,갱신


Unnamed: 0,구,동,전용면적,계약일,건축년도,구분,보증금,월세금,계약구분
2,강동구,상일동,114.98,2025-06-30,2011,전세,525600000,0,신규
4,강동구,상일동,84.93,2025-06-30,2020,전세,800000000,0,신규
6,강동구,암사동,111.22,2025-06-30,2008,전세,840000000,0,갱신
8,강동구,고덕동,59.785,2025-06-30,2019,전세,605000000,0,갱신
9,강동구,고덕동,84.88,2025-06-30,2016,전세,770000000,0,신규
