# 서울 부동산 시장 분석 및 투자 전략 수립

## 과제 2: 아파트 규모별 가격 트렌드 및 생애주기 투자 전략

전용면적별(소형, 중형, 대형) 아파트의 가격 변동 추이를 분석하고, 분석 결과와 생애주기를 바탕으로 최적의 투자 전략과 거주 전략을 제시한다.

- 전용면적 기준(소형: 60㎡ 미만, 중형: 60㎡ 이상 85㎡ 미만, 대형: 85㎡ 이상)으로 분류한 뒤 규모별 가격 변동 추이를 분석하고 시각화
- 생애주기
  - 20~30대: 신혼 / 첫 주택 구입 시기
  - 40~50대: 자산 증식 시기
  - 60대 이상: 다운사이징 / 현금화 단계

In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from IPython.display import display

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Plot styling: use the 'Malgun Gothic' font family (so Hangul labels can be
# rendered) and draw negative tick labels with an ASCII hyphen instead of the
# Unicode minus sign.
plt.rcParams.update({
    'font.family': 'Malgun Gothic',
    'axes.unicode_minus': False,
})

# Load the cleaned sales table and parse the contract date (YYYYMMDD).
df_sales = pd.read_csv('./data/sales_clean.csv')
df_sales['계약일'] = pd.to_datetime(df_sales['계약일'], format='%Y%m%d')

# Independent working copy restricted to rows whose '공공임대' value is missing
# (presumably: transactions not flagged as public rental -- verify against
# the cleaning step that produced the CSV).
df_sales_copy = df_sales[df_sales['공공임대'].isna()].copy()
# display(df_sales_copy.head())

In [20]:
# 전용면적 기준 분류 및 필요 컬럼 추출
def area(df):
    """Return the size class of one record based on its '전용면적' value.

    Args:
        df: A single record (e.g. a DataFrame row passed by
            ``DataFrame.apply(..., axis=1)``) that supports
            ``df['전용면적']`` lookup.

    Returns:
        '대형' when the area is at least 85, '중형' when it is at least 60,
        and '소형' otherwise. A value that fails both comparisons (such as
        NaN) therefore also yields '소형'.
    """
    size = df['전용면적']
    # Thresholds are checked from the largest lower bound downwards.
    for lower_bound, label in ((85, '대형'), (60, '중형')):
        if size >= lower_bound:
            return label
    return '소형'

# Build the analysis table: start from the columns that are taken over
# (area, contract month, construction year, price, price per pyeong) ...
df_area_analysis = df_sales_copy[['전용면적', '계약월', '건축년도', '거래금액', '평단가']].copy()

# ... parse the contract month (YYYYMM) into a timestamp ...
df_area_analysis['계약월'] = pd.to_datetime(df_area_analysis['계약월'], format='%Y%m')

# ... label every row with its size class via area() ...
df_area_analysis['면적구분'] = df_sales_copy.apply(area, axis=1)

# ... and flag buildings whose age relative to 2025 is at most 10 years as
# '신축' (new); everything else, including rows where the comparison is
# False because the year is missing, is '중고' (old).
df_area_analysis['신축구분'] = np.where(
    2025 - df_area_analysis['건축년도'] <= 10, '신축', '중고'
)

display(df_area_analysis.head())

Unnamed: 0,전용면적,계약월,건축년도,거래금액,평단가,면적구분,신축구분
0,84.98,2020-12-01,2013,820000000,31894205,중형,중고
1,240.305,2020-12-01,2011,7100000000,97675058,대형,중고
2,84.88,2020-12-01,2018,1800000000,70093458,중형,신축
3,104.22,2020-12-01,1978,595000000,18870917,대형,중고
4,84.92,2020-12-01,2001,1350000000,52549630,중형,중고


In [31]:
# Monthly / annual statistics per size class.
# calc_monthly_annual is a project-local helper whose implementation is not
# visible here; it is called with the analysis frame and the grouping column
# '면적구분', and its result is unpacked into a monthly and an annual table.
# NOTE(review): judging from the rendered output, the monthly table holds
# per-month transaction counts, mean price, mean price per pyeong, return and
# a rolling mean / std, and the annual table holds yearly return and
# volatility -- confirm the exact definitions (e.g. rolling window length)
# in da_utils.
from da_utils.calc_monthly_annual import calc_monthly_annual
monthly_stats, annual_stats = calc_monthly_annual(df_area_analysis, '면적구분')
display(monthly_stats, annual_stats)

Unnamed: 0,면적구분,계약월,월별거래건수,월평균거래금액,월평균평단가,월별수익률,이동평균,이동표준편차
0,대형,2020-01-01,897,1.052286e+09,2.811883e+07,,,
1,대형,2020-02-01,1176,1.082832e+09,2.921661e+07,0.028615,,
2,대형,2020-03-01,623,1.133290e+09,3.012994e+07,0.045545,,
3,대형,2020-04-01,502,1.306891e+09,3.362868e+07,0.142526,,
4,대형,2020-05-01,1038,1.364007e+09,3.524010e+07,0.042776,,
...,...,...,...,...,...,...,...,...
193,중형,2025-02-01,2662,1.535431e+09,6.154794e+07,0.140432,1.277120e+09,9.586154e+07
194,중형,2025-03-01,4133,1.456529e+09,5.846085e+07,-0.052755,1.301297e+09,1.018023e+08
195,중형,2025-04-01,2152,1.166401e+09,4.734206e+07,-0.222134,1.298429e+09,1.053146e+08
196,중형,2025-05-01,3060,1.304678e+09,5.239205e+07,0.112034,1.302037e+09,1.046698e+08


Unnamed: 0,면적구분,연도,연간수익률,변동성
0,대형,2020,0.62811,0.054793
1,대형,2021,0.257913,0.071762
2,대형,2022,-0.345308,0.151269
3,대형,2023,0.036353,0.073388
4,대형,2024,0.307688,0.048973
5,대형,2025,-0.145102,0.098772
6,소형,2020,0.361244,0.067856
7,소형,2021,-0.155217,0.068742
8,소형,2022,-0.121233,0.098856
9,소형,2023,0.162494,0.072473


In [None]:
# Additional analysis based on construction year.
## Per size class: mean construction year, number of transactions (non-null
## construction years), and mean / total transaction amount, rounded to two
## decimals.
year_by_area = (
    df_area_analysis.groupby('면적구분')
    .agg(
        평균건축년도=('건축년도', 'mean'),
        거래량=('건축년도', 'count'),
        평균거래금액=('거래금액', 'mean'),
        총거래금액=('거래금액', 'sum'),
    )
    .reset_index()
    .round(2)
)

## Share of new ('신축') vs. old ('중고') transactions within each size class.
new_old_ratio = (
    df_area_analysis.groupby(['면적구분', '신축구분'])
    .size()
    .reset_index(name='거래량')
)
# transform('sum') broadcasts each size class's total back onto its own rows,
# aligned by index. This avoids assigning through `.values`, which silently
# depends on the row order (and on how groupby.apply builds its result index).
new_old_ratio['비율(%)'] = (
    new_old_ratio['거래량'] * 100
    / new_old_ratio.groupby('면적구분')['거래량'].transform('sum')
).round(2)

# Order the size classes small -> medium -> large for readability.
order = ['소형', '중형', '대형']
year_by_area['면적구분'] = pd.Categorical(year_by_area['면적구분'], categories=order, ordered=True)
year_by_area = year_by_area.sort_values('면적구분')

new_old_ratio['면적구분'] = pd.Categorical(new_old_ratio['면적구분'], categories=order, ordered=True)
# The secondary key keeps the new/old rows in a deterministic order within
# each size class instead of relying on the sort algorithm being stable.
new_old_ratio = new_old_ratio.sort_values(['면적구분', '신축구분'])

display(year_by_area, new_old_ratio)

Unnamed: 0,면적구분,평균건축년도,거래량,평균거래금액,총거래금액
1,소형,2002.35,112064,713238500.0,79928356310000
2,중형,2002.59,108318,1116395000.0,120925653350000
0,대형,2000.01,42295,1860383000.0,78684917230000


Unnamed: 0,면적구분,신축구분,거래량,비율(%)
2,소형,신축,20524,18.31
3,소형,중고,91540,81.69
4,중형,신축,16780,15.49
5,중형,중고,91538,84.51
0,대형,신축,3180,7.52
1,대형,중고,39115,92.48
