# 데이터 시각화

In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
# Show the process's current working directory.
os.getcwd()

In [None]:
# Switch the working directory to the `data` folder two levels up.
# NOTE: the path is relative, so re-running this cell resolves it against the
# already-changed working directory rather than the original one.
os.chdir('../../data')

In [None]:
# List the entries of the current working directory in sorted order.
sorted(os.listdir())

In [None]:
# Load the prepared CSV (presumably apartment transaction records) from the
# current working directory into the `apt` DataFrame.
apt = pd.read_csv(
    'APT_Data_Prep.csv',
    # Parse the file in a single pass rather than in chunks, which avoids
    # mixed-dtype inference within a column.
    low_memory=False,
    # Read the contract-date column as datetime instead of plain strings.
    parse_dates=['계약일자']
)

In [None]:
# Preview the first rows of the table (head() shows 5 rows by default).
apt.head()

In [None]:
# Print a summary of the columns: names, non-null counts, dtypes and memory usage.
apt.info()

## 데이터 정렬

In [None]:
# Floor column sorted in ascending order (the default direction of sort_values).
apt['층'].sort_values()

In [None]:
# Floor column sorted in descending order (highest floor first).
apt['층'].sort_values(ascending=False)

In [None]:
# Order rows by floor, then by transaction amount; the single `ascending`
# flag applies to every sort key, so both are descending.
apt.sort_values(
    ['층', '거래금액'],
    ascending=False,
)

In [None]:
# Floor descending, ties broken by transaction amount ascending; the
# `ascending` list pairs up with the sort keys one-to-one.
apt.sort_values(
    ['층', '거래금액'],
    ascending=[False, True],
)

## 집계함수로 데이터 요약

In [None]:
# Number of non-missing values in the transaction-amount column.
apt['거래금액'].count()

In [None]:
# Sum of all transaction amounts.
apt['거래금액'].sum()

In [None]:
# Arithmetic mean of the transaction amounts.
apt['거래금액'].mean()

In [None]:
# Sample standard deviation of the transaction amounts (pandas defaults to ddof=1).
apt['거래금액'].std()

### agg 메서드

#### 시리즈

In [None]:
# Compute several summary statistics of the transaction amount in one call;
# passing a list of function names returns a Series indexed by those names.
apt['거래금액'].agg(
    ['count', 'sum', 'mean', 'std']
)
# Example output:
# count    2.208900e+05
# sum      2.233578e+06
# mean     1.011172e+01
# std      7.461396e+00
# Name: 거래금액, dtype: float64

In [None]:
# Same statistics, but a {label: function} mapping is used so that each
# result row carries a custom label instead of the function name.
apt['거래금액'].agg({
    '개수': 'count',
    '합계': 'sum',
    '평균': 'mean',
    '표준편차': 'std',
})
# Example output:
# 개수      2.208900e+05
# 합계      2.233578e+06
# 평균      1.011172e+01
# 표준편차    7.461396e+00
# Name: 거래금액, dtype: float64

#### 데이터프레임

In [None]:
# Restrict the frame to its numeric columns, then compute the mean and
# standard deviation of each one (one result row per statistic).
(
    apt.select_dtypes('number')
    .agg(['mean', 'std'])
)
# Example output:
# 거래금액	입주년도	계약년도	계약월	계약일	전용면적	층	경과년수	세대수	주차대수
# mean	10.111720	2002.322215	2021.735869	6.221241	15.823908	75.471219	9.534578	19.413654	1065.668079	1175.280823
# std	7.461396	10.396538	1.643023	3.134888	8.666083	30.573625	6.370747	10.336370	1253.131045	1614.546846

## 범주별 집계 함수 groupby

In [None]:
# Count of non-missing transaction amounts per district, largest first.
(
    apt.groupby('시군구')['거래금액']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# Mean transaction amount per district, highest first.
(
    apt.groupby('시군구')['거래금액']
    .mean()
    .sort_values(ascending=False)
)

In [None]:
# Standard deviation of the transaction amount per district, widest spread first.
(
    apt.groupby('시군구')['거래금액']
    .std()
    .sort_values(ascending=False)
)

In [None]:
# Boolean mask selecting the rows whose district is '강남구'; it is kept in
# `cond` so that later cells can reuse it.
cond = apt['시군구'] == '강남구'
# Within that district, mean transaction amount per neighbourhood, highest first.
(
    apt.loc[cond]
    .groupby('법정동')['거래금액']
    .mean()
    .sort_values(ascending=False)
)

In [None]:
# Using the existing `cond` mask: standard deviation of the transaction
# amount per neighbourhood, widest spread first.
(
    apt.loc[cond]
    .groupby('법정동')['거래금액']
    .std()
    .sort_values(ascending=False)
)

### groupby + agg

In [None]:
# Mean and standard deviation of the transaction amount per district,
# returned side by side as two columns.
(
    apt.groupby('시군구')['거래금액']
    .agg(['mean', 'std'])
)

In [None]:
# Same two statistics, grouped by district and contract year; the result
# is indexed by the (district, year) pair.
(
    apt.groupby(['시군구', '계약년도'])['거래금액']
    .agg(['mean', 'std'])
)

### 피벗 테이블

In [None]:
# Cross-tabulate the transaction amount: districts as rows, contract years
# as columns, with one column group per statistic (mean, std).
# margins=True appends an extra row/column aggregated over all data.
# NOTE(review): the margin label '합계' reads as "total", but with these
# aggfuncs the margins hold the overall mean/std, not a sum.
apt.pivot_table(
    values='거래금액',
    index='시군구',
    columns='계약년도',
    aggfunc=['mean', 'std'],
    margins=True,
    margins_name='합계',
)