# 수치데이터 요약하기 - 기술통계 값 보기

## 학습목표
- 기초 통계로 데이터의 분포를 살펴보고, 시각화해봅니다.

## 핵심 키워드
- mean
- median
- max
- min
- count
- describe

## 학습하기
---

In [2]:
# 기본셋팅
import pandas as pd
import numpy as np
import seaborn

import matplotlib.pyplot as plt
# window 의 한글 폰트 설정
plt.rc('font', family='Malgun Gothic')

# Mac 의 한글 폰트 설정
# plt.rc('font', family='AppleGothic')
plt.rc('axes', unicode_minus=False)

# 그래프가 노트북 안에 보이게 하기 위해
%matplotlib inline

# 폰트가 선명하게 보이기 위해
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

# 데이터 불러오기
df = pd.read_csv('source/dataset.csv', low_memory=False)

### 1. 기초통계값

In [3]:
# Inspect the data types.
# A notebook cell renders only its last expression, so the per-column listing
# is printed explicitly; as a bare expression it was computed and dropped.
print(df.dtypes)

# dtype of the single '위도' column (rendered as the cell's result)
df['위도'].dtypes

dtype('float64')

In [4]:
# Basic descriptive statistics for the '위도' column.
# Only the last expression of a notebook cell is rendered, so every
# intermediate statistic is printed; as bare expressions they were evaluated
# and silently discarded.
print('mean  :', df['위도'].mean())
print('median:', df['위도'].median())
print('max   :', df['위도'].max())
print('min   :', df['위도'].min())

# Number of non-NA values in the column
print('count :', df['위도'].count())

# (rows, columns) of the whole DataFrame
print('shape :', df.shape)

# Summary of the column: count, mean, std, min, quartiles and max in one call
df['위도'].describe()

count    91335.000000
mean        36.624711
std          1.041361
min         33.219290
25%         35.811830
50%         37.234652
75%         37.507463
max         38.499659
Name: 위도, dtype: float64

### 두 개 이상의 Column 요약하기

In [5]:
# Summarize the '위도' and '경도' columns side by side
df.loc[:, ['위도', '경도']].describe()

Unnamed: 0,위도,경도
count,91335.0,91335.0
mean,36.624711,127.487524
std,1.041361,0.842877
min,33.21929,124.717632
25%,35.81183,126.914297
50%,37.234652,127.08455
75%,37.507463,128.108919
max,38.499659,130.909912


### 특정 데이터만 요약하기

In [9]:
# Summarize numeric columns only (np.number is what the 'number' alias selects)
df.describe(include=[np.number])

Unnamed: 0,상가업소번호,시도코드,시군구코드,행정동코드,법정동코드,지번코드,대지구분코드,지번본번지,지번부번지,도로명코드,건물본번지,건물부번지,구우편번호,신우편번호,경도,위도
count,91335.0,90956.0,90956.0,91335.0,91280.0,91335.0,91335.0,91335.0,72079.0,91335.0,91335.0,10604.0,91323.0,91333.0,91335.0,91335.0
mean,21218180.0,32.586712,32898.381877,3293232000.0,3293385000.0,3.293191e+18,1.001336,587.534549,17.005092,329320700000.0,251.200482,7.241135,428432.911085,28085.47698,127.487524,36.624711
std,5042828.0,13.045138,12985.393171,1297387000.0,1297706000.0,1.297393e+18,0.036524,582.519364,53.451413,129739100000.0,477.456487,12.426816,193292.339066,18909.01455,0.842877,1.041361
min,2901108.0,11.0,11110.0,1111052000.0,1111010000.0,1.11101e+18,1.0,1.0,1.0,111102000000.0,0.0,1.0,100011.0,1000.0,124.717632,33.21929
25%,20019310.0,26.0,26350.0,2635065000.0,2635011000.0,2.635011e+18,1.0,162.0,2.0,263504200000.0,29.0,1.0,302120.0,11681.0,126.914297,35.81183
50%,22119000.0,41.0,41117.0,4111758000.0,4111710000.0,4.111711e+18,1.0,462.0,5.0,411174300000.0,92.0,2.0,440300.0,24353.0,127.08455,37.234652
75%,24809840.0,43.0,43113.0,4311370000.0,4311311000.0,4.311311e+18,1.0,858.0,14.0,431133200000.0,257.0,9.0,602811.0,46044.0,128.108919,37.507463
max,28524700.0,50.0,50130.0,5013061000.0,5013032000.0,5.013061e+18,2.0,7338.0,1428.0,501304900000.0,8795.0,244.0,799801.0,63643.0,130.909912,38.499659


In [10]:
# Summarize string (object-dtype) columns only: count, unique, top, freq
df.describe(include=[object])

Unnamed: 0,상호명,지점명,상권업종대분류코드,상권업종대분류명,상권업종중분류코드,상권업종중분류명,상권업종소분류코드,상권업종소분류명,표준산업분류코드,표준산업분류명,...,법정동명,대지구분명,지번주소,도로명,건물관리번호,건물명,도로명주소,동정보,층정보,호정보
count,91335,1346,91335,91335,91335,91335,91335,91335,86413,86413,...,91280,91335,91335,91335,91335,46453,91335,7406,44044,15551
unique,56910,858,1,1,5,5,34,34,15,15,...,2822,2,53118,16610,54142,17946,54031,556,74,849
top,리원,장례식장,S,의료,S01,병원,S02A01,약국,Q86201,일반 의원,...,중동,대지,서울특별시 동대문구 제기동 965-1,서울특별시 강남구 강남대로,1123010300109650001031604,현대아파트,서울특별시 동대문구 약령중앙로8길 10,1,1,1
freq,152,97,91335,91335,60774,60774,18964,18964,22555,22555,...,874,91213,198,326,198,131,198,1571,15994,1286


In [11]:
# Summarize every column regardless of dtype: include='all'.
# The result combines the categorical rows (unique/top/freq) with the numeric
# rows (mean/std/min/quartiles); statistics that do not apply to a column are
# left empty (NaN).
df.describe(include='all')

Unnamed: 0,상가업소번호,상호명,지점명,상권업종대분류코드,상권업종대분류명,상권업종중분류코드,상권업종중분류명,상권업종소분류코드,상권업종소분류명,표준산업분류코드,...,건물관리번호,건물명,도로명주소,구우편번호,신우편번호,동정보,층정보,호정보,경도,위도
count,91335.0,91335,1346,91335,91335,91335,91335,91335,91335,86413,...,91335,46453,91335,91323.0,91333.0,7406.0,44044.0,15551.0,91335.0,91335.0
unique,,56910,858,1,1,5,5,34,34,15,...,54142,17946,54031,,,556.0,74.0,849.0,,
top,,리원,장례식장,S,의료,S01,병원,S02A01,약국,Q86201,...,1123010300109650001031604,현대아파트,서울특별시 동대문구 약령중앙로8길 10,,,1.0,1.0,1.0,,
freq,,152,97,91335,91335,60774,60774,18964,18964,22555,...,198,131,198,,,1571.0,15994.0,1286.0,,
mean,21218180.0,,,,,,,,,,...,,,,428432.911085,28085.47698,,,,127.487524,36.624711
std,5042828.0,,,,,,,,,,...,,,,193292.339066,18909.01455,,,,0.842877,1.041361
min,2901108.0,,,,,,,,,,...,,,,100011.0,1000.0,,,,124.717632,33.21929
25%,20019310.0,,,,,,,,,,...,,,,302120.0,11681.0,,,,126.914297,35.81183
50%,22119000.0,,,,,,,,,,...,,,,440300.0,24353.0,,,,127.08455,37.234652
75%,24809840.0,,,,,,,,,,...,,,,602811.0,46044.0,,,,128.108919,37.507463
