## 결측치 
- 결측값 (missing value)
    - NaN, NA, Null
    - 알 수 없는 값, 알려지지 않은 값

In [3]:
import pandas as pd
import numpy as np

In [2]:
# Raw column data: one list per column, all of equal length
data = dict(
    city=['서울', '경기', '제주', '부산'],
    total1=[12000, 75000, 63000, 42000],
    total2=[15000, 18000, 66000, 55000],
)

# Build the example frame (columns follow the insertion order of `data`)
data_frame = pd.DataFrame.from_dict(data)
data_frame

Unnamed: 0,city,total1,total2
0,서울,12000,15000
1,경기,75000,18000
2,제주,63000,66000
3,부산,42000,55000


In [4]:
# Replace the total1 value of row label 1 (city '경기') with NaN.
# total1 is built from integers and an integer column cannot hold NaN, so
# cast it to float64 explicitly first instead of relying on the implicit
# upcast performed by the assignment (newer pandas releases flag that
# implicit upcast as a deprecated incompatible-dtype set).
data_frame['total1'] = data_frame['total1'].astype('float64')
data_frame.loc[1, 'total1'] = np.nan
data_frame

Unnamed: 0,city,total1,total2
0,서울,12000.0,15000
1,경기,,18000
2,제주,63000.0,66000
3,부산,42000.0,55000


In [5]:
# Print a summary of the frame: per-column non-null counts and dtypes.
# A column whose non-null count is below the number of entries contains
# missing values.
data_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   city    4 non-null      object 
 1   total1  3 non-null      float64
 2   total2  4 non-null      int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 224.0+ bytes


### 결측치 확인

In [7]:
# Missing-value check: boolean mask that is True wherever a cell is missing.
# isnull() and isna() are two spellings of the same check. A cell only
# displays its last expression, so instead of silently discarding the first
# result, verify that both agree and display the mask once.
missing_mask = data_frame.isna()
assert missing_mask.equals(data_frame.isnull())
missing_mask

Unnamed: 0,city,total1,total2
0,False,False,False
1,False,True,False
2,False,False,False
3,False,False,False


In [8]:
# Check a single column for missing values:
# boolean Series that is True on the rows where total1 is missing
data_frame['total1'].isna()

0    False
1     True
2    False
3    False
Name: total1, dtype: bool

In [9]:
# Select the rows whose total1 is missing by indexing with a boolean mask.
# Attribute vs. bracket column access and isna() vs. isnull() are
# interchangeable, so all four spellings below build the same mask.
equivalent_masks = [
    data_frame.total1.isna(),
    data_frame.total1.isnull(),
    data_frame['total1'].isna(),
    data_frame['total1'].isnull(),
]
selections = [data_frame[mask] for mask in equivalent_masks]

# Only the last expression of a cell is displayed, so check the equivalence
# explicitly instead of computing three results that are thrown away.
rows_missing_total1 = selections[-1]
assert all(selection.equals(rows_missing_total1) for selection in selections)
rows_missing_total1

Unnamed: 0,city,total1,total2
1,경기,,18000


### 결측치 변경

In [10]:
# Replace every missing value with a chosen fill value (here 0).
# The result is a new frame; data_frame itself keeps its NaN.
data_frame.fillna(value=0)

Unnamed: 0,city,total1,total2
0,서울,12000.0,15000
1,경기,0.0,18000
2,제주,63000.0,66000
3,부산,42000.0,55000


In [11]:
# Fill missing values with a text marker instead of a number
data_frame.fillna(value='결측치')

Unnamed: 0,city,total1,total2
0,서울,12000.0,15000
1,경기,결측치,18000
2,제주,63000.0,66000
3,부산,42000.0,55000


### 결측치가 있는 행 또는 열 삭제

In [12]:
# Drop every row that contains at least one missing value
# (axis=0 and how='any' are the defaults, spelled out here for clarity)
data_frame.dropna(axis=0, how='any')

Unnamed: 0,city,total1,total2
0,서울,12000.0,15000
2,제주,63000.0,66000
3,부산,42000.0,55000


In [13]:
# Drop every column that contains at least one missing value
data_frame.dropna(axis='columns')

Unnamed: 0,city,total2
0,서울,15000
1,경기,18000
2,제주,66000
3,부산,55000


In [14]:
# Append one row in which every value is missing.
# The append mutates data_frame in place, so guard it: if a fully-missing row
# already exists (e.g. this cell was re-run), do not stack another one.
has_all_missing_row = data_frame.isna().all(axis=1).any()
if not has_all_missing_row:
    # len(data_frame) is the next unused label of the default 0..n-1 index
    data_frame.loc[len(data_frame)] = np.nan
data_frame

Unnamed: 0,city,total1,total2
0,서울,12000.0,15000.0
1,경기,,18000.0
2,제주,63000.0,66000.0
3,부산,42000.0,55000.0
4,,,


In [16]:
# Drop only the rows in which every value is missing;
# rows that are merely partially missing are kept
data_frame.dropna(axis='index', how='all')

Unnamed: 0,city,total1,total2
0,서울,12000.0,15000.0
1,경기,,18000.0
2,제주,63000.0,66000.0
3,부산,42000.0,55000.0
