In [1]:
import pandas as pd

In [2]:
# Column-oriented sample data: one entry per city with two total columns
data = {
    'city': ['서울', '경기', '제주', '부산'],
    'total1': [12000, 75000, 63000, 42000],
    'total2': [15000, 18000, 66000, 55000],
}

# Build the DataFrame (dict keys become the column names) and display it
data_frame = pd.DataFrame.from_dict(data)
data_frame

Unnamed: 0,city,total1,total2
0,서울,12000,15000
1,경기,75000,18000
2,제주,63000,66000
3,부산,42000,55000


In [3]:
# Inspect the dtype of every column in the DataFrame
data_frame.dtypes

city      object
total1     int64
total2     int64
dtype: object

## dtype 변경

### apply 함수 
- [문법] 데이터프레임['컬럼명'].apply(파이썬기본자료형)

In [4]:
# Goal of this section: change the dtype of total1 (int64 -> object).
# Start by checking the column's current dtype.
data_frame['total1'].dtype

dtype('int64')

In [4]:
# apply(str) yields a converted copy; nothing is assigned, so data_frame itself is not modified.
# Look at the first element of that copy.
data_frame['total1'].apply(str)[0]

'12000'

In [5]:
# The apply(str) result above was not assigned back, so the column dtypes are unchanged
data_frame.dtypes

city      object
total1     int64
total2     int64
dtype: object

In [6]:
# Apply to the original: store the converted Series back into the total1 column
data_frame['total1'] = data_frame['total1'].apply(str)
data_frame.dtypes

city      object
total1    object
total2     int64
dtype: object

In [7]:
# Convert total1 and total2 to numeric dtypes in a single call.
# pd.to_numeric picks the numeric dtype from the values on its own.
# Several columns CAN be converted at once this way, but DataFrame.apply returns
# a new frame and leaves data_frame untouched, so the result must be bound to a name.
totals_numeric = data_frame[['total1', 'total2']].apply(pd.to_numeric)
data_frame.dtypes  # the original frame is unchanged; only totals_numeric holds the converted columns

city      object
total1    object
total2     int64
dtype: object

### astype 함수
- [문법] 
    - 파이썬 기본 자료형(int, float, str)은 그대로 넘기고, 그 외 자료형('float32', 'int8' 등)은 문자열로 넘기기
    - 시리즈.astype('type')
    - 데이터프레임.astype({컬럼명1: 'type', 컬럼명2:'type'})

In [8]:
# Column dtypes before the astype examples (total1 is object at this point)
data_frame.dtypes

city      object
total1    object
total2     int64
dtype: object

In [9]:
# Cast total1 with the built-in float type; the result is not assigned back
data_frame['total1'].astype(float)

0    12000.0
1    75000.0
2    63000.0
3    42000.0
Name: total1, dtype: float64

In [13]:
# Small values such as 1, 3, 5 convert to float16 unchanged
pd.Series([1, 3, 5]).astype('float16')

0    1.0
1    3.0
2    5.0
dtype: float16

In [12]:
# inf = infinity.
# Values outside the range float16 can represent are not converted correctly:
# in the recorded output 75000 becomes inf and 63000 / 42000 lose precision.
data_frame['total1'].astype('float16')

  return arr.astype(dtype, copy=True)


0    12000.0
1        inf
2    63008.0
3    41984.0
Name: total1, dtype: float16

In [10]:
# Non-builtin dtypes are named with a string; float32 holds these values exactly
data_frame['total1'].astype('float32')

0    12000.0
1    75000.0
2    63000.0
3    42000.0
Name: total1, dtype: float32

In [11]:
# Same cast with the dtype given explicitly as 'float64'
data_frame['total1'].astype('float64')

0    12000.0
1    75000.0
2    63000.0
3    42000.0
Name: total1, dtype: float64

In [14]:
# Cast with the built-in int type (the recorded output shows this resolved to int32 here)
data_frame['total1'].astype(int)

0    12000
1    75000
2    63000
3    42000
Name: total1, dtype: int32

In [15]:
# int8 is too narrow for these values: in the recorded output they come out wrapped (e.g. 12000 -> -32)
data_frame['total1'].astype('int8')

0   -32
1    -8
2    24
3    16
Name: total1, dtype: int8

In [16]:
# int16 holds 12000 but not the larger values: the recorded output shows them wrapped (e.g. 75000 -> 9464)
data_frame['total1'].astype('int16')

0    12000
1     9464
2    -2536
3   -23536
Name: total1, dtype: int16

In [17]:
# int32 is wide enough: every value is preserved
data_frame['total1'].astype('int32')

0    12000
1    75000
2    63000
3    42000
Name: total1, dtype: int32

In [18]:
# Same cast with the dtype given explicitly as 'int64'
data_frame['total1'].astype('int64')

0    12000
1    75000
2    63000
3    42000
Name: total1, dtype: int64

In [20]:
# Check the Python-level type of a single element after casting to int64
type(data_frame['total1'].astype('int64')[0])

numpy.int64

In [21]:
# Cast several columns in one call by passing a {column: dtype} mapping
data_frame.astype(dict.fromkeys(['total1', 'total2'], float))

Unnamed: 0,city,total1,total2
0,서울,12000.0,15000.0
1,경기,75000.0,18000.0
2,제주,63000.0,66000.0
3,부산,42000.0,55000.0


In [22]:
# Confirm the resulting dtypes of the multi-column float cast
data_frame.astype(dict.fromkeys(['total1', 'total2'], float)).dtypes

city       object
total1    float64
total2    float64
dtype: object

In [24]:
# Each column can get its own target dtype: a dtype string for total1, the built-in str for total2
data_frame.astype(dict(total1='float32', total2=str))

Unnamed: 0,city,total1,total2
0,서울,12000.0,15000
1,경기,75000.0,18000
2,제주,63000.0,66000
3,부산,42000.0,55000


In [23]:
# Confirm the resulting dtypes of the mixed cast
data_frame.astype(dict(total1='float32', total2=str)).dtypes

city       object
total1    float32
total2     object
dtype: object

In [29]:
# Replace 경기's total1 (row label 1) with the non-numeric string '없음' ("none")
data_frame.at[1, 'total1'] = '없음'
data_frame

Unnamed: 0,city,total1,total2
0,서울,12000,15000
1,경기,없음,18000
2,제주,63000,66000
3,부산,42000,55000


In [30]:
# total1 now mixes digit strings with '없음'; check its dtype
data_frame['total1'].dtype

dtype('O')

In [33]:
# errors='coerce' turns values that cannot be parsed as numbers into missing values (NaN).
# pd.to_numeric accepts a whole Series, so pass the column directly instead of
# calling it once per element through apply; the result is the same.
pd.to_numeric(data_frame['total1'], errors='coerce')

0    12000.0
1        NaN
2    63000.0
3    42000.0
Name: total1, dtype: float64