# **2. Data Frame**

In [1]:
import numpy as np
import pandas as pd

# 정리본

### DataFrame 속성 확인
- **df.info()** : 데이터 프레임의 행과 열 개수, 결측치 개수, dtype 등의 정보를 알려줌

- **df.columns** : 데이터 프레임의 column들의 이름을 list로 반환

- **df.index** : 데이터 프레임의 인덱스를 반환

- **df.head( 숫자 )**, **df.tail( 숫자 )** : 데이터 프레임의 상위 혹은 하위 지정한 숫자의 열을 보여줌(default 5개)

- **df.isnull()**, **df.isna()** : 데이터 프레임의 원소마다 결측치이면 True를 담은 새로운 데이터 프레임을 반환
    <br> cf) df.isnull().sum(axis = 0) : 데이터 프레임을 열 기준으로(axis=0) 결측치 개수를 보여줌
    <br> cf) df.notnull(), df.notna : 결측치이면 False, 결측치가 아니면 True를 담은 새로운 데이터 프레임을 반환

- **df.fillna('결측치 채울 값')** : 데이터 프레임 내의 결측치를 특정 값으로 채워 넣는다

- **df.dropna()** : 데이터 프레임 내의 결측치가 포함된 행을 삭제한다

### DataFrame 데이터 선택
- **df.set_index('특정 column명', inplace=True)** : 특정 column을 데이터 프레임의 index로 설정하고 inplace=True옵션으로 원본 데이터에 반영

- **df.loc['row 이름'], df.iloc[row 번호]** : 데이터 프레임의 특정 행(row)를 선택
    <br> cf) df.loc[ [ 'row1', 'row2' ] ] : row 여러 개를 선택할 때는 리스트로 묶어서 인자로 주어야 한다
    
- **df['column 이름']** : 데이터 프레임의 특정 열(column)을 선택
    <br> cf) df[ df['column'] == 'XXX' ] : 이처럼 df[] 안에 조건을 넣어서 해당 조건에 만족하는 부분만 새로운 데이터 프레임으로 반환 가능

- **df.loc['행 이름', '열 이름']** : 데이터 프레임의 행과 열을 선택

- **df.iloc[행 번호, 열 번호]** : 데이터 프레임의 행과 열을 선택

- **df['열 이름'][행 번호]** : 데이터 프레임의 행과 열을 선택

### DataFrame 데이터 수정
- **df.loc['새로운 행 이름'] = 데이터 값** : 데이터 프레임에 행(row)을 추가

- **df['새로운 열 이름'] = 데이터 값** : 데이터 프레임에 열(column)을 추가

- **df.drop(행 이름, axis = 0)** : 데이터 프레임의 특정 행(row)를 삭제

- **df.drop(열 이름, axis = 1)** : 데이터 프레임의 특정 열(column)을 삭제

- **df.replace( {dictionary} )** : 데이터 프레임의 특정 열 안에서 dictionary에 따라 값을 변환
    <br> cf) df['Legendary'].replace({True : 1, False : 0}, inplace=True) : legendary 열 내의 true는 1로, false는 0으로 대체하고 원본 데이터에 반영

- **df.sort_index(ascending = T/F)** : 데이터 프레임의 기존 index를 기준으로 오름차순(ascending=True), 내림차순 정렬한다

- **df.sort_values(기준 열, ascending = T/F)** : 데이터 프레임의 특정 column을 기준으로 오름차순(ascending=True), 내림차순 정렬한다
    <br> cf) df.sort_values(['Type 1', 'HP'], ascending=[True, True]) : 여러 column을 기준으로 할 때는 리스트로 묶어서 넣는다

### DataFrame 데이터 연산
- **df.describe()** : 데이터 프레임의 기술 통계량을 확인

- **df.apply( 함수 )** : 데이터 프레임에 함수를 적용한다

- **df.groupby( column명 )** : column을 기준으로 동일한 데이터끼리 그룹화한다

### csv 파일 관련
- **df = pd.read_csv( '파일경로/파일명.csv' )** : csv 파일을 읽어와서 df 변수에 저장

- **df.to_csv( '파일경로/파일명.csv' )** : 데이터 프레임 객체를 csv 파일로 저장한다 

### 0) csv 파일 불러오기

In [None]:
# 실습 데이터가 구글 Drive에 있는 경우, gdrive와 연동해야 함
from google.colab import drive
drive.mount('/content/gdrive/')

In [2]:
# pokemon.csv 파일 읽어와서 'df' 변수에 저장
# gdrive에 파일 저장했을 경우, 파일경로: /content/gdrive/My Drive/pokemon.csv
df = pd.read_csv('pokemon.csv')

### 1) DataFrame 속성 확인

In [3]:
# DataFrame 정보(행과 열의 개수, 결측치, 데이터타입 등) 확인
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  800 non-null    int64 
 1   #           800 non-null    int64 
 2   Name        800 non-null    object
 3   Type 1      800 non-null    object
 4   Type 2      414 non-null    object
 5   HP          800 non-null    int64 
 6   Attack      800 non-null    int64 
 7   Defense     800 non-null    int64 
 8   Sp. Atk     800 non-null    int64 
 9   Sp. Def     800 non-null    int64 
 10  Speed       800 non-null    int64 
 11  Generation  800 non-null    int64 
 12  Legendary   800 non-null    bool  
dtypes: bool(1), int64(9), object(3)
memory usage: 75.9+ KB


대부분의 column의 경우, 800개의 entries에 대해 결측치가 없지만 Type2의 경우 800 - 414개의 결측치가 있음을 확인할 수 있다  
HP부터 Generation까지 int형 데이터이고 Legendary는 bool 타입이

In [4]:
# DataFrame의 column 확인
df.columns

Index(['Unnamed: 0', '#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack',
       'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')

In [5]:
# DataFrame의 index 확인
df.index        # 인덱스가 따로 설정되어 있지 않고 0부터 800미만으로 1씩 증가하게 설정되어 있음을 확인할 수 있다

RangeIndex(start=0, stop=800, step=1)

In [6]:
# DataFrame 속 데이터 상단 5개의 행 출력
df.head(5)  #df.head()와 동일(default로 5개를 출력)

Unnamed: 0.1,Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,4,Charmander,Fire,,39,52,43,60,50,65,1,False


In [7]:
# DataFrame 속 데이터 하단 7개의 행 출력
df.tail(7)

Unnamed: 0.1,Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
793,793,717,Yveltal,Dark,Flying,126,131,95,131,98,99,6,True
794,794,718,Zygarde50% Forme,Dragon,Ground,108,100,121,81,95,95,6,True
795,795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True
796,796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True
797,797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True
798,798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True
799,799,721,Volcanion,Fire,Water,80,110,120,130,90,70,6,True


- 결측치 확인 및 처리

```
# 결측치 확인
df.isnull()
df.isna()

df.notnull()
df.notna()

# 결측치 처리
df.fillna('결측치 채울 값')
df.dropna()
```



In [8]:
# column별 결측치 수 확인
df.isnull().sum(axis = 0) #isnull까지만 하면 전체 df에 대해 True/False로 반환

Unnamed: 0      0
#               0
Name            0
Type 1          0
Type 2        386
HP              0
Attack          0
Defense         0
Sp. Atk         0
Sp. Def         0
Speed           0
Generation      0
Legendary       0
dtype: int64

In [9]:
# 결측치 'Normal'로 채우기
df.fillna('Normal') #반환이지 df에 결과를 업데이트 하지는 않는다

Unnamed: 0.1,Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,4,Charmander,Fire,Normal,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True
796,796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True
797,797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True
798,798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True


In [11]:
# 결측치 포함하고 있는 행 삭제
df.dropna()     #800개의 row에서 결측치가 있는 386개의 row를 제거하여 414개의 row만 남은 상황이다

Unnamed: 0.1,Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
6,6,6,Charizard,Fire,Flying,78,84,78,109,85,100,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True
796,796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True
797,797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True
798,798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True


### 2) DataFrame 데이터 선택

- 행(row) 선택

```
# loc 사용
df.loc['row 이름']
# iloc 사용
df.iloc[row 번호]
```

In [12]:
# 'Name'열을 index로 지정
df.set_index('Name', inplace = True)       # inplace 옵션은 원본 데이터에 반영할지를 나타낸다
df 

Unnamed: 0_level_0,Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Bulbasaur,0,1,Grass,Poison,45,49,49,65,65,45,1,False
Ivysaur,1,2,Grass,Poison,60,62,63,80,80,60,1,False
Venusaur,2,3,Grass,Poison,80,82,83,100,100,80,1,False
VenusaurMega Venusaur,3,3,Grass,Poison,80,100,123,122,120,80,1,False
Charmander,4,4,Fire,,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
Diancie,795,719,Rock,Fairy,50,100,150,100,150,50,6,True
DiancieMega Diancie,796,719,Rock,Fairy,50,160,110,160,110,110,6,True
HoopaHoopa Confined,797,720,Psychic,Ghost,80,110,60,150,130,70,6,True
HoopaHoopa Unbound,798,720,Psychic,Dark,80,160,60,170,130,80,6,True


In [13]:
# 피카츄(Pikachu)행 선택
# loc 사용
df.loc['Pikachu']

Unnamed: 0          30
#                   25
Type 1        Electric
Type 2             NaN
HP                  35
Attack              55
Defense             40
Sp. Atk             50
Sp. Def             50
Speed               90
Generation           1
Legendary        False
Name: Pikachu, dtype: object

In [14]:
# 피카츄(Pikachu)와 라이츄(Raichu)행 선택
# loc 사용
# 2개 이상 행 선택 시, 대괄호 2개 필요!
df.loc[['Pikachu', 'Raichu']]

Unnamed: 0_level_0,Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Pikachu,30,25,Electric,,35,55,40,50,50,90,1,False
Raichu,31,26,Electric,,60,90,55,90,80,110,1,False


- 열(column) 선택


```
df['column명']
```



In [15]:
# Speed열 선택
df['Speed']

Name
Bulbasaur                 45
Ivysaur                   60
Venusaur                  80
VenusaurMega Venusaur     80
Charmander                65
                        ... 
Diancie                   50
DiancieMega Diancie      110
HoopaHoopa Confined       70
HoopaHoopa Unbound        80
Volcanion                 70
Name: Speed, Length: 800, dtype: int64

In [16]:
# 포켓몬들의 Speed 평균값
df['Speed'].mean()

68.2775

In [17]:
# 포켓몬들의 Speed 중앙값
df['Speed'].median()

65.0

In [18]:
# 'Type 1'열 선택
df['Type 1']

Name
Bulbasaur                  Grass
Ivysaur                    Grass
Venusaur                   Grass
VenusaurMega Venusaur      Grass
Charmander                  Fire
                          ...   
Diancie                     Rock
DiancieMega Diancie         Rock
HoopaHoopa Confined      Psychic
HoopaHoopa Unbound       Psychic
Volcanion                   Fire
Name: Type 1, Length: 800, dtype: object

In [21]:
# Type1이 Water인 데이터 선택 (데이터 필터링)
df[df['Type 1'] == 'Water']

Unnamed: 0_level_0,Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Squirtle,9,7,Water,,44,48,65,50,64,43,1,False
Wartortle,10,8,Water,,59,63,80,65,80,58,1,False
Blastoise,11,9,Water,,79,83,100,85,105,78,1,False
BlastoiseMega Blastoise,12,9,Water,,79,103,120,135,115,78,1,False
Psyduck,59,54,Water,,50,52,48,65,50,55,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
Froakie,724,656,Water,,41,56,40,62,44,71,6,False
Frogadier,725,657,Water,,54,63,52,83,56,97,6,False
Greninja,726,658,Water,Dark,72,95,67,103,71,122,6,False
Clauncher,762,692,Water,,50,53,62,58,63,44,6,False


In [22]:
# HP가 50 이상인 데이터 선택 (데이터 필터링)
df[df['HP'] >= 50]

Unnamed: 0_level_0,Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Ivysaur,1,2,Grass,Poison,60,62,63,80,80,60,1,False
Venusaur,2,3,Grass,Poison,80,82,83,100,100,80,1,False
VenusaurMega Venusaur,3,3,Grass,Poison,80,100,123,122,120,80,1,False
Charmeleon,5,5,Fire,,58,64,58,80,65,80,1,False
Charizard,6,6,Fire,Flying,78,84,78,109,85,100,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
Diancie,795,719,Rock,Fairy,50,100,150,100,150,50,6,True
DiancieMega Diancie,796,719,Rock,Fairy,50,160,110,160,110,110,6,True
HoopaHoopa Confined,797,720,Psychic,Ghost,80,110,60,150,130,70,6,True
HoopaHoopa Unbound,798,720,Psychic,Dark,80,160,60,170,130,80,6,True


In [24]:
# Type1이 Water이고, HP가 50 이상인 데이터 선택 (데이터 필터링)
df[(df['Type 1'] == 'Water') & (df['HP'] >= 50)]

Unnamed: 0_level_0,Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Wartortle,10,8,Water,,59,63,80,65,80,58,1,False
Blastoise,11,9,Water,,79,83,100,85,105,78,1,False
BlastoiseMega Blastoise,12,9,Water,,79,103,120,135,115,78,1,False
Psyduck,59,54,Water,,50,52,48,65,50,55,1,False
Golduck,60,55,Water,,80,82,78,95,80,85,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
KeldeoResolute Forme,714,647,Water,Fighting,91,72,90,129,90,108,5,False
Frogadier,725,657,Water,,54,63,52,83,56,97,6,False
Greninja,726,658,Water,Dark,72,95,67,103,71,122,6,False
Clauncher,762,692,Water,,50,53,62,58,63,44,6,False


- 행과 열 선택
```
# loc 사용 (행 먼저)
df.loc['행 이름', '열 이름']
# iloc 사용 (행 먼저)
df.iloc[행 번호, 열 번호]
# column명 사용 (열 먼저)
df['열 이름'][행 번호]
```

In [25]:
# 모든 포켓몬들의 Type 1, Type 2만 선택
# loc 사용
df.loc[:, 'Type 1' : 'Type 2'] #행은 전체(:) 선택, 열은 Type1,2만 선택

Unnamed: 0_level_0,Type 1,Type 2
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bulbasaur,Grass,Poison
Ivysaur,Grass,Poison
Venusaur,Grass,Poison
VenusaurMega Venusaur,Grass,Poison
Charmander,Fire,
...,...,...
Diancie,Rock,Fairy
DiancieMega Diancie,Rock,Fairy
HoopaHoopa Confined,Psychic,Ghost
HoopaHoopa Unbound,Psychic,Dark


In [26]:
# index번호가 2부터 10까지인 포켓몬들의 HP(4) 선택
# iloc 사용
df.iloc[2:11, 4] #2이상 11미만(10까지)의 행과 4번 인덱스의 열(5번째 열)인 HP만을 출력

Name
Venusaur                     80
VenusaurMega Venusaur        80
Charmander                   39
Charmeleon                   58
Charizard                    78
CharizardMega Charizard X    78
CharizardMega Charizard Y    78
Squirtle                     44
Wartortle                    59
Name: HP, dtype: int64

In [27]:
# 처음 10개 포켓몬들의 HP(4)부터 speed(9) 데이터 선택
# iloc 사용
df.iloc[:10, 4:10]

Unnamed: 0_level_0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bulbasaur,45,49,49,65,65,45
Ivysaur,60,62,63,80,80,60
Venusaur,80,82,83,100,100,80
VenusaurMega Venusaur,80,100,123,122,120,80
Charmander,39,52,43,60,50,65
Charmeleon,58,64,58,80,65,80
Charizard,78,84,78,109,85,100
CharizardMega Charizard X,78,130,111,130,85,100
CharizardMega Charizard Y,78,104,78,159,115,100
Squirtle,44,48,65,50,64,43


### 3) DataFrame 데이터 수정

- 행/열 추가 및 삭제

```
# 행 추가
df.loc['새로운 행 이름'] = 데이터 값
# 열 추가
df['새로운 열 이름'] = 데이터 값
```


```
# 행 삭제
df.drop(행 이름, axis = 0)
# 열 삭제
df.drop(열 이름, axis = 1)
```




In [28]:
# 'Unnamed: 0'column 삭제
# 원본 df에 수정사항 저장
df.drop('Unnamed: 0', axis=1, inplace=True) 

In [29]:
df

Unnamed: 0_level_0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Bulbasaur,1,Grass,Poison,45,49,49,65,65,45,1,False
Ivysaur,2,Grass,Poison,60,62,63,80,80,60,1,False
Venusaur,3,Grass,Poison,80,82,83,100,100,80,1,False
VenusaurMega Venusaur,3,Grass,Poison,80,100,123,122,120,80,1,False
Charmander,4,Fire,,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...
Diancie,719,Rock,Fairy,50,100,150,100,150,50,6,True
DiancieMega Diancie,719,Rock,Fairy,50,160,110,160,110,110,6,True
HoopaHoopa Confined,720,Psychic,Ghost,80,110,60,150,130,70,6,True
HoopaHoopa Unbound,720,Psychic,Dark,80,160,60,170,130,80,6,True


In [40]:
# HP, Attack, Defense, Sp. Atk, Sp. Def, Speed 모두 더하여
# 'Total' 이름의 column 추가
df['Total'] = df.iloc[:, 3:9].sum(axis=1)       #df['Total'] = df['HP'] + df['Attack'] ... + df['Speed']와 동일한 결과의 코드
df

Unnamed: 0_level_0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Bulbasaur,1,Grass,Poison,45,49,49,65,65,45,1,False,318
Ivysaur,2,Grass,Poison,60,62,63,80,80,60,1,False,405
Venusaur,3,Grass,Poison,80,82,83,100,100,80,1,False,525
VenusaurMega Venusaur,3,Grass,Poison,80,100,123,122,120,80,1,False,625
Charmander,4,Fire,,39,52,43,60,50,65,1,False,309
...,...,...,...,...,...,...,...,...,...,...,...,...
Diancie,719,Rock,Fairy,50,100,150,100,150,50,6,True,600
DiancieMega Diancie,719,Rock,Fairy,50,160,110,160,110,110,6,True,700
HoopaHoopa Confined,720,Psychic,Ghost,80,110,60,150,130,70,6,True,600
HoopaHoopa Unbound,720,Psychic,Dark,80,160,60,170,130,80,6,True,680


- 특정 데이터값 다른 값으로 대체 (replace 함수)

In [None]:
# 목표: Legendary 포켓몬 개수 세기

In [42]:
# 1단계) Legendary column에서 True는 1로, False는 0으로 바꾸기
# 원본 df에 수정사항 저장
df['Legendary'].replace({True : 1, False : 0}, inplace=True)
df

Unnamed: 0_level_0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Bulbasaur,1,Grass,Poison,45,49,49,65,65,45,1,0,318
Ivysaur,2,Grass,Poison,60,62,63,80,80,60,1,0,405
Venusaur,3,Grass,Poison,80,82,83,100,100,80,1,0,525
VenusaurMega Venusaur,3,Grass,Poison,80,100,123,122,120,80,1,0,625
Charmander,4,Fire,,39,52,43,60,50,65,1,0,309
...,...,...,...,...,...,...,...,...,...,...,...,...
Diancie,719,Rock,Fairy,50,100,150,100,150,50,6,1,600
DiancieMega Diancie,719,Rock,Fairy,50,160,110,160,110,110,6,1,700
HoopaHoopa Confined,720,Psychic,Ghost,80,110,60,150,130,70,6,1,600
HoopaHoopa Unbound,720,Psychic,Dark,80,160,60,170,130,80,6,1,680


In [43]:
# 2단계) Legendary column 세로로 더하기
df['Legendary'].sum(axis=0)

65

- 데이터 정렬


```
# index 기준 정렬
df.sort_index(ascending = T/F)

# value 기준 정렬
df.sort_values(기준 열, ascending = T/F)
```



In [44]:
# 포켓몬 이름 알파벳 순서로 정렬
df.sort_index(ascending=True) # 이미 index는 포켓몬 이름으로 설정되어 있는 상태, 원본 데이터를 업데이트 한 것은 아니고 반환한 상태

Unnamed: 0_level_0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Abomasnow,460,Grass,Ice,90,92,75,92,85,60,4,0,494
AbomasnowMega Abomasnow,460,Grass,Ice,90,132,105,132,105,30,4,0,594
Abra,63,Psychic,,25,20,15,105,55,90,1,0,310
Absol,359,Dark,,65,130,60,75,60,75,3,0,465
AbsolMega Absol,359,Dark,,65,150,60,115,60,115,3,0,565
...,...,...,...,...,...,...,...,...,...,...,...,...
Zoroark,571,Dark,,60,105,60,120,60,105,5,0,510
Zorua,570,Dark,,40,65,40,80,40,65,5,0,330
Zubat,41,Poison,Flying,40,45,35,30,40,55,1,0,245
Zweilous,634,Dark,Dragon,72,85,70,65,70,58,5,0,420


In [46]:
# Attack 높은 순서로 정렬
df.sort_values('Attack', ascending=False)

Unnamed: 0_level_0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
MewtwoMega Mewtwo X,150,Psychic,Fighting,106,190,100,154,100,130,1,1,780
HeracrossMega Heracross,214,Bug,Fighting,80,185,115,40,105,75,2,0,600
GroudonPrimal Groudon,383,Ground,Fire,100,180,160,150,90,90,3,1,770
RayquazaMega Rayquaza,384,Dragon,Flying,105,180,100,180,100,115,3,1,780
DeoxysAttack Forme,386,Psychic,,50,180,20,180,20,150,3,1,600
...,...,...,...,...,...,...,...,...,...,...,...,...
Magikarp,129,Water,,20,10,55,15,20,80,1,0,200
Blissey,242,Normal,,255,10,10,75,135,55,2,0,540
Shuckle,213,Bug,Rock,20,10,230,10,230,5,2,0,505
Chansey,113,Normal,,250,5,5,35,105,50,1,0,450


In [47]:
# 동일한 Type 1끼리 정렬
df.sort_values('Type 1')

Unnamed: 0_level_0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Sewaddle,540,Bug,Grass,45,53,70,40,60,42,5,0,310
Pinsir,127,Bug,,65,125,100,55,70,85,1,0,500
Burmy,412,Bug,,40,29,45,29,45,36,4,0,224
Scyther,123,Bug,Flying,70,110,80,55,80,105,1,0,500
Joltik,595,Bug,Electric,50,47,50,57,50,65,5,0,319
...,...,...,...,...,...,...,...,...,...,...,...,...
Totodile,158,Water,,50,65,64,44,48,43,2,0,314
Basculin,550,Water,,70,92,65,80,55,98,5,0,460
Vaporeon,134,Water,,130,65,60,110,95,65,1,0,525
Panpour,515,Water,,50,53,48,53,48,64,5,0,316


In [49]:
# 동일한 Type 1끼리 정렬, 동일한 Type 1내에서는 HP 오름차순으로 정렬
df.sort_values('HP', ascending=True).sort_values('Type 1')

Unnamed: 0_level_0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Shedinja,292,Bug,Ghost,1,90,45,30,30,40,3,0,236
WormadamPlant Cloak,413,Bug,Grass,60,59,85,79,105,36,4,0,424
Beautifly,267,Bug,Flying,60,70,50,100,50,65,3,0,395
Scolipede,545,Bug,Poison,60,100,89,55,69,112,5,0,485
Butterfree,12,Bug,Flying,60,45,50,90,80,70,1,0,395
...,...,...,...,...,...,...,...,...,...,...,...,...
Shellos,422,Water,,76,48,48,57,62,34,4,0,325
Blastoise,9,Water,,79,83,100,85,105,78,1,0,530
BlastoiseMega Blastoise,9,Water,,79,103,120,135,115,78,1,0,630
Palpitoad,536,Water,Ground,75,65,55,65,55,69,5,0,384


In [50]:
df.sort_values(['Type 1', 'HP'], ascending=[True, True]) #위와 동일한 결과를 출력하는 코드

Unnamed: 0_level_0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Shedinja,292,Bug,Ghost,1,90,45,30,30,40,3,0,236
Shuckle,213,Bug,Rock,20,10,230,10,230,5,2,0,505
Combee,415,Bug,Flying,30,30,42,30,42,70,4,0,244
Venipede,543,Bug,Poison,30,45,59,30,39,57,5,0,260
Nincada,290,Bug,Ground,31,45,90,30,30,40,3,0,266
...,...,...,...,...,...,...,...,...,...,...,...,...
Lapras,131,Water,Ice,130,85,80,85,95,60,1,0,535
Vaporeon,134,Water,,130,65,60,110,95,65,1,0,525
Wailmer,320,Water,,130,70,35,70,35,60,3,0,400
Alomomola,594,Water,,165,75,80,40,45,65,5,0,470


### 4) DataFrame 데이터 연산

- 데이터 기술통계량 확인

In [51]:
df.describe()

Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375,0.08125,435.1025
std,208.343798,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129,0.27339,119.96304
min,1.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0,0.0,180.0
25%,184.75,50.0,55.0,50.0,49.75,50.0,45.0,2.0,0.0,330.0
50%,364.5,65.0,75.0,70.0,65.0,70.0,65.0,3.0,0.0,450.0
75%,539.25,80.0,100.0,90.0,95.0,90.0,90.0,5.0,0.0,515.0
max,721.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0,1.0,780.0


- 수치 데이터에 함수 적용 (apply 함수)

In [None]:
# 목표: HP가 50미만인 포켓몬들에게 HP+20, Attack+20, Defense+20 하기

In [54]:
# 단계1) HP가 50미만인 포켓몬 선택, low_hp 변수에 저장
low_hp = df[df['HP'] < 50][['HP', 'Attack', 'Defense']]
low_hp

Unnamed: 0_level_0,HP,Attack,Defense
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bulbasaur,45,49,49
Charmander,39,52,43
Squirtle,44,48,65
Caterpie,45,30,35
Weedle,40,35,30
...,...,...,...
Goomy,45,50,35
Phantump,43,70,48
PumpkabooAverage Size,49,66,70
PumpkabooSmall Size,44,66,70


In [55]:
# 단계2) x를 넣으면 x+20을 해주는 함수 f 만들기
f = lambda x : x + 20

In [56]:
# 단계3) 함수 f를 low_hp에 적용
low_hp.apply(f)

Unnamed: 0_level_0,HP,Attack,Defense
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bulbasaur,65,69,69
Charmander,59,72,63
Squirtle,64,68,85
Caterpie,65,50,55
Weedle,60,55,50
...,...,...,...
Goomy,65,70,55
Phantump,63,90,68
PumpkabooAverage Size,69,86,90
PumpkabooSmall Size,64,86,90


- 데이터 그룹화 (groupby 함수)

In [57]:
# Type 1을 기준으로 데이터 그룹화, 그룹별 데이터 개수 확인
df.groupby(['Type 1']).count()

Unnamed: 0_level_0,#,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Type 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Bug,69,52,69,69,69,69,69,69,69,69,69
Dark,31,21,31,31,31,31,31,31,31,31,31
Dragon,32,21,32,32,32,32,32,32,32,32,32
Electric,44,17,44,44,44,44,44,44,44,44,44
Fairy,17,2,17,17,17,17,17,17,17,17,17
Fighting,27,7,27,27,27,27,27,27,27,27,27
Fire,52,24,52,52,52,52,52,52,52,52,52
Flying,4,2,4,4,4,4,4,4,4,4,4
Ghost,32,22,32,32,32,32,32,32,32,32,32
Grass,70,37,70,70,70,70,70,70,70,70,70


In [59]:
# Type 1을 기준으로 데이터 그룹화, 그룹별 데이터 값의 합 확인
df.groupby(['Type 1']).sum()

Unnamed: 0_level_0,#,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Type 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Bug,23080,FlyingPoisonPoisonPoisonPoisonGrassGrassPoison...,3925,4897,4880,3717,4471,4256,222,0,26146
Dark,14302,FlyingIceFireFireFireGhostGhostFlyingIceFighti...,2071,2740,2177,2314,2155,2361,125,2,13818
Dragon,15180,FlyingFlyingFairyFlyingFlyingPsychicPsychicPsy...,2666,3588,2764,3099,2843,2657,124,12,17617
Electric,15994,SteelSteelFlyingDragonSteelGhostFireWaterIceFl...,2631,3040,2917,3961,3243,3718,144,4,19510
Fairy,7642,FlyingFlying,1260,1046,1117,1335,1440,826,70,1,7024
Fighting,9824,PsychicPsychicPsychicSteelSteelDarkFlying,1886,2613,1780,1434,1747,1784,91,0,11244
Fire,17025,FlyingDragonFlyingFlyingRockFlyingFightingFigh...,3635,4408,3524,4627,3755,3871,167,5,23820
Flying,2711,DragonDragon,283,315,265,377,290,410,22,2,1940
Ghost,15568,PoisonPoisonPoisonPoisonFlyingFlyingDarkDragon...,2062,2361,2598,2539,2447,2059,134,2,14066
Grass,24141,PoisonPoisonPoisonPoisonPoisonPoisonPoisonPois...,4709,5125,4956,5425,4930,4335,235,3,29480


### 5) csv 파일로 저장

- 파일 저장하기

```
df.to_csv('파일경로/파일이름.csv')
```



In [60]:
# df 객체를 'pokemon_analysis.csv'로 저장
df.to_csv('pokemon_analysis.csv')