# DataFrame 모양(shape) 변경
* wide(column) -> long(row)
* long(row) -> wide(column)

1. stack / unstack
2. pivot / melt
3. pivot_table

In [1]:
import numpy as np
import pandas as pd

# pivot / melt

In [2]:
df = pd.DataFrame(data = {'A' : ['one'] * 3 + ['two'] * 3,
                          'B' : ['a', 'b', 'c'] * 2,
                          'C' : np.arange(1, 7),
                          'D' : np.arange(2, 13, 2)})
df

Unnamed: 0,A,B,C,D
0,one,a,1,2
1,one,b,2,4
2,one,c,3,6
3,two,a,4,8
4,two,b,5,10
5,two,c,6,12


## pivot()

**`pivot()` 메서드의 parameter**
* **index** : 원본 데이터프레임을 pivoting 할 때, row index로 사용할 컬럼들의 이름
* **columns** : 원본 데이터프레임을 pivoting 할 때, column 이름으로 사용할 컬럼들의 이름
* **values** : 원본 데이터프레임을 pivoting 할 때, 각 셀에 저장될 값으로 사용할 컬럼들의 이름

### example 1

In [4]:
# row index로 A컬럼을 쓰고, B컬럼은 컬럼으로 남겨두고, C는 각 셀에 저장될 값으로 사용함.
pivoted1 = df.pivot(index = 'A', columns = 'B', values = 'C')
pivoted1

B,a,b,c
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,2,3
two,4,5,6


In [5]:
# row index로 A컬럼, B는 컬럼으로 존속, D를 각 셀에 저장될 값으로 사용
pivoted2 = df.pivot(index = 'A', columns = 'B', values = 'D')
pivoted2

B,a,b,c
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,2,4,6
two,8,10,12


In [6]:
df

Unnamed: 0,A,B,C,D
0,one,a,1,2
1,one,b,2,4
2,one,c,3,6
3,two,a,4,8
4,two,b,5,10
5,two,c,6,12


In [7]:
pivoted3 = df.pivot(index = 'A', columns = 'B', values = ['C', 'D'])
pivoted3

Unnamed: 0_level_0,C,C,C,D,D,D
B,a,b,c,a,b,c
A,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
one,1,2,3,2,4,6
two,4,5,6,8,10,12


### example 2

In [8]:
countries = ['Korea', 'Korea', 'China', 'China', 'GB', 'GB', 'US', 'US']
continents = ['Asia'] * 4 + ['Europe'] * 2 + ['America'] * 2
years = [2020, 2021] * 4

df = pd.DataFrame(data = {
    'country' : countries,
    'continent' : continents,
    'year' : years,
    'pop' : np.random.randint(10000, size = 8),
    'gdp' : np.random.rand(8)
})

In [9]:
df

Unnamed: 0,country,continent,year,pop,gdp
0,Korea,Asia,2020,3113,0.422755
1,Korea,Asia,2021,1520,0.489921
2,China,Asia,2020,3407,0.023101
3,China,Asia,2021,3916,0.976658
4,GB,Europe,2020,7763,0.545716
5,GB,Europe,2021,693,0.225013
6,US,America,2020,1939,0.66351
7,US,America,2021,4525,0.481227


In [10]:
# 국가별 연도별 인구
df.pivot(index = 'country', columns = 'year', values = 'pop')

year,2020,2021
country,Unnamed: 1_level_1,Unnamed: 2_level_1
China,3407,3916
GB,7763,693
Korea,3113,1520
US,1939,4525


In [12]:
# 국가별 연도별 인구, gdp
df.pivot(index = 'country', columns = 'year', values = ['pop', 'gdp'])

Unnamed: 0_level_0,pop,pop,gdp,gdp
year,2020,2021,2020,2021
country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
China,3407.0,3916.0,0.023101,0.976658
GB,7763.0,693.0,0.545716,0.225013
Korea,3113.0,1520.0,0.422755,0.489921
US,1939.0,4525.0,0.66351,0.481227


In [14]:
# 연도별(index), 국가별(column), 대륙별(column) pop, gdp
df.pivot(index = 'year', columns = ['country', 'continent'], values = ['pop', 'gdp'])

Unnamed: 0_level_0,pop,pop,pop,pop,gdp,gdp,gdp,gdp
country,Korea,China,GB,US,Korea,China,GB,US
continent,Asia,Asia,Europe,America,Asia,Asia,Europe,America
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
2020,3113.0,3407.0,7763.0,1939.0,0.422755,0.023101,0.545716,0.66351
2021,1520.0,3916.0,693.0,4525.0,0.489921,0.976658,0.225013,0.481227


### example 3

In [15]:
df = pd.DataFrame(data = {
    'gender' : ['Male', 'Female'] * 3,
    'time' : ['lunch'] * 3 + ['Dinner'] * 3,
    'day' : ['Fri', 'Sat', 'Sun'] * 2,
    'bill' : np.arange(10, 70, 10),
    'tip' : np.arange(1, 7)
})
df

Unnamed: 0,gender,time,day,bill,tip
0,Male,lunch,Fri,10,1
1,Female,lunch,Sat,20,2
2,Male,lunch,Sun,30,3
3,Female,Dinner,Fri,40,4
4,Male,Dinner,Sat,50,5
5,Female,Dinner,Sun,60,6


In [16]:
df.pivot(index = 'gender', columns = 'day', values = 'bill')

day,Fri,Sat,Sun
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,40,20,60
Male,10,50,30


In [17]:
df.pivot(index = 'gender', columns = 'day', values = 'bill')

day,Fri,Sat,Sun
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,40,20,60
Male,10,50,30


In [18]:
df.pivot(index = ['gender', 'time'], columns = 'day', values = 'bill')

Unnamed: 0_level_0,day,Fri,Sat,Sun
gender,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,Dinner,40.0,,60.0
Female,lunch,,20.0,
Male,Dinner,,50.0,
Male,lunch,10.0,,30.0


In [19]:
df.pivot(index = 'gender', columns = ['day', 'time'], values = 'bill')

day,Fri,Sat,Sun,Fri,Sat,Sun
time,lunch,lunch,lunch,Dinner,Dinner,Dinner
gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Female,,20.0,,40.0,,60.0
Male,10.0,,30.0,,50.0,


## melt()

* **`melt()` 메서드의 parameter**

* **id_vars** : melting 시, unpivot되지 않을 컬럼의 이름들
* **var_name** : 변수(variable) 컬럼의 이름 설정
* **value_name** : 값(value) 컬럼의 이름 설정
* **value_vars** : melting할 컬럼들의 ㅇ리ㅡㅁ

In [20]:
df = pd.DataFrame(data = {
    'gender' : ['Female', 'Male'],
    'Breakfast' : [0, 0],
    'Lunch' : [1, 3],
    'Dinner' : [2, 4]
})
df

Unnamed: 0,gender,Breakfast,Lunch,Dinner
0,Female,0,1,2
1,Male,0,3,4


In [21]:
# gender컬럼만 unpivot하지 않음. 나머지는 모두 unpivot
df.melt(id_vars = 'gender')

Unnamed: 0,gender,variable,value
0,Female,Breakfast,0
1,Male,Breakfast,0
2,Female,Lunch,1
3,Male,Lunch,3
4,Female,Dinner,2
5,Male,Dinner,4


In [22]:
# gender컬럼만 unpivot하지 않음. 나머지는 모두 unpivot. 새로 만들어진 컬럼의 이름은 time,  값의 이름은 size로
df.melt(id_vars = 'gender', var_name = 'time', value_name = 'size')

Unnamed: 0,gender,time,size
0,Female,Breakfast,0
1,Male,Breakfast,0
2,Female,Lunch,1
3,Male,Lunch,3
4,Female,Dinner,2
5,Male,Dinner,4


In [23]:
df.melt(id_vars = 'gender', value_vars = ['Lunch', 'Dinner'],
        var_name = 'time', value_name = 'count')

Unnamed: 0,gender,time,count
0,Female,Lunch,1
1,Male,Lunch,3
2,Female,Dinner,2
3,Male,Dinner,4
