### Pivot

- 데이터프레임에서 특정 컬럼을 각각 index, columns, values로 지정하여 새로운 데이터프레임을 만드는 방법
- `df.pivot(index, columns, values)`
- 특정 데이터와 다른 데이터 값 사이의 관계를 알 수 있도록 한다

In [1]:
# `np` is the conventional alias for numpy; the original bound pandas to it,
# which would make any later `np.<numpy function>` call fail.
import numpy as np
import pandas as pd

In [2]:
# Load the Titanic passenger data from train.csv and preview the last rows.

titanic = pd.read_csv("train.csv")
titanic.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [35]:
# Collapse the repeated (Sex, Pclass) rows into one row per pair, storing the
# number of passengers in each pair in a "Counts" column.

titanic_df1 = (
    titanic
    .groupby(by=["Sex", "Pclass"])
    .size()
    .reset_index(name="Counts")
)
titanic_df1

Unnamed: 0,Sex,Pclass,Counts
0,female,1,94
1,female,2,76
2,female,3,144
3,male,1,122
4,male,2,108
5,male,3,347


In [38]:
# Reshape the long-format counts into a Sex (rows) x Pclass (columns) table.
# The arguments are passed by keyword: positional use of DataFrame.pivot was
# deprecated in pandas 1.5 and raises TypeError from pandas 2.0 onwards.
titanic_df1 = titanic_df1.pivot(index="Sex", columns="Pclass", values="Counts")
titanic_df1

Pclass,1,2,3
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,94,76,144
male,122,108,347


In [39]:
# Survival data:
# build a frame with the number of passengers per (Sex, Survived) pair.

titanic_df2 = (
    titanic
    .groupby(by=["Sex", "Survived"])
    .size()
    .reset_index(name="Counts")
)
titanic_df2

Unnamed: 0,Sex,Survived,Counts
0,female,0,81
1,female,1,233
2,male,0,468
3,male,1,109


In [40]:
# Reshape into a Sex (rows) x Survived (columns) table of passenger counts.
# Keyword arguments are required by DataFrame.pivot from pandas 2.0 onwards.
titanic_df2 = titanic_df2.pivot(index="Sex", columns="Survived", values="Counts")
titanic_df2

Survived,0,1
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1
female,81,233
male,468,109


In [34]:
# Number of passengers per (Pclass, Survived) pair, i.e. survival by cabin class.

df3 = (
    titanic
    .groupby(by=["Pclass", "Survived"])
    .size()
    .reset_index(name="Counts")
)
df3

Unnamed: 0,Pclass,Survived,Counts
0,1,0,80
1,1,1,136
2,2,0,97
3,2,1,87
4,3,0,372
5,3,1,119


In [31]:
# Reshape into a Pclass (rows) x Survived (columns) table of passenger counts.
# Keyword arguments are required by DataFrame.pivot from pandas 2.0 onwards.
df3 = df3.pivot(index="Pclass", columns="Survived", values="Counts")
df3

Survived,0,1
Pclass,Unnamed: 1_level_1,Unnamed: 2_level_1
1,80,136
2,97,87
3,372,119


#### pivot table

- `pivot_table(values, index, columns, aggfunc)`
- fill_value : NaN 데이터를, 설정한 데이터로 치환하는 parameter
- dropna (기본값 True) : 값이 모두 NaN인 Column을 결과에 남길지 제거할지 결정하는 parameter. True이면 전체가 NaN인 Column이나 Row는 결과에서 제거된다.

In [7]:
# Add a helper column of ones; summing it per group gives the number of
# passengers in that group.
titanic["Counts"] = 1

In [8]:
# Preview the last rows to confirm the new "Counts" column is present.
titanic.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Counts
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S,1
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S,1
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S,1
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C,1
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q,1


In [9]:
# Number of male/female passengers per cabin class.
# aggfunc is given as the string "sum": passing the builtin `sum` emits a
# FutureWarning in pandas >= 2.1 and is slated to change behaviour.

titanic.pivot_table(values="Counts", index=["Sex"], columns=["Pclass"], aggfunc="sum")

Pclass,1,2,3
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,94,76,144
male,122,108,347


In [10]:
# Passenger counts per (Sex, Pclass) row, split into Survived = 0 / 1 columns.
# aggfunc="sum" (string) avoids the pandas >= 2.1 FutureWarning raised for the
# builtin `sum`.

titanic.pivot_table(
    values="Counts",
    index=["Sex", "Pclass"],
    columns=["Survived"],
    aggfunc="sum",
)

Unnamed: 0_level_0,Survived,0,1
Sex,Pclass,Unnamed: 2_level_1,Unnamed: 3_level_1
female,1,3,91
female,2,6,70
female,3,72,72
male,1,77,45
male,2,91,17
male,3,300,47


In [11]:
# Passenger counts per cabin class, split into Survived = 0 / 1 columns.
# aggfunc="sum" (string) avoids the pandas >= 2.1 FutureWarning raised for the
# builtin `sum`.

titanic.pivot_table(values="Counts", index=["Pclass"], columns=["Survived"], aggfunc="sum")

Survived,0,1
Pclass,Unnamed: 1_level_1,Unnamed: 2_level_1
1,80,136
2,97,87
3,372,119


In [12]:
# Passenger counts with survival status as rows and sex as columns.
# aggfunc="sum" (string) avoids the pandas >= 2.1 FutureWarning raised for the
# builtin `sum`.
df = titanic.pivot_table(values="Counts", index=["Survived"], columns=["Sex"], aggfunc="sum")
df

Sex,female,male
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1
0,81,468
1,233,109


In [13]:
# Add totals (1): a "total" column holding female + male for each row.

df["total"] = df["female"].add(df["male"])
df

Sex,female,male,total
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,81,468,549
1,233,109,342


In [14]:
# Add totals (2): a "total" row holding the sum of the Survived = 0 and
# Survived = 1 rows for each column.

df.loc["total"] = df.loc[0].add(df.loc[1])
df

Sex,female,male,total
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,81,468,549
1,233,109,342
total,314,577,891


In [15]:
# Remove the "total" entries again, modifying df in place.

df.drop(index="total", inplace=True)    # drop the "total" row
df.drop(columns="total", inplace=True)  # drop the "total" column

In [78]:
# fill_value / dropna demo: passenger counts per (Parch, Pclass) row, split by
# survival. dropna=False keeps combinations that have no passengers, and
# fill_value=0 shows them as 0 instead of NaN.
# aggfunc="sum" (string) avoids the pandas >= 2.1 FutureWarning raised for the
# builtin `sum`.

df = titanic.pivot_table(
    values="Counts",
    index=["Parch", "Pclass"],
    columns=["Survived"],
    aggfunc="sum",
    fill_value=0,
    dropna=False,
)
df

Unnamed: 0_level_0,Survived,0,1
Parch,Pclass,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,64,99
0,2,86,48
0,3,295,86
1,1,10,21
1,2,8,24
1,3,35,20
2,1,5,16
2,2,3,13
2,3,32,11
3,1,0,0
