pivot table

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
data = {
    "도시": ["서울", "서울", "서울", "부산", "부산", "부산", "인천", "인천"],
    "연도": ["2015", "2010", "2005", "2015", "2010", "2005", "2015", "2010"],
    "인구": [9904312, 9631482, 9762546, 3448737, 3393191, 3512547, 2890451, 263203],
    "지역": ["수도권", "수도권", "수도권", "경상권", "경상권", "경상권", "수도권", "수도권"]
}
columns = ["도시", "연도", "인구", "지역"]
df1 = pd.DataFrame(data, columns=columns)
df1

Unnamed: 0,도시,연도,인구,지역
0,서울,2015,9904312,수도권
1,서울,2010,9631482,수도권
2,서울,2005,9762546,수도권
3,부산,2015,3448737,경상권
4,부산,2010,3393191,경상권
5,부산,2005,3512547,경상권
6,인천,2015,2890451,수도권
7,인천,2010,263203,수도권


In [3]:
df1.pivot(index='도시', values='인구', columns='연도')

연도,2005,2010,2015
도시,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
부산,3512547.0,3393191.0,3448737.0
서울,9762546.0,9631482.0,9904312.0
인천,,263203.0,2890451.0


In [4]:
df1.pivot(index=['지역', '도시'], values='인구', columns='연도',)

Unnamed: 0_level_0,연도,2005,2010,2015
지역,도시,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
경상권,부산,3512547.0,3393191.0,3448737.0
수도권,서울,9762546.0,9631482.0,9904312.0
수도권,인천,,263203.0,2890451.0


pivot table 실습

In [6]:
tips = sns.load_dataset('tips')
iris = sns.load_dataset('iris')

In [7]:
# tips_pct 컬럼생성
tips['tips_pct'] = (tips.tip / tips.total_bill * 100).round(2)

In [9]:
# 성별, 흡연유무별 팁의 비율
tips.pivot_table('tips_pct', 'sex', 'smoker', 'mean')

smoker,Yes,No
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,15.276667,16.066598
Female,18.214545,15.691111


In [14]:
# 성별, 흡연유무별 인원수
tips.pivot_table('tips_pct', 'sex', 'smoker', 'count', margins=True, margins_name='합계')

smoker,Yes,No,합계
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,60,97,157
Female,33,54,87
합계,93,151,244


groupby VS. pivot_table

In [16]:
# 성별 팁 비율의 평균
print(tips.groupby('sex')['tips_pct'].mean())
print(tips.pivot_table('tips_pct', 'sex', aggfunc='mean'))

sex
Male      15.764713
Female    16.648276
Name: tips_pct, dtype: float64
         tips_pct
sex              
Male    15.764713
Female  16.648276


In [17]:
print(tips.pivot_table('tips_pct', 'sex')) # aggfunc의 default는 mean 함수다

         tips_pct
sex              
Male    15.764713
Female  16.648276


In [18]:
# 흡연 유무에 따른 평균 팁 비율과 최소값 최대값
print(tips.groupby('smoker')['tips_pct'].agg(['mean', 'min', 'max']))
print(tips.pivot_table('tips_pct', 'smoker', aggfunc=['mean', 'min', 'max']))

             mean   min    max
smoker                        
Yes     16.319140  3.56  71.03
No      15.932318  5.68  29.20
             mean      min      max
         tips_pct tips_pct tips_pct
smoker                             
Yes     16.319140     3.56    71.03
No      15.932318     5.68    29.20


실습

In [20]:
# 1-1 팁의 비율이 요일과 점심/저녁 여부, 인원수에 어떤 영향을 받는지 살펴본다.
# 요일별로 분석 (목,금,토,일)
# 그룹바이와 피봇테이블로 만들어보기
print(tips.groupby('day')['tips_pct'].agg(['mean', 'std']))
print(tips.pivot_table('tips_pct', 'day', aggfunc=['mean', 'std']))

           mean       std
day                      
Thur  16.126452  3.864958
Fri   16.991579  4.766803
Sat   15.314598  5.129187
Sun   16.689605  8.473462
           mean       std
       tips_pct  tips_pct
day                      
Thur  16.126452  3.864958
Fri   16.991579  4.766803
Sat   15.314598  5.129187
Sun   16.689605  8.473462


In [21]:
# 1- 2시간대별 (lunch / dinner)  그룹바이와 피봇테이블로 만들어보기
print(tips.groupby('time')['tips_pct'].agg(['mean', 'std']))
print(tips.pivot_table('tips_pct', 'time', aggfunc=['mean', 'std']))

             mean       std
time                       
Lunch   16.411765  4.024006
Dinner  15.951477  6.747510
             mean       std
         tips_pct  tips_pct
time                       
Lunch   16.411765  4.024006
Dinner  15.951477  6.747510


In [23]:
# 1-3 인원수별 6개행 1명 6명 까지 팁의 비율 평균
print(tips.groupby('size')['tips_pct'].agg(['mean', 'std']))
print(tips.pivot_table('tips_pct', 'size', aggfunc=['mean', 'std']))

           mean       std
size                     
1     21.727500  8.034203
2     16.571410  6.684833
3     15.214737  4.545196
4     14.594865  4.238842
5     14.152000  6.773143
6     15.620000  4.213716
           mean       std
       tips_pct  tips_pct
size                     
1     21.727500  8.034203
2     16.571410  6.684833
3     15.214737  4.545196
4     14.594865  4.238842
5     14.152000  6.773143
6     15.620000  4.213716


In [25]:
# 2-1. 타이타닉 데이터를 불러와서 qcut 함수를 활용해서 나이그룹을 3개 만들어보세요
# 'age_group' 컬럼을 만들고 A1, A2, A3 로 데이터가 들어가도록 코드를 만드세요
df = sns.load_dataset('titanic')
df['age_group'] = pd.qcut(df.age, 3, labels=['A1', 'A2', 'A3'])
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age_group
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,A1
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,A3
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,A2
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,A3
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,A3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,A2
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,A1
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False,
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,A2


In [29]:
# 2-2 성별 선실, 나이 그룹에 의한 생존율을 데이터 프레임으로 계산(age_group 활용)
df.pivot_table('survived', ['sex', 'class'], 'age_group')

Unnamed: 0_level_0,age_group,A1,A2,A3
sex,class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,First,0.954545,0.947368,0.977273
female,Second,1.0,0.909091,0.857143
female,Third,0.508475,0.481481,0.25
male,First,0.5,0.5,0.347826
male,Second,0.357143,0.076923,0.0625
male,Third,0.158879,0.195652,0.055556


In [30]:
#2-3 성별 및 선실에 의한 생존율
df.pivot_table('survived', 'sex', 'class')

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [31]:
df.groupby(['sex', 'class'])[['survived']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,class,Unnamed: 2_level_1
female,First,0.968085
female,Second,0.921053
female,Third,0.5
male,First,0.368852
male,Second,0.157407
male,Third,0.135447


# 연습문제 - 12.pandas연습문제.pdf (공유폴더)

In [32]:
iris = sns.load_dataset('iris')
titanic = sns.load_dataset('titanic')
mpg = sns.load_dataset('mpg')

In [33]:
# 1-a
iris.groupby('species').agg(['mean', 'std'])

Unnamed: 0_level_0,sepal_length,sepal_length,sepal_width,sepal_width,petal_length,petal_length,petal_width,petal_width
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std
species,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
setosa,5.006,0.35249,3.428,0.379064,1.462,0.173664,0.246,0.105386
versicolor,5.936,0.516171,2.77,0.313798,4.26,0.469911,1.326,0.197753
virginica,6.588,0.63588,2.974,0.322497,5.552,0.551895,2.026,0.27465


In [34]:
# 1-b
s = iris[iris.species =='setosa']['sepal_width']
s.mean(), s.std()

(3.428, 0.3790643690962887)

In [35]:
q1 = s.quantile(.25)
q3 = s.quantile(.75)
iqr = q3 - q1
q1, q3, iqr

(3.2, 3.6750000000000003, 0.4750000000000001)

In [36]:
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr
s[(s < lower) | (s > upper)] = np.nan
s.isnull().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s[(s < lower) | (s > upper)] = np.nan


2

In [37]:
s.mean(), s.std()

(3.43125, 0.32034306743094004)

In [38]:
def get_new_stat(s):
  q1 = s.quantile(.25)
  q3 = s.quantile(.75)
  iqr = q3 - q1
  lower = q1 - 1.5 * iqr
  upper = q3 + 1.5 * iqr
  s[(s < lower) | (s > upper)] = np.nan
  outlier = s.isnull().sum() != 0
  return outlier, np.round(s.mean(), 2), np.round(s.std(), 2)

In [None]:
species_list, feature_list, mean_list, std_list = [], [], [], []
outlier_list, new_mean_list, new_std_list = [], [], []
for species in iris.species.unique():
  for feature in iris.columns[:-1]:
    s = iris[iris.species == species][feature]
    species_list.append(species)
    feature_list.append(feature)
    mean_list.append(np.round(s.mean(), 2))
    std_list.append(np.round(s.std(), 2))

    outlier, new_mean, new_std = get_new_stat(s)
    outlier_list.append(outlier)
    new_mean_list.append(new_mean)
    new_std_list.append(new_std)

In [44]:
df = pd.DataFrame({'species': species_list,
                   'feature' : feature_list,
                   'mean' : mean_list,
                   'std' : std_list,
                   'outlier': outlier_list,
                   'new_mean': new_mean_list,
                   'new_std' : new_std_list
})
df.set_index(['species', 'feature'], inplace=True)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,outlier,new_mean,new_std
species,feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
setosa,sepal_length,5.01,0.35,False,5.01,0.35
setosa,sepal_width,3.43,0.38,True,3.43,0.32
setosa,petal_length,1.46,0.17,True,1.46,0.13
setosa,petal_width,0.25,0.11,True,0.23,0.09
versicolor,sepal_length,5.94,0.52,False,5.94,0.52
versicolor,sepal_width,2.77,0.31,False,2.77,0.31
versicolor,petal_length,4.26,0.47,True,4.29,0.44
versicolor,petal_width,1.33,0.2,False,1.33,0.2
virginica,sepal_length,6.59,0.64,True,6.62,0.59
virginica,sepal_width,2.97,0.32,True,2.96,0.26


In [50]:
# Titanic
# 2-a
titanic['category1'] = titanic.apply(lambda r : r.sex if r.age > 20 else 'child', axis=1)
titanic.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,category1
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True,male
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True,child
888,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False,child
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True,male
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True,male


In [51]:
#2-b
titanic['age'] = titanic.age.fillna(titanic.age.mean())
titanic.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,category1
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True,male
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True,child
888,0,3,female,29.699118,1,2,23.45,S,Third,woman,False,,Southampton,no,False,child
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True,male
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True,male


In [52]:
titanic['category1'] = titanic.apply(lambda r : r.sex if r.age > 20 else 'child', axis=1)
titanic.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,category1
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True,male
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True,child
888,0,3,female,29.699118,1,2,23.45,S,Third,woman,False,,Southampton,no,False,female
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True,male
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True,male


In [54]:
# 2-c
titanic.groupby('sex')[['survived']].mean()

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.742038
male,0.188908


In [55]:
titanic.pivot_table('survived', 'class')

Unnamed: 0_level_0,survived
class,Unnamed: 1_level_1
First,0.62963
Second,0.472826
Third,0.242363


In [56]:
titanic.pivot_table('survived', ['sex','class'])

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,class,Unnamed: 2_level_1
female,First,0.968085
female,Second,0.921053
female,Third,0.5
male,First,0.368852
male,Second,0.157407
male,Third,0.135447


In [57]:
titanic.pivot_table('survived', ['sex','class'], 'embark_town')

Unnamed: 0_level_0,embark_town,Cherbourg,Queenstown,Southampton
sex,class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,First,0.976744,1.0,0.958333
female,Second,1.0,1.0,0.910448
female,Third,0.652174,0.727273,0.375
male,First,0.404762,0.0,0.35443
male,Second,0.2,0.0,0.154639
male,Third,0.232558,0.076923,0.128302


In [None]:
# 2-d
bins = [1, 20, 30, 50, 70, 100]
labels = ["미성년자", "청년", "중년", "장년", "노년"]
titanic['age_cut'] = pd.cut(titanic.age, bins=bins, labels=labels)
titanic.tail()

In [60]:
titanic.pivot_table('survived', 'age_cut')

Unnamed: 0_level_0,survived
age_cut,Unnamed: 1_level_1
미성년자,0.424242
청년,0.334152
중년,0.423237
장년,0.355932
노년,0.2


In [61]:
titanic.pivot_table('survived', ['sex', 'age_cut'])

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,age_cut,Unnamed: 2_level_1
female,미성년자,0.671233
female,청년,0.723881
female,중년,0.77907
female,장년,0.941176
male,미성년자,0.228261
male,청년,0.142857
male,중년,0.225806
male,장년,0.119048
male,노년,0.2


In [62]:
# 2-d
titanic['age_group'] = pd.qcut(titanic.age, 3, labels=["A1", "A2", "A3"])
titanic.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,category1,age_cut,age_group
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True,male,청년,A2
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True,child,미성년자,A1
888,0,3,female,29.699118,1,2,23.45,S,Third,woman,False,,Southampton,no,False,female,청년,A2
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True,male,청년,A2
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True,male,중년,A3


In [63]:
titanic.groupby('age_group')[['survived']].mean()

Unnamed: 0_level_0,survived
age_group,Unnamed: 1_level_1
A1,0.41196
A2,0.335526
A3,0.405594


In [64]:
# gender = 남성이면 1, 여성이면 0
titanic['gender'] = titanic.apply(lambda r : 1 if r.sex == 'male' else 0, axis =1)
titanic.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,category1,age_cut,age_group,gender
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True,male,청년,A2,1
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True,child,미성년자,A1,0
888,0,3,female,29.699118,1,2,23.45,S,Third,woman,False,,Southampton,no,False,female,청년,A2,0
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True,male,청년,A2,1
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True,male,중년,A3,1


In [66]:
# 3-a
mpg['hp_per_cc'] = mpg.horsepower / mpg.displacement
mpg.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name,hp_per_cc
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu,0.423453
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320,0.471429
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite,0.471698
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst,0.493421
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino,0.463576


In [67]:
# 3-b
mpg['제조사'] = mpg.name.apply(lambda x : x.split()[0])
mpg.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name,hp_per_cc,제조사
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu,0.423453,chevrolet
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320,0.471429,buick
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite,0.471698,plymouth
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst,0.493421,amc
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino,0.463576,ford


In [78]:
# 참고 : 리스트원소를 하나로 합치는 방법
a = "abcd"
",".join(a)
# 출력물 : a,b,c,d

'a,b,c,d'

In [79]:
mpg['model'] = mpg.name.apply(lambda x : " ".join(x.split()[1:]))
mpg.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name,hp_per_cc,제조사,model
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu,0.423453,chevrolet,chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320,0.471429,buick,skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite,0.471698,plymouth,satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst,0.493421,amc,rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino,0.463576,ford,torino


In [84]:
mpg.drop('name', axis=1)
# del mpg['name']

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,hp_per_cc,제조사,model
0,18.0,8,307.0,130.0,3504,12.0,70,usa,0.423453,chevrolet,chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,0.471429,buick,skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,0.471698,plymouth,satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,0.493421,amc,rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,0.463576,ford,torino
...,...,...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790,15.6,82,usa,0.614286,ford,mustang gl
394,44.0,4,97.0,52.0,2130,24.6,82,europe,0.536082,vw,pickup
395,32.0,4,135.0,84.0,2295,11.6,82,usa,0.622222,dodge,rampage
396,28.0,4,120.0,79.0,2625,18.6,82,usa,0.658333,ford,ranger


In [None]:
# 3-c
mpg.groupby('cylinders')[['mpg']].mean()

In [None]:
mpg[['mpg','cylinders' ]].groupby('cylinders').mean()

In [None]:
mpg.pivot_table('mpg', 'cylinders')

In [91]:
#3-d
mpg.pivot_table('hp_per_cc', 'origin')

Unnamed: 0_level_0,hp_per_cc
origin,Unnamed: 1_level_1
europe,0.740005
japan,0.789485
usa,0.512063


In [93]:
# 3-e
mpg.제조사.value_counts()

ford             51
chevrolet        43
plymouth         31
amc              28
dodge            28
toyota           25
datsun           23
buick            17
pontiac          16
volkswagen       15
honda            13
mercury          11
mazda            10
oldsmobile       10
fiat              8
peugeot           8
audi              7
chrysler          6
vw                6
volvo             6
renault           5
saab              4
subaru            4
opel              4
chevy             3
bmw               2
cadillac          2
maxda             2
mercedes-benz     2
triumph           1
vokswagen         1
mercedes          1
hi                1
capri             1
chevroelt         1
toyouta           1
nissan            1
Name: 제조사, dtype: int64

In [95]:
s = mpg.제조사.value_counts()
big = s[s >= 5].index.tolist()
print(big)

['ford', 'chevrolet', 'plymouth', 'amc', 'dodge', 'toyota', 'datsun', 'buick', 'pontiac', 'volkswagen', 'honda', 'mercury', 'mazda', 'oldsmobile', 'fiat', 'peugeot', 'audi', 'chrysler', 'vw', 'volvo', 'renault']


In [None]:
big_mpg = mpg[mpg.제조사.isin(big)]
print(big_mpg)

In [None]:
big_mpg.pivot_table('mpg', '제조사')

In [100]:
big_mpg.pivot_table('mpg', '제조사').sort_values(by='mpg', ascending=False).head()

Unnamed: 0_level_0,mpg
제조사,Unnamed: 1_level_1
vw,39.016667
honda,33.761538
renault,32.88
datsun,31.113043
mazda,30.86


In [None]:
mpg

In [103]:
mpg.제조사.unique()

array(['chevrolet', 'buick', 'plymouth', 'amc', 'ford', 'pontiac',
       'dodge', 'toyota', 'datsun', 'volkswagen', 'peugeot', 'audi',
       'saab', 'bmw', 'chevy', 'hi', 'mercury', 'opel', 'fiat',
       'oldsmobile', 'chrysler', 'mazda', 'volvo', 'renault', 'toyouta',
       'maxda', 'honda', 'subaru', 'chevroelt', 'capri', 'vw',
       'mercedes-benz', 'cadillac', 'mercedes', 'vokswagen', 'triumph',
       'nissan'], dtype=object)