# Pandas Practice

In [1]:
import pandas as pd
import numpy as np

In [2]:
arr = np.arange(100, 105)
arr

array([100, 101, 102, 103, 104])

In [3]:
s = pd.Series(arr)
s

0    100
1    101
2    102
3    103
4    104
dtype: int32

In [4]:
s = pd.Series(arr, dtype='int64')
s

0    100
1    101
2    102
3    103
4    104
dtype: int64

In [5]:
s = pd.Series(['부장', '차장', '대리', '사원', '인턴'])
s

0    부장
1    차장
2    대리
3    사원
4    인턴
dtype: object

In [6]:
f = ['apple', np.nan, 'banana', 'kiwi', 'gubong']
s2 = pd.Series(f, index = list('가나다라마'))
s2

가     apple
나       NaN
다    banana
라      kiwi
마    gubong
dtype: object

In [7]:
s = pd.Series(['손흥민', '김연아', '박세리', '박찬호', '김연경'], index = ['a', 'b', 'c', 'd', 'e'])
s

a    손흥민
b    김연아
c    박세리
d    박찬호
e    김연경
dtype: object

In [8]:
s[['a', 'c']]

a    손흥민
c    박세리
dtype: object

In [9]:
s.isnull()

a    False
b    False
c    False
d    False
e    False
dtype: bool

In [10]:
s.isna()

a    False
b    False
c    False
d    False
e    False
dtype: bool

In [11]:
data = {
    'name': ['Kim', 'Lee', 'Park'], 
    'age': [24, 27, 34], 
    'children': [2, 1, 3]
}

df = pd.DataFrame(data)
df

Unnamed: 0,name,age,children
0,Kim,24,2
1,Lee,27,1
2,Park,34,3


In [12]:
df[['name', 'children']]

Unnamed: 0,name,children
0,Kim,2
1,Lee,1
2,Park,3


In [13]:
df.rename(columns = {'name':'이름'})

Unnamed: 0,이름,age,children
0,Kim,24,2
1,Lee,27,1
2,Park,34,3


In [14]:
df.rename(columns = {'children':'자녀 수'}, inplace=True)
df

Unnamed: 0,name,age,자녀 수
0,Kim,24,2
1,Lee,27,1
2,Park,34,3


In [15]:
food_dict = {
    'food': ['KFC', 'McDonald', 'SchoolFood'], 
    'price': [1000, 2000, 2500], 
    'rating': [4.5, 3.9, 4.2]
}
df = pd.DataFrame(food_dict)
df

Unnamed: 0,food,price,rating
0,KFC,1000,4.5
1,McDonald,2000,3.9
2,SchoolFood,2500,4.2


In [16]:
df[['food', 'rating']]

Unnamed: 0,food,rating
0,KFC,4.5
1,McDonald,3.9
2,SchoolFood,4.2


In [17]:
df.rename(columns= {'food':'place'}, inplace=True)

In [18]:
df

Unnamed: 0,place,price,rating
0,KFC,1000,4.5
1,McDonald,2000,3.9
2,SchoolFood,2500,4.2


In [19]:
import seaborn as sns

In [20]:
df = sns.load_dataset("titanic")
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [21]:
df['who'].value_counts()

who
man      537
woman    271
child     83
Name: count, dtype: int64

In [22]:
df['embark_town'].value_counts()

embark_town
Southampton    644
Cherbourg      168
Queenstown      77
Name: count, dtype: int64

In [23]:
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [24]:
df['pclass'].astype('int32').head()

0    3
1    1
2    3
3    1
4    3
Name: pclass, dtype: int32

In [25]:
df['pclass'].astype('float32').head()

0    3.0
1    1.0
2    3.0
3    1.0
4    3.0
Name: pclass, dtype: float32

In [26]:
df.sort_index().head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [27]:
df.sort_index(ascending=False).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True
888,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True


In [28]:
df.sort_values(by='age').head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
803,1,3,male,0.42,0,1,8.5167,C,Third,child,False,,Cherbourg,yes,False
755,1,2,male,0.67,1,1,14.5,S,Second,child,False,,Southampton,yes,False
644,1,3,female,0.75,2,1,19.2583,C,Third,child,False,,Cherbourg,yes,False
469,1,3,female,0.75,2,1,19.2583,C,Third,child,False,,Cherbourg,yes,False
78,1,2,male,0.83,0,2,29.0,S,Second,child,False,,Southampton,yes,False


In [29]:
df.sort_values(by='age', ascending=False).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
630,1,1,male,80.0,0,0,30.0,S,First,man,True,A,Southampton,yes,True
851,0,3,male,74.0,0,0,7.775,S,Third,man,True,,Southampton,no,True
493,0,1,male,71.0,0,0,49.5042,C,First,man,True,,Cherbourg,no,True
96,0,1,male,71.0,0,0,34.6542,C,First,man,True,A,Cherbourg,no,True
116,0,3,male,70.5,0,0,7.75,Q,Third,man,True,,Queenstown,no,True


In [30]:
df.sort_values(by= ['fare', 'age']).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
302,0,3,male,19.0,0,0,0.0,S,Third,man,True,,Southampton,no,True
271,1,3,male,25.0,0,0,0.0,S,Third,man,True,,Southampton,yes,True
179,0,3,male,36.0,0,0,0.0,S,Third,man,True,,Southampton,no,True
822,0,1,male,38.0,0,0,0.0,S,First,man,True,,Southampton,no,True
806,0,1,male,39.0,0,0,0.0,S,First,man,True,A,Southampton,no,True


In [31]:
df.sort_values(by= ['fare', 'age'], ascending=[False, True]).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
258,1,1,female,35.0,0,0,512.3292,C,First,woman,False,,Cherbourg,yes,True
737,1,1,male,35.0,0,0,512.3292,C,First,man,True,B,Cherbourg,yes,True
679,1,1,male,36.0,0,1,512.3292,C,First,man,True,B,Cherbourg,yes,False
27,0,1,male,19.0,3,2,263.0,S,First,man,True,C,Southampton,no,False
88,1,1,female,23.0,3,2,263.0,S,First,woman,False,C,Southampton,yes,False


In [32]:
tips = sns.load_dataset('tips')
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [33]:
tips.sort_values(by=['total_bill', 'tip'], ascending=False).head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
170,50.81,10.0,Male,Yes,Sat,Dinner,3
212,48.33,9.0,Male,No,Sat,Dinner,4
59,48.27,6.73,Male,No,Sat,Dinner,4
156,48.17,5.0,Male,No,Sun,Dinner,6
182,45.35,3.5,Male,Yes,Sun,Dinner,3


In [34]:
tips.sort_values(by=['size', 'tip'], ascending=[False, True]).head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
125,29.8,4.2,Female,No,Thur,Lunch,6
143,27.05,5.0,Female,No,Thur,Lunch,6
156,48.17,5.0,Male,No,Sun,Dinner,6
141,34.3,6.7,Male,No,Thur,Lunch,6
187,30.46,2.0,Male,Yes,Sun,Dinner,5


In [35]:
df.loc[4, 'class']

'Third'

In [36]:
df.loc[2:5, ['age', 'fare', 'who']]

Unnamed: 0,age,fare,who
2,26.0,7.925,woman
3,35.0,53.1,woman
4,35.0,8.05,man
5,,8.4583,man


In [37]:
df.loc[:6, 'class':'deck']

Unnamed: 0,class,who,adult_male,deck
0,Third,man,True,
1,First,woman,False,C
2,Third,woman,False,
3,First,woman,False,C
4,Third,man,True,
5,Third,man,True,
6,First,man,True,E


In [38]:
condition = df['who'] == 'man'
condition

0       True
1      False
2      False
3      False
4       True
       ...  
886     True
887    False
888    False
889     True
890     True
Name: who, Length: 891, dtype: bool

In [40]:
df[condition].head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
12,0,3,male,20.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [41]:
df.loc[condition].head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
12,0,3,male,20.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [42]:
condition1 = (df['fare']>30)

condition2 = (df['who']=='woman')

In [43]:
df.loc[condition1 & condition2]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
25,1,3,female,38.0,1,5,31.3875,S,Third,woman,False,,Southampton,yes,False
31,1,1,female,,1,0,146.5208,C,First,woman,False,B,Cherbourg,yes,False
52,1,1,female,49.0,1,0,76.7292,C,First,woman,False,D,Cherbourg,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
853,1,1,female,16.0,0,1,39.4000,S,First,woman,False,D,Southampton,yes,False
856,1,1,female,45.0,1,1,164.8667,S,First,woman,False,,Southampton,yes,False
863,0,3,female,,8,2,69.5500,S,Third,woman,False,,Southampton,no,False
871,1,1,female,47.0,1,1,52.5542,S,First,woman,False,D,Southampton,yes,False


In [44]:
df.loc[condition1 | condition2]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
880,1,2,female,25.0,0,1,26.0000,S,Second,woman,False,,Southampton,yes,False
882,0,3,female,22.0,0,0,10.5167,S,Third,woman,False,,Southampton,no,True
885,0,3,female,39.0,0,5,29.1250,Q,Third,woman,False,,Queenstown,no,False
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True


In [45]:
df = sns.load_dataset("titanic")
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [47]:
condition1 = (df['age'] >= 30)
condition2 = (df['who'] == 'man')

df.loc[condition1 & condition2].sort_values(by='fare', ascending=False).head(10)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
679,1,1,male,36.0,0,1,512.3292,C,First,man,True,B,Cherbourg,yes,False
737,1,1,male,35.0,0,0,512.3292,C,First,man,True,B,Cherbourg,yes,True
438,0,1,male,64.0,1,4,263.0,S,First,man,True,C,Southampton,no,False
332,0,1,male,38.0,0,1,153.4625,S,First,man,True,C,Southampton,no,False
660,1,1,male,50.0,2,0,133.65,S,First,man,True,,Southampton,yes,False
390,1,1,male,36.0,1,2,120.0,S,First,man,True,B,Southampton,yes,False
659,0,1,male,58.0,0,2,113.275,C,First,man,True,D,Cherbourg,no,False
698,0,1,male,49.0,1,1,110.8833,C,First,man,True,C,Cherbourg,no,False
544,0,1,male,50.0,1,0,106.425,C,First,man,True,C,Cherbourg,no,False
224,1,1,male,38.0,1,0,90.0,S,First,man,True,C,Southampton,yes,False


In [52]:
condition1 = (df['age'] >= 20) & (df['age'] <40) 
condition2 = (df['pclass'] != 3)

df.loc[condition1 & condition2][['survived', 'pclass', 'age', 'fare']].head()

Unnamed: 0,survived,pclass,age,fare
1,1,1,38.0,71.2833
3,1,1,35.0,53.1
20,0,2,35.0,26.0
21,1,2,34.0,13.0
23,1,1,28.0,35.5


In [54]:
condition1 = (df['age'] >= 20) & (df['age'] <40)
condition2 = (df['pclass'] != 3)

df.loc[condition1 & condition2][['survived', 'pclass', 'age', 'fare']].head(10)

Unnamed: 0,survived,pclass,age,fare
1,1,1,38.0,71.2833
3,1,1,35.0,53.1
20,0,2,35.0,26.0
21,1,2,34.0,13.0
23,1,1,28.0,35.5
34,0,1,28.0,82.1708
41,0,2,27.0,21.0
53,1,2,29.0,26.0
56,1,2,21.0,10.5
61,1,1,38.0,80.0


In [56]:
condition1 = (df['pclass'] != 3)
condition2 = df['age'].isnull()
age_mean = df['age'].mean()

df.loc[condition1 & condition2, 'age'] = age_mean

In [57]:
df['age'].isnull().sum()

136

In [58]:
df.iloc[1, 3]

38.0

In [59]:
df.iloc[[0, 3, 4], [0, 1, 5, 6]]

Unnamed: 0,survived,pclass,parch,fare
0,0,3,0,7.25
3,1,1,0,53.1
4,0,3,0,8.05


In [60]:
c1 = df['fare'] < 20
df['fare'].where(df['fare'] < 20, 0).tail(20)

871     0.0000
872     5.0000
873     9.0000
874     0.0000
875     7.2250
876     9.8458
877     7.8958
878     7.8958
879     0.0000
880     0.0000
881     7.8958
882    10.5167
883    10.5000
884     7.0500
885     0.0000
886    13.0000
887     0.0000
888     0.0000
889     0.0000
890     7.7500
Name: fare, dtype: float64

In [61]:
df.isnull().count()

survived       891
pclass         891
sex            891
age            891
sibsp          891
parch          891
fare           891
embarked       891
class          891
who            891
adult_male     891
deck           891
embark_town    891
alive          891
alone          891
dtype: int64

In [62]:
c1 = df['age'].isnull()

df.loc[c1]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
19,1,3,female,,0,0,7.2250,C,Third,woman,False,,Cherbourg,yes,True
26,0,3,male,,0,0,7.2250,C,Third,man,True,,Cherbourg,no,True
28,1,3,female,,0,0,7.8792,Q,Third,woman,False,,Queenstown,yes,True
29,0,3,male,,0,0,7.8958,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
859,0,3,male,,0,0,7.2292,C,Third,man,True,,Cherbourg,no,True
863,0,3,female,,8,2,69.5500,S,Third,woman,False,,Southampton,no,False
868,0,3,male,,0,0,9.5000,S,Third,man,True,,Southampton,no,True
878,0,3,male,,0,0,7.8958,S,Third,man,True,,Southampton,no,True


In [63]:
df1 = df.copy()
df1

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [64]:
df1.drop('class', axis=1).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,man,True,,Southampton,no,True


In [65]:
df1.drop(['who', 'deck'], axis=1)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,adult_male,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,True,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,False,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,False,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,False,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,True,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,True,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,False,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,False,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,True,Cherbourg,yes,True


In [66]:
df2 = pd.read_csv('./data/seoul_bicycle.csv')
df2.head()

Unnamed: 0,대여일자,대여소번호,대여소명,대여구분코드,성별,연령대코드,이용건수,운동량,탄소량,이동거리,이용시간
0,Jan-20-2020,3,중랑센터,일일(회원),M,AGE_003,3,61.82,0.52,2230.0,75
1,Jan-20-2020,3,중랑센터,일일(회원),M,AGE_004,1,39.62,0.28,1220.0,15
2,Jan-20-2020,3,중랑센터,정기,M,AGE_005,3,430.85,4.01,17270.0,53
3,Jan-20-2020,5,상암센터 정비실,일일(회원),\N,AGE_005,2,1.79,0.02,90.0,33
4,Jan-20-2020,5,상암센터 정비실,정기,F,AGE_003,1,4501.96,45.47,196010.0,64


In [67]:
pd.to_datetime(df2['대여일자'])

0        2020-01-20
1        2020-01-20
2        2020-01-20
3        2020-01-20
4        2020-01-20
            ...    
327226   2020-05-20
327227   2020-05-20
327228   2020-05-20
327229   2020-05-20
327230   2020-05-20
Name: 대여일자, Length: 327231, dtype: datetime64[ns]

In [68]:
df2['대여일자'] = pd.to_datetime(df2['대여일자'])

In [69]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 327231 entries, 0 to 327230
Data columns (total 11 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   대여일자    327231 non-null  datetime64[ns]
 1   대여소번호   327231 non-null  int64         
 2   대여소명    327231 non-null  object        
 3   대여구분코드  327231 non-null  object        
 4   성별      272841 non-null  object        
 5   연령대코드   327231 non-null  object        
 6   이용건수    327231 non-null  int64         
 7   운동량     327231 non-null  object        
 8   탄소량     327231 non-null  object        
 9   이동거리    327231 non-null  float64       
 10  이용시간    327231 non-null  int64         
dtypes: datetime64[ns](1), float64(1), int64(3), object(6)
memory usage: 27.5+ MB


In [70]:
df2['대여일자'].dt.year

0         2020
1         2020
2         2020
3         2020
4         2020
          ... 
327226    2020
327227    2020
327228    2020
327229    2020
327230    2020
Name: 대여일자, Length: 327231, dtype: int32

In [71]:
df.groupby('sex')['survived'].mean()

sex
female    0.742038
male      0.188908
Name: survived, dtype: float64

In [72]:
df.groupby(['sex', 'pclass'])['survived'].mean()

sex     pclass
female  1         0.968085
        2         0.921053
        3         0.500000
male    1         0.368852
        2         0.157407
        3         0.135447
Name: survived, dtype: float64

In [73]:
df.groupby(['sex', 'pclass'])[['survived', 'age']].agg(['mean', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,survived,age,age
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,sum,mean,sum
sex,pclass,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
female,1,0.968085,91,34.141405,3209.292059
female,2,0.921053,70,28.748661,2184.898235
female,3,0.5,72,21.75,2218.5
male,1,0.368852,45,39.287717,4793.101471
male,2,0.157407,17,30.653908,3310.622059
male,3,0.135447,47,26.507589,6706.42


In [74]:
df1 = df.copy()

In [75]:
df1['age'] = df1.groupby('sex', group_keys=False)['age'].apply(lambda x: x.fillna(x.mean()))
df1['age']

0      22.000000
1      38.000000
2      26.000000
3      35.000000
4      35.000000
         ...    
886    27.000000
887    19.000000
888    27.987832
889    26.000000
890    32.000000
Name: age, Length: 891, dtype: float64