In [1]:
import pandas as pd
import numpy as np

# 제곱 함수와 n 제곱 함수 만들기

In [9]:
def sq(x):
    """Return ``x`` raised to the power 2.

    Works for anything that supports the ``**`` operator, e.g. plain
    numbers or a pandas Series (in which case the result is element-wise).
    """
    return pow(x, 2)

def n_sq(x, n):
    """Return ``x`` raised to the power ``n``.

    Works for anything that supports the ``**`` operator, e.g. plain
    numbers or a pandas Series (in which case the result is element-wise).
    """
    return pow(x, n)

# 시리즈와 apply 메서드

In [12]:
# Small two-column example frame used by the apply() demonstrations below.
df = pd.DataFrame(data={'a': [10, 20, 30], 'b': [20, 30, 40]})

In [13]:
# Series.apply calls the function on the values of column 'a'; extra keyword
# arguments (here n=5) are forwarded to the function.
print(df['a'].apply(sq))
print(df['a'].apply(n_sq, n=5))

0    100
1    400
2    900
Name: a, dtype: int64
0      100000
1     3200000
2    24300000
Name: a, dtype: int64


# 데이터프레임과 apply 메서드

In [14]:
# DataFrame.apply with axis='index' (the default direction) passes each
# column to the function; extra keyword arguments (n=5) are forwarded.
print(df.apply(sq, axis='index'))
print(df.apply(n_sq, axis='index', n=5))

a     b
0  100   400
1  400   900
2  900  1600
          a          b
0    100000    3200000
1   3200000   24300000
2  24300000  102400000


# 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기 - 열 방향

In [15]:
import seaborn as sns

In [16]:
# Load the Titanic dataset through seaborn and preview its first five rows.
titanic = sns.load_dataset('titanic')
print(titanic.head(n=5))

survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  


In [17]:
# Count the missing values
def count_null(vec):
    """Return how many entries of ``vec`` are missing (NaN / None).

    ``vec`` is converted to a boolean missing-value mask, and the mask is
    summed so that every True counts as 1.
    """
    missing_mask = pd.isna(vec)
    return np.sum(missing_mask)

# Column direction: each column of the frame is handed to count_null.
nul_col = titanic.apply(count_null, axis='index')
print(nul_col)

survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64


In [18]:
# Compute the ratio of missing values
def get_perOfnul(vec):
    """Return the fraction of entries in ``vec`` that are missing.

    The result is the missing-value count from ``count_null`` divided by
    ``vec.size``, so ``vec`` must expose a ``size`` attribute.
    """
    total = vec.size
    missing = count_null(vec)
    return missing / total

# Column direction: missing-value ratio of every column.
nulPer_col = titanic.apply(get_perOfnul, axis='index')
print(nulPer_col)

survived       0.000000
pclass         0.000000
sex            0.000000
age            0.198653
sibsp          0.000000
parch          0.000000
fare           0.000000
embarked       0.002245
class          0.000000
who            0.000000
adult_male     0.000000
deck           0.772166
embark_town    0.002245
alive          0.000000
alone          0.000000
dtype: float64


In [24]:
# Ratio of values that are not missing
def get_perOfnotnul(vec):
    """Return the fraction of entries in ``vec`` that are NOT missing.

    Computed as the complement (1 - x) of the ratio returned by
    ``get_perOfnul``.
    """
    null_ratio = get_perOfnul(vec)
    return 1 - null_ratio

# Column direction: ratio of non-missing values in every column.
notnulPer_col = titanic.apply(get_perOfnotnul, axis='index')
print(notnulPer_col)

survived       1.000000
pclass         1.000000
sex            1.000000
age            0.801347
sibsp          1.000000
parch          1.000000
fare           1.000000
embarked       0.997755
class          1.000000
who            1.000000
adult_male     1.000000
deck           0.227834
embark_town    0.997755
alive          1.000000
alone          1.000000
dtype: float64


# 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기 - 행 방향

In [21]:
# Row direction: count the missing values in each row
# (axis='columns' is the same as axis=1 and passes each row to the function).
nul_row = titanic.apply(count_null, axis='columns')
print(nul_row.head(n=5))

0    1
1    0
2    1
3    0
4    1
dtype: int64


In [23]:
# Row direction: ratio of missing values in each row
# (axis='columns' is the same as axis=1 and passes each row to the function).
nulPer_row = titanic.apply(get_perOfnul, axis='columns')
print(nulPer_row.head(n=5))

0    0.066667
1    0.000000
2    0.066667
3    0.000000
4    0.066667
dtype: float64


In [25]:
# Row direction: ratio of non-missing values in each row
# (axis='columns' is the same as axis=1 and passes each row to the function).
notnulPer_row = titanic.apply(get_perOfnotnul, axis='columns')
print(notnulPer_row.head(n=5))

0    0.933333
1    1.000000
2    0.933333
3    1.000000
4    0.933333
dtype: float64
