### [참고] <a href="https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf">Pandas Cheat Sheet</a>

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Exam scores: one row per student (index), one column per subject.
df = pd.DataFrame(
    data={"영어": [60, 70], "수학": [100, 50]},
    index=["Hong", "Kim"],
)
df

Unnamed: 0,영어,수학
Hong,60,100
Kim,70,50


In [6]:
def func1(data):
    """Print the underlying values of ``data`` and return None.

    Used with ``DataFrame.apply`` to make visible what is handed to the
    function on each call.

    Parameters
    ----------
    data : object exposing a ``.values`` attribute
        Whatever ``apply`` passes in on each call.
    """
    values = data.values
    print(values)

In [8]:
# Show the whole frame as a 2-D NumPy array, for comparison with what func1 prints.
df.values

array([[ 60, 100],
       [ 70,  50]], dtype=int64)

In [7]:
# apply(): apply a function to a DataFrame.
#          In which direction is the function applied?
# With the default axis (0), func1 is called once per column.

df.apply(func1)

[60 70]
[100  50]


영어    None
수학    None
dtype: object

<img src='https://www.w3resource.com/w3r_images/pandas-dataframe-apply-1.png' width="300" height="150">

In [9]:
# axis=1: func1 is called once per row instead of once per column.
df.apply(func1,axis=1)

[ 60 100]
[70 50]


Hong    None
Kim     None
dtype: object

<img src="https://www.w3resource.com/w3r_images/pandas-dataframe-apply-2.png" width="300" height="200">

In [11]:
# Set the English score to 80

def func2(data):
    """Return a copy of ``data`` whose "영어" (English) entry is set to 80.

    Intended for ``DataFrame.apply(func2, axis=1)``, where ``data`` is one row.
    The input is copied first: pandas does not support functions that mutate
    the object passed in by ``apply``, and doing so can alter the source frame
    or behave unpredictably.

    Parameters
    ----------
    data : pandas.Series (or any mapping with ``.copy()``)
        A row containing an "영어" entry.

    Returns
    -------
    Same type as ``data``
        A new object with "영어" replaced by 80; ``data`` is left untouched.
    """
    updated = data.copy()
    updated["영어"] = 80
    return updated

In [13]:
# Apply func2 row by row (axis=1); the rows it returns are assembled into a new frame.
df_func2 = df.apply(func2,axis=1)
df_func2

Unnamed: 0,영어,수학
Hong,80,100
Kim,80,50


<img src="https://www.w3resource.com/w3r_images/pandas-dataframe-apply-3.png">

In [14]:
# Three identical rows, so column-wise and row-wise results are easy to compare.
df2 = pd.DataFrame({"P": [9] * 3, "Q": [25] * 3})
df2

Unnamed: 0,P,Q
0,9,25
1,9,25
2,9,25


In [15]:
# np.sqrt works element-wise, so the result has the same shape as df2.
df2.apply(np.sqrt)

Unnamed: 0,P,Q
0,3.0,5.0
1,3.0,5.0
2,3.0,5.0


In [16]:
# np.sum reduces each column to one value (default axis=0), giving a Series indexed by column name.
df2.apply(np.sum)

P    27
Q    75
dtype: int64

In [17]:
# For an element-wise function the axis does not change the values: same result as the default axis.
df2.apply(np.sqrt,axis=1)

Unnamed: 0,P,Q
0,3.0,5.0
1,3.0,5.0
2,3.0,5.0


In [18]:
# axis=1: np.sum reduces each row to one value, giving a Series indexed by row label.
df2.apply(np.sum,axis=1)

0    34
1    34
2    34
dtype: int64

In [19]:
# Date strings in "yyyy-mm-dd" form; the column name documents the format.
date_list = {
    "yyyy-mm-dd": [
        "2005-09-28",
        "2007-10-05",
        "2012-12-20",
    ]
}
df = pd.DataFrame(data=date_list)
df

Unnamed: 0,yyyy-mm-dd
0,2005-09-28
1,2007-10-05
2,2012-12-20


In [22]:
# Cut out and return only the year

def extract_year(col):
    """Return the text before the first "-" of a date string.

    Example: ``extract_year("2005-07-20")`` gives ``"2005"``.

    Parameters
    ----------
    col : str
        A single date string such as "yyyy-mm-dd" (one element, not a column).

    Returns
    -------
    str
        The leading part of ``col`` up to the first "-"; the whole string
        when it contains no "-".
    """
    year, _sep, _rest = col.partition("-")
    return year

In [26]:
# Series.apply calls extract_year once per element, i.e. once per date string.
df["yyyy-mm-dd"].apply(extract_year)

0    2005
1    2007
2    2012
Name: yyyy-mm-dd, dtype: object

In [27]:
# Store the extracted year strings as a new "year" column.
df["year"] = df["yyyy-mm-dd"].apply(extract_year)

In [28]:
# Display the frame, now including the "year" column.
df

Unnamed: 0,yyyy-mm-dd,year
0,2005-09-28,2005
1,2007-10-05,2007
2,2012-12-20,2012


In [31]:
# Add an age column

def get_age(year, cur_year):
    """Return the difference between ``cur_year`` and a birth year.

    Parameters
    ----------
    year : str or int
        Birth year; converted with ``int()`` so year strings are accepted.
    cur_year : int
        The year to measure the age against.

    Returns
    -------
    int
        ``cur_year - int(year)``.
    """
    birth_year = int(year)
    return cur_year - birth_year

In [32]:
# Extra keyword arguments given to Series.apply (here cur_year) are forwarded to get_age.
df['age'] = df['year'].apply(get_age,cur_year=2023)
df

Unnamed: 0,yyyy-mm-dd,year,age
0,2005-09-28,2005,18
1,2007-10-05,2007,16
2,2012-12-20,2012,11


#### lambda 를 적용한 apply()

In [2]:
import seaborn as sns

In [3]:
# Rebind df to seaborn's "iris" example dataset and peek at its first row.
df = sns.load_dataset("iris")
df.head(1)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa


In [5]:
# Return the row at index 0

def get_row(df):
    """Return ``df[0]``, the item stored under key/position 0.

    In this notebook it is used two ways: with ``DataFrame.apply`` each
    column is indexed with 0, and with ``Series.map`` each string is
    indexed with 0 (its first character).

    Parameters
    ----------
    df : any object supporting ``obj[0]``
        Despite the name, this is whatever ``apply``/``map`` passes in.
    """
    first = df[0]
    return first

In [6]:
# get_row is called once per column; indexing each column with 0 yields the values of row 0.
df.apply(get_row)

sepal_length       5.1
sepal_width        3.5
petal_length       1.4
petal_width        0.2
species         setosa
dtype: object

In [7]:
# Same as df.apply(get_row), with the function written inline as a lambda.
df.apply(lambda x:x[0])

sepal_length       5.1
sepal_width        3.5
petal_length       1.4
petal_width        0.2
species         setosa
dtype: object

In [8]:
# Get only the first character of each value in the species column
df["species"].apply(lambda x:x[0])

0      s
1      s
2      s
3      s
4      s
      ..
145    v
146    v
147    v
148    v
149    v
Name: species, Length: 150, dtype: object

In [10]:
# Create a new column filled with the first three characters of each species value
df["species_3"] = df["species"].apply(lambda x:x[:3])
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_3
0,5.1,3.5,1.4,0.2,setosa,set
1,4.9,3.0,1.4,0.2,setosa,set
2,4.7,3.2,1.3,0.2,setosa,set
3,4.6,3.1,1.5,0.2,setosa,set
4,5.0,3.6,1.4,0.2,setosa,set
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,vir
146,6.3,2.5,5.0,1.9,virginica,vir
147,6.5,3.0,5.2,2.0,virginica,vir
148,6.2,3.4,5.4,2.3,virginica,vir


#### map(): apply()와 개념은 동일하나, 시리즈의 각 원소에 적용함 (DataFrame.map()은 pandas 2.1부터 지원)

In [12]:
# df.map(get_row) raises an error here.
# NOTE(review): DataFrame.map only exists from pandas 2.1 onward, and there it is
# element-wise, so get_row would try to index the float cells — confirm which failure was seen.
# Series.map passes each species string to get_row, which returns its first character.
df["species"].map(get_row)

0      s
1      s
2      s
3      s
4      s
      ..
145    v
146    v
147    v
148    v
149    v
Name: species, Length: 150, dtype: object

In [13]:
# Small Series with one missing value, used to show how map() treats NaN and unmapped keys.
s = pd.Series(data=["cat", "dog", np.nan, "rabbit"])
s

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

In [14]:
# Replace cat with kitten and dog with puppy.
# With a dict, values that have no matching key (here "rabbit") become NaN.
s.map({"cat":"kitten","dog":"puppy"})

0    kitten
1     puppy
2       NaN
3       NaN
dtype: object

#### applymap() : 데이터프레임에만 적용, 시리즈 불가 (pandas 2.1부터 deprecated — DataFrame.map() 사용 권장)

In [16]:
# 2x2 frame from nested lists; columns get the default integer labels 0 and 1.
df = pd.DataFrame([[1, 2.12], [3.345, 4.567]])
df

Unnamed: 0,0,1
0,1.0,2.12
1,3.345,4.567


In [18]:
# Check the dtypes: both columns are float64, so the integer literal 1 is stored as 1.0.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       2 non-null      float64
 1   1       2 non-null      float64
dtypes: float64(2)
memory usage: 164.0 bytes


In [20]:
# Convert every cell to its string form, element by element.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
df.applymap(lambda x:str(x))

Unnamed: 0,0,1
0,1.0,2.12
1,3.345,4.567


In [21]:
# Length of each cell's string form (e.g. 1.0 -> "1.0" -> 3).
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
df.applymap(lambda x:len(str(x)))

Unnamed: 0,0,1
0,3,4
1,5,5
