# 연산

In [2]:
import pandas as pd
import numpy as np

# Per-subject scores of two students. The second Series lists the same
# subjects in a different order on purpose.
st1 = pd.Series([100, 80, 90], index=['국어', '영어', '수학'])
st2 = pd.Series([80, 90, 80], index=['수학', '국어', '영어'])
print(st1, '\n')
print(st2)

국어    100
영어     80
수학     90
dtype: int64 

수학    80
국어    90
영어    80
dtype: int64


In [3]:
# Perform the four basic arithmetic operations on the two students' scores.
# The Series are matched on their subject labels, not on position.
add, sub, mul = st1 + st2, st1 - st2, st1 * st2
div = (st1 / st2).round(2)  # quotient rounded to two decimals
# Put the four results side by side; the columns are numbered 0-3.
df = pd.concat([add, sub, mul, div], axis=1)
df

Unnamed: 0,0,1,2,3
국어,190,10,9000,1.11
수학,170,10,7200,1.12
영어,160,0,6400,1.0


In [4]:
# Same four results, this time stacked as one labelled row per operation.
# NOTE(review): '뺼셈' looks like a misspelling of '뺄셈' (subtraction); it is
# kept as-is because it is a runtime index label.
result = pd.DataFrame(
    [add, sub, mul, div],
    index=['덧셈', '뺼셈', '곱셈', '나눗셈'],
)
result

Unnamed: 0,국어,수학,영어
덧셈,190.0,170.0,160.0
뺼셈,10.0,10.0,0.0
곱셈,9000.0,7200.0,6400.0
나눗셈,1.11,1.12,1.0


In [10]:
# Array arithmetic
# Floats 0.0 .. 11.0 laid out as 3 rows x 4 columns.
arr = np.arange(12, dtype=float).reshape(3, 4)
print(arr, '\n')
print(arr[0])  # row 0 of arr

[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]] 

[0. 1. 2. 3.]


In [11]:
# Broadcasting: where the shapes are compatible, NumPy automatically matches
# them up. Here the first row is subtracted from every row of arr.
arr-arr[0]

array([[0., 0., 0., 0.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.]])

In [13]:
# Arithmetic between a DataFrame and a Series
# Build the DataFrame: floats 0.0 .. 11.0 in 4 rows x 3 columns.
frame = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [16]:
series = frame.iloc[0] # take the first row as a Series
series

b    0.0
d    1.0
e    2.0
Name: Utah, dtype: float64

In [17]:
# Subtract the first-row Series from every row of frame; the Series labels
# (b, d, e) line up with the frame's columns.
frame - series

Unnamed: 0,b,d,e
Utah,0.0,0.0,0.0
Ohio,3.0,3.0,3.0
Texas,6.0,6.0,6.0
Oregon,9.0,9.0,9.0


In [18]:
# Element-wise sum of the frame with itself, so every value doubles.
frame + frame

Unnamed: 0,b,d,e
Utah,0.0,2.0,4.0
Ohio,6.0,8.0,10.0
Texas,12.0,14.0,16.0
Oregon,18.0,20.0,22.0


# 커스텀 함수
커스텀 함수(custom function)를 DataFrame에 적용하려면 map함수, apply함수, applymap함수를 사용

- map함수 : DataFrame 타입이 아니라, 반드시 Series 타입에서만 사용
- apply함수 : 커스텀 함수를 사용하기 위해 DataFrame에서 복수 개의 컬럼이 필요하다면, apply함수를 사용
- applymap함수 : DataFrame클래스의 함수이긴 하나, 위의 apply함수처럼 각 row(axis=1)나 각 column(axis=0)별로 
작동하는 함수가 아니라, 각 요소(element)별로 작동  
마치 선형대수에서 벡터에 스칼라를 연산하면, 벡터의 요소 하나하나에 해당 연산을 해주는 것처럼(elementwise) 
적용하는 DataFrame의 각 요소마다 커스텀 함수(반드시 Single value를 반환하는)를 수행한다고 보면 된다. 
applymap에 인자로 전달하는 커스텀함수가 Single value로부터 Single value를 반환한다는 점이 중요하다


In [20]:
def f(x):
    """Return the spread of *x*: its maximum minus its minimum.

    *x* is expected to provide ``max()`` and ``min()`` methods, as the
    Series handed over by ``DataFrame.apply`` does.
    """
    return x.max() - x.min()
frame.apply(f) # axis=0 (the default): f is called once per column, giving one result per column

b    9.0
d    9.0
e    9.0
dtype: float64

In [23]:
frame.apply(f,axis='columns') # axis='columns': f is called once per row, giving one result per row label

Utah      2.0
Ohio      2.0
Texas     2.0
Oregon    2.0
dtype: float64

In [25]:
# Returning a Series that holds several values from the applied function:
# show the input frame first.
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [28]:
def f(x):
    """Summarise *x* as a two-entry Series labelled 'min' and 'max'."""
    lowest, highest = x.min(), x.max()
    return pd.Series([lowest, highest], index=['min', 'max'])
# f returns a two-entry Series per column, so the result is a DataFrame with
# rows 'min' and 'max'.
frame.apply(f)

Unnamed: 0,b,d,e
min,0.0,1.0,2.0
max,9.0,10.0,11.0


In [30]:
# Formatter: render a number with exactly two digits after the decimal point.
# NOTE(review): this name shadows the built-in ``format``; it is kept because
# the following cells call the formatter under this name.
def format(x):
    """Return *x* as a fixed-point string with two decimals."""
    return '%.2f' % x
# applymap calls the formatter on each individual element of the frame.
# NOTE(review): recent pandas releases deprecate applymap in favour of
# DataFrame.map - confirm against the installed version.
frame.applymap(format)

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [31]:
# Series.map runs the formatter on each value of column 'e'; the results are
# strings, hence dtype object.
frame['e'].map(format)

Utah       2.00
Ohio       5.00
Texas      8.00
Oregon    11.00
Name: e, dtype: object

6/29 과제 : 최대한 복잡한 사용자 함수를 생성한 후 데이터프레임에 적용하여 결과를 출력하세요.

In [73]:
import warnings
# Suppress every warning from here on (no category or module filter is given).
warnings.filterwarnings('ignore')

In [67]:
# Sample member records: one list per column, one position per person.
data = {
    'name': ['Emma', 'Emily', 'Mathius', 'Dragon', 'Jihan'],
    'use_month': [10, 9, 30, 2, 21],
    'age': [28, 25, 30, 35, 45],
    'job': ['musician', 'teacher', 'chef', 'baker', 'artist'],
    'salary': [3000, 4000, 2500, 5000, 7000],
    'height': [160, 170, 180, 184, 190],
}
# The column order is pinned explicitly to the order of the dict keys.
df = pd.DataFrame(data, columns=list(data))
df

Unnamed: 0,name,use_month,age,job,salary,height
0,Emma,10,28,musician,3000,160
1,Emily,9,25,teacher,4000,170
2,Mathius,30,30,chef,2500,180
3,Dragon,2,35,baker,5000,184
4,Jihan,21,45,artist,7000,190


In [68]:
def salary_class(x):
    """Return the salary bracket label for *x*.

    Below 3500 gives 'row', below 5000 gives 'mid', anything else 'high'.
    """
    # NOTE(review): 'row' is presumably a typo for 'low'; it is kept because
    # the label is runtime data.
    if x < 3500:
        return 'row'
    if x < 5000:
        return 'mid'
    return 'high'

# Label every salary with its bracket. The function is passed directly,
# since wrapping it in a lambda adds nothing.
df['salary_class'] = df['salary'].apply(salary_class)
df

Unnamed: 0,name,use_month,age,job,salary,height,salary_class
0,Emma,10,28,musician,3000,160,row
1,Emily,9,25,teacher,4000,170,mid
2,Mathius,30,30,chef,2500,180,row
3,Dragon,2,35,baker,5000,184,high
4,Jihan,21,45,artist,7000,190,high


In [69]:
def height_class(x):
    """Return the height bracket label for *x*.

    Below 172 gives 'row', below 180 gives 'mid', anything else 'high'.
    """
    # NOTE(review): 'row' is presumably a typo for 'low'; it is kept because
    # the label is runtime data.
    if x < 172:
        return 'row'
    if x < 180:
        return 'mid'
    return 'high'

# Label every height with its bracket. The function is passed directly,
# since wrapping it in a lambda adds nothing.
df['height_class'] = df['height'].apply(height_class)
df

Unnamed: 0,name,use_month,age,job,salary,height,salary_class,height_class
0,Emma,10,28,musician,3000,160,row,row
1,Emily,9,25,teacher,4000,170,mid,row
2,Mathius,30,30,chef,2500,180,row,high
3,Dragon,2,35,baker,5000,184,high,high
4,Jihan,21,45,artist,7000,190,high,high


In [70]:
def age_class(x):
    """Return the age bracket label for *x*.

    Below 30 gives 'row', below 40 gives 'mid', anything else 'high'.
    """
    # NOTE(review): 'row' is presumably a typo for 'low'; it is kept because
    # the label is runtime data.
    if x < 30:
        return 'row'
    if x < 40:
        return 'mid'
    return 'high'

# Label every age with its bracket. The function is passed directly,
# since wrapping it in a lambda adds nothing.
df['age_class'] = df['age'].apply(age_class)
df

Unnamed: 0,name,use_month,age,job,salary,height,salary_class,height_class,age_class
0,Emma,10,28,musician,3000,160,row,row,row
1,Emily,9,25,teacher,4000,170,mid,row,row
2,Mathius,30,30,chef,2500,180,row,high,mid
3,Dragon,2,35,baker,5000,184,high,high,mid
4,Jihan,21,45,artist,7000,190,high,high,high


In [71]:
# Reorder the columns so each raw value sits next to its derived class label.
# .copy() makes the reordered frame an independent object, so columns added
# to it later are not flagged by pandas as writes to a slice of another frame.
df = df[[
    'name',
    'age', 'age_class',
    'job',
    'salary', 'salary_class',
    'height', 'height_class',
    'use_month',
]].copy()
df

Unnamed: 0,name,age,age_class,job,salary,salary_class,height,height_class,use_month
0,Emma,28,row,musician,3000,row,160,row,10
1,Emily,25,row,teacher,4000,mid,170,row,9
2,Mathius,30,mid,chef,2500,row,180,high,30
3,Dragon,35,mid,baker,5000,high,184,high,2
4,Jihan,45,high,artist,7000,high,190,high,21


In [74]:
def id_class(x):
    """Return the membership tier for *x*.

    Below 10 gives 'silver', below 30 gives 'gold', anything else 'vip'.
    """
    if x < 10:
        return 'silver'
    if x < 30:
        return 'gold'
    return 'vip'

# Derive the membership tier from use_month. The function is passed directly,
# since wrapping it in a lambda adds nothing.
df['id_class'] = df['use_month'].apply(id_class)
df

Unnamed: 0,name,age,age_class,job,salary,salary_class,height,height_class,use_month,id_class
0,Emma,28,row,musician,3000,row,160,row,10,gold
1,Emily,25,row,teacher,4000,mid,170,row,9,silver
2,Mathius,30,mid,chef,2500,row,180,high,30,vip
3,Dragon,35,mid,baker,5000,high,184,high,2,silver
4,Jihan,45,high,artist,7000,high,190,high,21,gold
