# 6. `lambda` 사용하기
## 1) Python `lambda` 개념 및 사용방법
- 파이썬에서 람다(lambda) 함수는 `def` 키워드 없이 한 줄의 표현식으로 정의하는 익명 함수이며, 간단한 연산을 수행하는 경우 유용합니다.


- 람다 함수의 기본 구문은 다음과 같습니다:
```python
    lambda arguments: expression
```
- 여기서 `arguments`는 함수에 전달될 매개변수를 나타내며, `expression`은 함수의 반환값을 결정하는 표현식입니다.

In [16]:
# Bind a two-argument lambda to a name, then call it like any other function.
add = lambda x, y: x + y
print(add(x=5, y=3))

8


In [18]:
# Order the (x, y) tuples by their second element, using a lambda as the sort key.
points = sorted([(1, 2), (3, 1), (5, 4)], key=lambda pt: pt[1])
print(points)

[(3, 1), (1, 2), (5, 4)]


In [20]:
# Combine a lambda with map() to apply the same operation (squaring) to every element.
nums = [1, 2, 3, 4, 5]
squared = map(lambda n: n ** 2, nums)
# map() is lazy, so the results are materialized into a list before printing.
print([*squared])

[1, 4, 9, 16, 25]


In [21]:
# Combine a lambda with filter() to keep only the elements that satisfy a condition (even numbers).
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9]
evens = filter(lambda n: n % 2 == 0, nums)
# filter() is lazy, so the results are materialized into a list before printing.
print([*evens])

[2, 4, 6, 8]


## 2) Pandas에서 `lambda` 사용하기
- Pandas에서 `lambda` 함수는 간단한 함수를 즉석에서 만드는 데 사용되며, 일반적으로 `.apply()`, `.map()`, 그리고 `groupby` 객체의 `.filter()`처럼 함수를 인자로 받는 Pandas 메서드와 함께 사용됩니다. (`DataFrame.filter()`는 행/열 라벨을 선택하는 메서드로, 함수를 인자로 받지 않습니다.)

In [22]:
import pandas as pd

df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


- `.apply()`로 특정 컬럼에 적용하기

In [23]:
# Add 10 to every value in column 'A' by applying a lambda element-wise.
df["A"] = df["A"].apply(lambda x: x + 10)
df

Unnamed: 0,A,B
0,11,4
1,12,5
2,13,6


- 조건에 따른 새로운 컬럼 생성하기

In [25]:
# Derive a new column 'C' from 'A': "large" when the value is greater than 11,
# otherwise "small".
df["C"] = df["A"].apply( lambda x: "large" if x > 11 else "small")
df

Unnamed: 0,A,B,C
0,11,4,small
1,12,5,large
2,13,6,large


- DataFrame의 여러 컬럼에 함수 적용하기
    - `apply()` 메서드에 `axis=1`을 설정하고 `lambda` 함수를 사용

In [26]:
df

Unnamed: 0,A,B,C
0,11,4,small
1,12,5,large
2,13,6,large


In [27]:
# With axis=1, apply() passes each row to the lambda, so several columns can be
# combined per row; here 'A' + 'B' is stored in a new 'sum' column.
df["sum"] = df.apply(
    lambda row: row["A"] + row["B"], axis=1
)

In [28]:
df

Unnamed: 0,A,B,C,sum
0,11,4,small,15
1,12,5,large,17
2,13,6,large,19


- Series에서 특정 조건을 만족하는 요소 필터링하기

In [29]:
s = pd.Series([1,2,3,4,5])

In [32]:
s.apply(lambda x: x>2)

0    False
1    False
2     True
3     True
4     True
dtype: bool

In [30]:
# apply() yields a boolean Series (True where the value is greater than 2);
# indexing s with that mask keeps only the matching elements.
filtered_s = s[s.apply(lambda x: x>2)]

In [31]:
filtered_s

2    3
3    4
4    5
dtype: int64

- `lambda` 또는 `python 함수`를 사용해서 가격변동률을 계산하기

In [33]:
import pandas as pd
data = pd.read_csv("../dataset/yfinance_aapl.csv", index_col=0)
data.head(3)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
1,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800


In [34]:
# Using a lambda: compute each row's open-to-close change as a percentage of Open.
# axis=1 makes apply() call the lambda once per row.
data["PriceChangePercent"] = data.apply(
    lambda row: ((row["Close"] - row["Open"])/row["Open"])*100, axis=1
)
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,PriceChangePercent
0,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300,1.035733
1,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200,-1.00466
2,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800,-1.11283
3,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800,-0.341246
4,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200,0.149999


In [35]:
import pandas as pd
data = pd.read_csv("../dataset/yfinance_aapl.csv", index_col=0)
data.head(3)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
1,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800


In [36]:
# Alternative to a lambda: define a regular, named Python function.
def calculate_price_change_percent(row):
    """Return the open-to-close price change of one row, as a percentage.

    Args:
        row: An object indexable by the keys "Open" and "Close"
            (for example a DataFrame row passed by ``apply(..., axis=1)``).

    Returns:
        ``(Close - Open) / Open * 100`` for the given row.
    """
    close_price = row["Close"]
    open_price = row["Open"]
    change = close_price - open_price
    return (change / open_price) * 100

In [37]:
# Pass the named function to apply() in place of a lambda; axis=1 calls it once per row.
data["PriceChangePercent"] = data.apply(calculate_price_change_percent, axis=1)
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,PriceChangePercent
0,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300,1.035733
1,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200,-1.00466
2,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800,-1.11283
3,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800,-0.341246
4,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200,0.149999


# 7. 기타 중요한 pandas 기능들

## 1) `reset_index()`
- `reset_index()` 메서드는 DataFrame의 index를 기본 정수 index(0, 1, 2, …)로 재설정할 때 사용합니다.

In [38]:
import pandas as pd

df = pd.read_csv("../dataset/yfinance_aapl.csv", index_col=0).head(10)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
1,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800
3,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800
4,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200


In [39]:
df = df.set_index("Date")
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800
2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800
2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200
2023-07-26,193.669998,195.639999,193.320007,194.5,193.472519,47471900
2023-07-27,196.020004,197.199997,192.550003,193.220001,192.19928,47460200
2023-07-28,194.669998,196.630005,194.139999,195.830002,194.795486,48291400
2023-07-31,196.059998,196.490005,195.259995,196.449997,195.412201,38824100
2023-08-01,196.240005,196.729996,195.279999,195.610001,194.576645,35175100


In [40]:
# Replace the current index with a default integer index via reset_index().
# The previous index is added back to the frame as a regular column.
df = df.reset_index()
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
1,2023-07-20,195.089996,196.470001,192.5,193.130005,192.109756,59581200
2,2023-07-21,194.100006,194.970001,191.229996,191.940002,190.926041,71917800
3,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800
4,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200


In [45]:
# Drop specific rows (labels 1 and 2), then renumber the index with reset_index().
# Without drop=True the old labels are kept in a new 'index' column.
# The result is only displayed; df itself is not reassigned here.
df.drop(index=[1,2]).reset_index()

Unnamed: 0,index,Date,Open,High,Low,Close,Adj Close,Volume
0,0,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
1,3,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800
2,4,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200
3,5,2023-07-26,193.669998,195.639999,193.320007,194.5,193.472519,47471900
4,6,2023-07-27,196.020004,197.199997,192.550003,193.220001,192.19928,47460200
5,7,2023-07-28,194.669998,196.630005,194.139999,195.830002,194.795486,48291400
6,8,2023-07-31,196.059998,196.490005,195.259995,196.449997,195.412201,38824100
7,9,2023-08-01,196.240005,196.729996,195.279999,195.610001,194.576645,35175100


In [48]:
# Use reset_index(drop=True) so the old index is discarded instead of being
# added as a new column.
df.drop(index=[1,2]).reset_index(drop=True)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2023-07-19,193.100006,198.229996,192.649994,195.100006,194.069351,80507300
1,2023-07-24,193.410004,194.910004,192.25,192.75,191.731766,45377800
2,2023-07-25,193.330002,194.440002,192.919998,193.619995,192.597153,37283200
3,2023-07-26,193.669998,195.639999,193.320007,194.5,193.472519,47471900
4,2023-07-27,196.020004,197.199997,192.550003,193.220001,192.19928,47460200
5,2023-07-28,194.669998,196.630005,194.139999,195.830002,194.795486,48291400
6,2023-07-31,196.059998,196.490005,195.259995,196.449997,195.412201,38824100
7,2023-08-01,196.240005,196.729996,195.279999,195.610001,194.576645,35175100


## 2) `pd.concat()`
- `pd.concat()` 함수는 여러 개의 DataFrame을 행 또는 열을 기준으로 합칠 때 사용합니다. (`concat`은 DataFrame의 메서드가 아니라 pandas 최상위 함수이므로 `df.concat()`이 아닌 `pd.concat()`으로 호출합니다.)

In [50]:
import pandas as pd

# Load the two CSV files, using the first CSV column as the index, and keep
# only the first five rows of each as inputs for the concat examples.
df_aapl = pd.read_csv("../dataset/yfinance_aapl_3year.csv", index_col=0).head(5)
df_nvda = pd.read_csv("../dataset/yfinance_nvda_3year.csv", index_col=0).head(5)

In [51]:
df_aapl

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-07-19,143.75,144.070007,141.669998,142.449997,140.056213,121434600
1,2021-07-20,143.460007,147.100006,142.960007,146.149994,143.694046,96350000
2,2021-07-21,145.529999,146.130005,144.630005,145.399994,142.956619,74993500
3,2021-07-22,145.940002,148.199997,145.809998,146.800003,144.333099,77338200
4,2021-07-23,147.550003,148.720001,146.919998,148.559998,146.063553,71447400


In [52]:
df_nvda

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-07-20,18.73,18.837999,18.164,18.612,18.578419,434687000
1,2021-07-21,18.882,19.527,18.742001,19.41,19.374977,371017000
2,2021-07-22,19.642,19.886999,19.275999,19.594,19.558643,323826000
3,2021-07-23,19.656,19.700001,19.25,19.558001,19.522709,195672000
4,2021-07-26,19.311001,19.441999,18.914,19.294001,19.259186,203943000


In [54]:
# Concatenate the DataFrames horizontally (side by side) with axis=1.
pd.concat( [ df_aapl, df_nvda ], axis=1 )

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Date.1,Open.1,High.1,Low.1,Close.1,Adj Close.1,Volume.1
0,2021-07-19,143.75,144.070007,141.669998,142.449997,140.056213,121434600,2021-07-20,18.73,18.837999,18.164,18.612,18.578419,434687000
1,2021-07-20,143.460007,147.100006,142.960007,146.149994,143.694046,96350000,2021-07-21,18.882,19.527,18.742001,19.41,19.374977,371017000
2,2021-07-21,145.529999,146.130005,144.630005,145.399994,142.956619,74993500,2021-07-22,19.642,19.886999,19.275999,19.594,19.558643,323826000
3,2021-07-22,145.940002,148.199997,145.809998,146.800003,144.333099,77338200,2021-07-23,19.656,19.700001,19.25,19.558001,19.522709,195672000
4,2021-07-23,147.550003,148.720001,146.919998,148.559998,146.063553,71447400,2021-07-26,19.311001,19.441999,18.914,19.294001,19.259186,203943000


In [55]:
# Concatenate the DataFrames vertically (stacked on top of each other) with axis=0.
# Each frame keeps its own index labels, so labels repeat in the result.
pd.concat([ df_aapl, df_nvda ], axis=0)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-07-19,143.75,144.070007,141.669998,142.449997,140.056213,121434600
1,2021-07-20,143.460007,147.100006,142.960007,146.149994,143.694046,96350000
2,2021-07-21,145.529999,146.130005,144.630005,145.399994,142.956619,74993500
3,2021-07-22,145.940002,148.199997,145.809998,146.800003,144.333099,77338200
4,2021-07-23,147.550003,148.720001,146.919998,148.559998,146.063553,71447400
0,2021-07-20,18.73,18.837999,18.164,18.612,18.578419,434687000
1,2021-07-21,18.882,19.527,18.742001,19.41,19.374977,371017000
2,2021-07-22,19.642,19.886999,19.275999,19.594,19.558643,323826000
3,2021-07-23,19.656,19.700001,19.25,19.558001,19.522709,195672000
4,2021-07-26,19.311001,19.441999,18.914,19.294001,19.259186,203943000


In [56]:
# Assign a fresh index automatically: ignore_index=True discards the original
# labels and numbers the combined rows from 0.
pd.concat([ df_aapl, df_nvda ], axis=0, ignore_index=True)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-07-19,143.75,144.070007,141.669998,142.449997,140.056213,121434600
1,2021-07-20,143.460007,147.100006,142.960007,146.149994,143.694046,96350000
2,2021-07-21,145.529999,146.130005,144.630005,145.399994,142.956619,74993500
3,2021-07-22,145.940002,148.199997,145.809998,146.800003,144.333099,77338200
4,2021-07-23,147.550003,148.720001,146.919998,148.559998,146.063553,71447400
5,2021-07-20,18.73,18.837999,18.164,18.612,18.578419,434687000
6,2021-07-21,18.882,19.527,18.742001,19.41,19.374977,371017000
7,2021-07-22,19.642,19.886999,19.275999,19.594,19.558643,323826000
8,2021-07-23,19.656,19.700001,19.25,19.558001,19.522709,195672000
9,2021-07-26,19.311001,19.441999,18.914,19.294001,19.259186,203943000
