# [10 Minutes to pandas](https://pandas.pydata.org/pandas-docs/stable/10min.html)

In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
import matplotlib.pyplot as plt

## Object Creation (객체 생성)
- 참고 : [Intro to Data Structures](https://pandas.pydata.org/pandas-docs/stable/dsintro.html)

### [Series](https://pandas.pydata.org/pandas-docs/stable/dsintro.html#series)
- pandas는 값을 가지고 있는 리스트를 통해 Series를 생성
- 인덱스를 따로 지정하지 않으면 정수 인덱스가 기본값으로 만들어진다

In [4]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])

In [5]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

### [Dataframe](https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe)
- numpy 배열과 함께 datetime 인덱스, 열 레이블을 전달하여 데이터프레임 생성

#### 오늘 날짜 추출

In [6]:
from datetime import datetime

In [7]:
datetime.today()

datetime.datetime(2018, 8, 29, 16, 41, 19, 136636)

In [8]:
datetime.today().strftime("%Y%m%d")

'20180829'

In [9]:
today = datetime.today().strftime("%Y%m%d")

#### 날짜 배열 생성

In [10]:
dates = pd.date_range(today, periods=6)

In [11]:
dates

DatetimeIndex(['2018-08-29', '2018-08-30', '2018-08-31', '2018-09-01',
               '2018-09-02', '2018-09-03'],
              dtype='datetime64[ns]', freq='D')

#### numpy를 사용하여 2차원 배열 데이터 생성

In [12]:
datas = np.random.randn(6, 4)

In [13]:
datas

array([[-0.42191386, -0.50220933, -0.69251315,  1.11818748],
       [ 0.39490268, -0.00457177, -0.69822453, -0.38735307],
       [ 0.98306916, -0.36030944, -1.28088953,  1.08822596],
       [-0.47927972,  1.90868364,  0.99534672, -0.48972571],
       [ 0.36487802, -0.52324806,  1.32761886,  0.15291502],
       [ 0.96534367,  0.52341211,  0.40692508,  1.49619134]])

#### Dataframe 생성

In [14]:
df = pd.DataFrame(datas, index=dates, columns=list('ABCD'))

In [15]:
df

Unnamed: 0,A,B,C,D
2018-08-29,-0.421914,-0.502209,-0.692513,1.118187
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-08-31,0.983069,-0.360309,-1.28089,1.088226
2018-09-01,-0.47928,1.908684,0.995347,-0.489726
2018-09-02,0.364878,-0.523248,1.327619,0.152915
2018-09-03,0.965344,0.523412,0.406925,1.496191


#### dict로 Dataframe 생성

In [16]:
# Build a DataFrame from a dict: every key becomes a column, and the values mix
# scalars (repeated on every row) with array-likes of length four.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp(today),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train"] * 2),
        'F': 'foo',
    }
)

In [17]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2018-08-29,1.0,3,test,foo
1,1.0,2018-08-29,1.0,3,train,foo
2,1.0,2018-08-29,1.0,3,test,foo
3,1.0,2018-08-29,1.0,3,train,foo


Dataframe의 열은 다양한 데이터 타입(dtype)으로 구성

In [18]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Viewing Data (데이터 확인하기)
- 참고 : [Essential Basic Functionality](https://pandas.pydata.org/pandas-docs/stable/basics.html#basics)

### Head and Tail

In [19]:
df.tail(3) # last 3 rows

Unnamed: 0,A,B,C,D
2018-09-01,-0.47928,1.908684,0.995347,-0.489726
2018-09-02,0.364878,-0.523248,1.327619,0.152915
2018-09-03,0.965344,0.523412,0.406925,1.496191


In [20]:
df.tail() # last 5 rows (no count given, so the default is used)

Unnamed: 0,A,B,C,D
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-08-31,0.983069,-0.360309,-1.28089,1.088226
2018-09-01,-0.47928,1.908684,0.995347,-0.489726
2018-09-02,0.364878,-0.523248,1.327619,0.152915
2018-09-03,0.965344,0.523412,0.406925,1.496191


In [21]:
df.head()

Unnamed: 0,A,B,C,D
2018-08-29,-0.421914,-0.502209,-0.692513,1.118187
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-08-31,0.983069,-0.360309,-1.28089,1.088226
2018-09-01,-0.47928,1.908684,0.995347,-0.489726
2018-09-02,0.364878,-0.523248,1.327619,0.152915


### Attributes

In [22]:
df.index

DatetimeIndex(['2018-08-29', '2018-08-30', '2018-08-31', '2018-09-01',
               '2018-09-02', '2018-09-03'],
              dtype='datetime64[ns]', freq='D')

In [23]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [24]:
df.values

array([[-0.42191386, -0.50220933, -0.69251315,  1.11818748],
       [ 0.39490268, -0.00457177, -0.69822453, -0.38735307],
       [ 0.98306916, -0.36030944, -1.28088953,  1.08822596],
       [-0.47927972,  1.90868364,  0.99534672, -0.48972571],
       [ 0.36487802, -0.52324806,  1.32761886,  0.15291502],
       [ 0.96534367,  0.52341211,  0.40692508,  1.49619134]])

### [Summarizing data: describe](https://pandas.pydata.org/pandas-docs/stable/basics.html#summarizing-data-describe)
- 참고 : [pandas.DataFrame.describe](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.describe.html#pandas.DataFrame.describe)
- 대략적인 통계적 정보

In [25]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.301167,0.173626,0.009711,0.496407
std,0.640454,0.93723,1.051292,0.849459
min,-0.47928,-0.523248,-1.28089,-0.489726
25%,-0.225216,-0.466734,-0.696797,-0.252286
50%,0.37989,-0.182441,-0.142794,0.62057
75%,0.822733,0.391416,0.848241,1.110697
max,0.983069,1.908684,1.327619,1.496191


### Transposition

In [26]:
df.T

Unnamed: 0,2018-08-29 00:00:00,2018-08-30 00:00:00,2018-08-31 00:00:00,2018-09-01 00:00:00,2018-09-02 00:00:00,2018-09-03 00:00:00
A,-0.421914,0.394903,0.983069,-0.47928,0.364878,0.965344
B,-0.502209,-0.004572,-0.360309,1.908684,-0.523248,0.523412
C,-0.692513,-0.698225,-1.28089,0.995347,1.327619,0.406925
D,1.118187,-0.387353,1.088226,-0.489726,0.152915,1.496191


### Sort

#### by index

In [27]:
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2018-08-29,1.118187,-0.692513,-0.502209,-0.421914
2018-08-30,-0.387353,-0.698225,-0.004572,0.394903
2018-08-31,1.088226,-1.28089,-0.360309,0.983069
2018-09-01,-0.489726,0.995347,1.908684,-0.47928
2018-09-02,0.152915,1.327619,-0.523248,0.364878
2018-09-03,1.496191,0.406925,0.523412,0.965344


In [28]:
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D
2018-09-03,0.965344,0.523412,0.406925,1.496191
2018-09-02,0.364878,-0.523248,1.327619,0.152915
2018-09-01,-0.47928,1.908684,0.995347,-0.489726
2018-08-31,0.983069,-0.360309,-1.28089,1.088226
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-08-29,-0.421914,-0.502209,-0.692513,1.118187


#### by value

In [29]:
df.sort_values('B')

Unnamed: 0,A,B,C,D
2018-09-02,0.364878,-0.523248,1.327619,0.152915
2018-08-29,-0.421914,-0.502209,-0.692513,1.118187
2018-08-31,0.983069,-0.360309,-1.28089,1.088226
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-09-03,0.965344,0.523412,0.406925,1.496191
2018-09-01,-0.47928,1.908684,0.995347,-0.489726


In [30]:
df.sort_values(by=['A', 'B'])

Unnamed: 0,A,B,C,D
2018-09-01,-0.47928,1.908684,0.995347,-0.489726
2018-08-29,-0.421914,-0.502209,-0.692513,1.118187
2018-09-02,0.364878,-0.523248,1.327619,0.152915
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-09-03,0.965344,0.523412,0.406925,1.496191
2018-08-31,0.983069,-0.360309,-1.28089,1.088226


## Selection (선택)
- 참고
    - [Indexing and Selecting Data](https://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing)
    - [MultiIndex / Advanced Indexing](https://pandas.pydata.org/pandas-docs/stable/advanced.html#advanced)

### Getting (데이터 얻기)

In [31]:
df['A']

2018-08-29   -0.421914
2018-08-30    0.394903
2018-08-31    0.983069
2018-09-01   -0.479280
2018-09-02    0.364878
2018-09-03    0.965344
Freq: D, Name: A, dtype: float64

In [32]:
df[0:3]

Unnamed: 0,A,B,C,D
2018-08-29,-0.421914,-0.502209,-0.692513,1.118187
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-08-31,0.983069,-0.360309,-1.28089,1.088226


In [33]:
# Slice rows by index label. The bounds come from `dates` instead of hard-coded
# literals, so the cell still selects the first four rows when the notebook is
# re-run on another day. Label slices include both endpoints; date strings such
# as '2018-08-29' are accepted as labels as well.
df[dates[0]:dates[3]]

Unnamed: 0,A,B,C,D
2018-08-29,-0.421914,-0.502209,-0.692513,1.118187
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-08-31,0.983069,-0.360309,-1.28089,1.088226
2018-09-01,-0.47928,1.908684,0.995347,-0.489726


### Selection by Label (Label을 통한 선택)
- 참고 : [Selection By Label](https://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-label)

In [34]:
df.loc[dates[0]]

A   -0.421914
B   -0.502209
C   -0.692513
D    1.118187
Name: 2018-08-29 00:00:00, dtype: float64

In [35]:
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2018-08-29,-0.421914,-0.502209
2018-08-30,0.394903,-0.004572
2018-08-31,0.983069,-0.360309
2018-09-01,-0.47928,1.908684
2018-09-02,0.364878,-0.523248
2018-09-03,0.965344,0.523412


In [36]:
# Select columns A and B for a label range of rows. The bounds come from
# `dates` rather than hard-coded literals so the selection stays non-empty on
# any run date; both endpoints are included. Date strings such as '2018-08-29'
# work as labels too.
df.loc[dates[0]:dates[3], ['A', 'B']]

Unnamed: 0,A,B
2018-08-29,-0.421914,-0.502209
2018-08-30,0.394903,-0.004572
2018-08-31,0.983069,-0.360309
2018-09-01,-0.47928,1.908684


In [37]:
# Select columns A and B of the first row by its label. Using `dates[0]`
# instead of a hard-coded date avoids a KeyError when the notebook is re-run
# on a different day; a date string such as '2018-08-29' works as a label too.
df.loc[dates[0], ['A', 'B']]

A   -0.421914
B   -0.502209
Name: 2018-08-29 00:00:00, dtype: float64

In [38]:
df.loc[dates[0], 'A']

-0.42191385726309455

In [39]:
df.at[dates[0],'A']

-0.42191385726309455

### Selection by Position (위치로 선택하기)
- 참고 : [Selection By Position](https://pandas.pydata.org/pandas-docs/stable/indexing.html#selection-by-position)

In [40]:
df.iloc[3]

A   -0.479280
B    1.908684
C    0.995347
D   -0.489726
Name: 2018-09-01 00:00:00, dtype: float64

In [41]:
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2018-09-01,-0.47928,1.908684
2018-09-02,0.364878,-0.523248


In [42]:
df.iloc[[1,2,4],[0,2]]

Unnamed: 0,A,C
2018-08-30,0.394903,-0.698225
2018-08-31,0.983069,-1.28089
2018-09-02,0.364878,1.327619


In [43]:
df.iloc[1:3,:]

Unnamed: 0,A,B,C,D
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-08-31,0.983069,-0.360309,-1.28089,1.088226


In [44]:
df.iloc[:,1:3]

Unnamed: 0,B,C
2018-08-29,-0.502209,-0.692513
2018-08-30,-0.004572,-0.698225
2018-08-31,-0.360309,-1.28089
2018-09-01,1.908684,0.995347
2018-09-02,-0.523248,1.327619
2018-09-03,0.523412,0.406925


In [45]:
df.iloc[1,1]

-0.004571771988529306

In [46]:
df.iat[1,1]

-0.004571771988529306

### Boolean Indexing

In [47]:
df[df.A > 0]

Unnamed: 0,A,B,C,D
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353
2018-08-31,0.983069,-0.360309,-1.28089,1.088226
2018-09-02,0.364878,-0.523248,1.327619,0.152915
2018-09-03,0.965344,0.523412,0.406925,1.496191


In [48]:
df[df > 0]

Unnamed: 0,A,B,C,D
2018-08-29,,,,1.118187
2018-08-30,0.394903,,,
2018-08-31,0.983069,,,1.088226
2018-09-01,,1.908684,0.995347,
2018-09-02,0.364878,,1.327619,0.152915
2018-09-03,0.965344,0.523412,0.406925,1.496191


In [49]:
df2 = df.copy()

In [50]:
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']

In [51]:
df2

Unnamed: 0,A,B,C,D,E
2018-08-29,-0.421914,-0.502209,-0.692513,1.118187,one
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353,one
2018-08-31,0.983069,-0.360309,-1.28089,1.088226,two
2018-09-01,-0.47928,1.908684,0.995347,-0.489726,three
2018-09-02,0.364878,-0.523248,1.327619,0.152915,four
2018-09-03,0.965344,0.523412,0.406925,1.496191,three


In [52]:
df2[df2['E'].isin(['two', 'three'])]

Unnamed: 0,A,B,C,D,E
2018-08-31,0.983069,-0.360309,-1.28089,1.088226,two
2018-09-01,-0.47928,1.908684,0.995347,-0.489726,three
2018-09-03,0.965344,0.523412,0.406925,1.496191,three


In [53]:
df2[df2['E'].isin(['two','four'])]

Unnamed: 0,A,B,C,D,E
2018-08-31,0.983069,-0.360309,-1.28089,1.088226,two
2018-09-02,0.364878,-0.523248,1.327619,0.152915,four


### Setting (설정)

In [54]:
s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range(today, periods=6))

In [55]:
s1

2018-08-29    1
2018-08-30    2
2018-08-31    3
2018-09-01    4
2018-09-02    5
2018-09-03    6
Freq: D, dtype: int64

In [56]:
df['F'] = s1

In [57]:
df

Unnamed: 0,A,B,C,D,F
2018-08-29,-0.421914,-0.502209,-0.692513,1.118187,1
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353,2
2018-08-31,0.983069,-0.360309,-1.28089,1.088226,3
2018-09-01,-0.47928,1.908684,0.995347,-0.489726,4
2018-09-02,0.364878,-0.523248,1.327619,0.152915,5
2018-09-03,0.965344,0.523412,0.406925,1.496191,6


In [58]:
df.at[dates[0], 'A'] = 0

In [59]:
df

Unnamed: 0,A,B,C,D,F
2018-08-29,0.0,-0.502209,-0.692513,1.118187,1
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353,2
2018-08-31,0.983069,-0.360309,-1.28089,1.088226,3
2018-09-01,-0.47928,1.908684,0.995347,-0.489726,4
2018-09-02,0.364878,-0.523248,1.327619,0.152915,5
2018-09-03,0.965344,0.523412,0.406925,1.496191,6


In [60]:
df.iat[0,1] = 0

In [61]:
df

Unnamed: 0,A,B,C,D,F
2018-08-29,0.0,0.0,-0.692513,1.118187,1
2018-08-30,0.394903,-0.004572,-0.698225,-0.387353,2
2018-08-31,0.983069,-0.360309,-1.28089,1.088226,3
2018-09-01,-0.47928,1.908684,0.995347,-0.489726,4
2018-09-02,0.364878,-0.523248,1.327619,0.152915,5
2018-09-03,0.965344,0.523412,0.406925,1.496191,6


In [62]:
# Overwrite every row of column D with a NumPy array of 5s (one entry per row).
df.loc[:, 'D'] = np.array([5] * len(df))

In [63]:
df

Unnamed: 0,A,B,C,D,F
2018-08-29,0.0,0.0,-0.692513,5,1
2018-08-30,0.394903,-0.004572,-0.698225,5,2
2018-08-31,0.983069,-0.360309,-1.28089,5,3
2018-09-01,-0.47928,1.908684,0.995347,5,4
2018-09-02,0.364878,-0.523248,1.327619,5,5
2018-09-03,0.965344,0.523412,0.406925,5,6


In [64]:
df2 = df.copy()

In [65]:
df2[df2 > 0]

Unnamed: 0,A,B,C,D,F
2018-08-29,,,,5,1
2018-08-30,0.394903,,,5,2
2018-08-31,0.983069,,,5,3
2018-09-01,,1.908684,0.995347,5,4
2018-09-02,0.364878,,1.327619,5,5
2018-09-03,0.965344,0.523412,0.406925,5,6


In [66]:
df2[df2 < 0]

Unnamed: 0,A,B,C,D,F
2018-08-29,,,-0.692513,,
2018-08-30,,-0.004572,-0.698225,,
2018-08-31,,-0.360309,-1.28089,,
2018-09-01,-0.47928,,,,
2018-09-02,,-0.523248,,,
2018-09-03,,,,,


In [67]:
# Negate every strictly positive entry; zeros and negative entries keep their
# values, so no entry of the frame is positive afterwards.
df2[df2 > 0] = -df2

In [68]:
df2

Unnamed: 0,A,B,C,D,F
2018-08-29,0.0,0.0,-0.692513,-5,-1
2018-08-30,-0.394903,-0.004572,-0.698225,-5,-2
2018-08-31,-0.983069,-0.360309,-1.28089,-5,-3
2018-09-01,-0.47928,-1.908684,-0.995347,-5,-4
2018-09-02,-0.364878,-0.523248,-1.327619,-5,-5
2018-09-03,-0.965344,-0.523412,-0.406925,-5,-6


### Missing Data (결측치)
- 참고 : [Missing data section](https://pandas.pydata.org/pandas-docs/stable/missing_data.html#missing-data)

- Pandas는 결측치를 표현하기 위해 주로 np.nan 값을 사용
    - 결측치는 기본적으로 계산에 포함되지 않는다.

#### Reindexing
- 지정된 축 상의 인덱스를 변경/추가/삭제
- 데이터의 복사본을 반환

In [69]:
# Keep only the first four dates and append a new column 'E'; 'E' has no data
# in df, so it is created with missing values.
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])

In [70]:
df1

Unnamed: 0,A,B,C,D,F,E
2018-08-29,0.0,0.0,-0.692513,5,1,
2018-08-30,0.394903,-0.004572,-0.698225,5,2,
2018-08-31,0.983069,-0.360309,-1.28089,5,3,
2018-09-01,-0.47928,1.908684,0.995347,5,4,


In [71]:
# Label slicing with .loc includes both endpoints, so the first two rows get E = 1.
df1.loc[dates[0]:dates[1], 'E'] = 1

In [72]:
df1

Unnamed: 0,A,B,C,D,F,E
2018-08-29,0.0,0.0,-0.692513,5,1,1.0
2018-08-30,0.394903,-0.004572,-0.698225,5,2,1.0
2018-08-31,0.983069,-0.360309,-1.28089,5,3,
2018-09-01,-0.47928,1.908684,0.995347,5,4,


In [73]:
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,F,E
2018-08-29,0.0,0.0,-0.692513,5,1,1.0
2018-08-30,0.394903,-0.004572,-0.698225,5,2,1.0


In [75]:
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,F,E
2018-08-29,0.0,0.0,-0.692513,5,1,1.0
2018-08-30,0.394903,-0.004572,-0.698225,5,2,1.0
2018-08-31,0.983069,-0.360309,-1.28089,5,3,5.0
2018-09-01,-0.47928,1.908684,0.995347,5,4,5.0


- 값이 nan인 위치를 나타내는 boolean 마스크를 얻음
    - nan인 값에만 True가 표시되게 하는 함수
    - 데이터프레임의 모든 값이 boolean 형태로 표시

In [76]:
pd.isna(df1)

Unnamed: 0,A,B,C,D,F,E
2018-08-29,False,False,False,False,False,False
2018-08-30,False,False,False,False,False,False
2018-08-31,False,False,False,False,False,True
2018-09-01,False,False,False,False,False,True


### Operation (연산)
- 참고 : [이진 (Binary) 연산의 기본 섹션](https://pandas.pydata.org/pandas-docs/stable/basics.html#basics-binop)

#### Stats (통계)
- 일반적으로 결측치를 제외한 후 연산
- 기술 통계를 수행

In [77]:
df.mean()

A    0.371486
B    0.257328
C    0.009711
D    5.000000
F    3.500000
dtype: float64

In [78]:
# Passing 1 as the axis averages across the columns, giving one mean per row (date).
df.mean(1)

2018-08-29    1.061497
2018-08-30    1.338421
2018-08-31    1.468374
2018-09-01    2.284950
2018-09-02    2.233850
2018-09-03    2.579136
Freq: D, dtype: float64

- 차원이 다른 객체로 연산
    - pandas는 지정된 차원을 따라 자동으로 브로드캐스팅

> broadcast란 numpy에서 유래한 용어로, 차원(shape)이 서로 다른 배열이나 스칼라 값 사이에서 연산을 수행할 때 결과가 만들어지는 규칙을 말한다

In [81]:
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates)

In [82]:
s

2018-08-29    1.0
2018-08-30    3.0
2018-08-31    5.0
2018-09-01    NaN
2018-09-02    6.0
2018-09-03    8.0
Freq: D, dtype: float64

In [83]:
# Shift the values two positions later along the index: the first two dates
# become NaN and the last two original values (6 and 8) fall off the end.
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)

In [84]:
s

2018-08-29    NaN
2018-08-30    NaN
2018-08-31    1.0
2018-09-01    3.0
2018-09-02    5.0
2018-09-03    NaN
Freq: D, dtype: float64

In [86]:
df

Unnamed: 0,A,B,C,D,F
2018-08-29,0.0,0.0,-0.692513,5,1
2018-08-30,0.394903,-0.004572,-0.698225,5,2
2018-08-31,0.983069,-0.360309,-1.28089,5,3
2018-09-01,-0.47928,1.908684,0.995347,5,4
2018-09-02,0.364878,-0.523248,1.327619,5,5
2018-09-03,0.965344,0.523412,0.406925,5,6


In [85]:
# Subtract s from every column, matching rows by index label; rows where s is
# NaN come out as NaN in all columns.
df.sub(s, axis='index')

Unnamed: 0,A,B,C,D,F
2018-08-29,,,,,
2018-08-30,,,,,
2018-08-31,-0.016931,-1.360309,-2.28089,4.0,2.0
2018-09-01,-3.47928,-1.091316,-2.004653,2.0,1.0
2018-09-02,-4.635122,-5.523248,-3.672381,0.0,0.0
2018-09-03,,,,,


#### Apply (적용)

In [89]:
df

Unnamed: 0,A,B,C,D,F
2018-08-29,0.0,0.0,-0.692513,5,1
2018-08-30,0.394903,-0.004572,-0.698225,5,2
2018-08-31,0.983069,-0.360309,-1.28089,5,3
2018-09-01,-0.47928,1.908684,0.995347,5,4
2018-09-02,0.364878,-0.523248,1.327619,5,5
2018-09-03,0.965344,0.523412,0.406925,5,6


In [87]:
# Apply np.cumsum to each column, producing running totals down the rows.
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D,F
2018-08-29,0.0,0.0,-0.692513,5,1
2018-08-30,0.394903,-0.004572,-1.390738,10,3
2018-08-31,1.377972,-0.364881,-2.671627,15,6
2018-09-01,0.898692,1.543802,-1.67628,20,10
2018-09-02,1.26357,1.020554,-0.348662,25,15
2018-09-03,2.228914,1.543966,0.058263,30,21


In [88]:
# Per-column spread: the largest value minus the smallest value of each column.
df.apply(lambda x: x.max() - x.min())

A    1.462349
B    2.431932
C    2.608508
D    0.000000
F    5.000000
dtype: float64

#### Histogramming (히스토그래밍)
- 참고 : [Histogramming and Discretization (히스토그래밍과 이산화)](https://pandas.pydata.org/pandas-docs/stable/basics.html#basics-discretization)

In [90]:
# Ten random integers from 0 up to (but not including) 7; unseeded, so the
# values differ on every run.
s = pd.Series(np.random.randint(0, 7, size=10))

In [91]:
s

0    1
1    3
2    0
3    5
4    4
5    6
6    2
7    5
8    6
9    2
dtype: int32

In [92]:
s.value_counts()

6    2
5    2
2    2
4    1
3    1
1    1
0    1
dtype: int64

#### String Methods (문자열 메소드)
- 참고 : [벡터화된 문자열 메소드](https://pandas.pydata.org/pandas-docs/stable/text.html#text-string-methods)

- Series는 문자열 처리 메소드 모음(set)을 가지고 있다.
- 이 메소드들은 `str` 속성에 포함되어 있으며, 배열의 각 요소를 쉽게 조작할 수 있게 해준다
- 문자열의 패턴 일치 확인은 기본적으로 정규 표현식을 사용
- 몇몇의 경우에는 항상 정규 표현식 사용

In [93]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])

In [94]:
s

0       A
1       B
2       C
3    Aaba
4    Baca
5     NaN
6    CABA
7     dog
8     cat
dtype: object

In [95]:
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object