In [93]:
import numpy as np
import pandas as pd

import datetime
from datetime import datetime, date

import matplotlib.pyplot as pyplot
%matplotlib inline

## 파이썬 리스트와 딕셔너리를 사용한 생성

In [94]:
# Create a Series from a Python list; pandas assigns a default 0..4 integer index
s = pd.Series([10, 11, 12, 13, 14])
s

0    10
1    11
2    12
3    13
4    14
dtype: int64

In [95]:
# Look up the value stored under index label 3
s[3]

13

In [96]:
# Series values need not be integers: build one from strings
pd.Series(['Mike', 'Marcia', 'Mikael', 'Bleu'])

0      Mike
1    Marcia
2    Mikael
3      Bleu
dtype: object

In [97]:
# Create a Series holding the value 2 five times (via list repetition)
pd.Series([2] * 5)

0    2
1    2
2    2
3    2
4    2
dtype: int64

In [98]:
# list('abcde') splits the string so that each character becomes one item
pd.Series(list('abcde'))

0    a
1    b
2    c
3    d
4    e
dtype: object

In [99]:
# Create a Series from a dict: the keys become index labels, the values become the data
pd.Series({'Mike' : 'Dad',
'Marcia' : 'Mom',
'Mikael' : 'Son',
'Bleu' : 'Best doggie ever'})

Mike                   Dad
Marcia                 Mom
Mikael                 Son
Bleu      Best doggie ever
dtype: object

## Numpy 함수를 사용한 생성

In [100]:
# Integers 4 through 8 (np.arange excludes the stop value 9)
pd.Series(np.arange(4, 9))

0    4
1    5
2    6
3    7
4    8
dtype: int32

In [101]:
# Five evenly spaced values from 0 to 9, both endpoints included
pd.Series(np.linspace(0, 9, 5))

0    0.00
1    2.25
2    4.50
3    6.75
4    9.00
dtype: float64

In [102]:
# Five random draws from a normal distribution.
# The seed is fixed first so the values shown are reproducible on re-run.
np.random.seed(12345)
pd.Series(np.random.normal(size = 5))

0   -0.204708
1    0.478943
2   -0.519439
3   -0.555730
4    1.965781
dtype: float64

## 스칼라 값을 사용한 생성

In [103]:
# A single scalar yields a one-item Series
s = pd.Series(2)
s

0    2
dtype: int64

In [104]:
# Multiplying a Series by a scalar applies the operation to every item
s = pd.Series(np.arange(0, 5))
s * 2

0    0
1    2
2    4
3    6
4    8
dtype: int32

## .index와 .values 속성

In [105]:
# .values exposes the data held by the Series
s = pd.Series([1, 2, 3])
s.values


array([1, 2, 3], dtype=int64)

In [106]:
# The values come back as a NumPy ndarray
type(s.values)

numpy.ndarray

In [107]:
# .index exposes the index of the Series (here the default RangeIndex)
s.index

RangeIndex(start=0, stop=3, step=1)

## Series의 크기와 형태

In [108]:
# Example Series; len() reports the number of items
s = pd.Series([1, 2, 3, 4])
len(s)

4

In [109]:
# .size also reports the number of items in the Series
s.size

4

In [110]:
# .shape is a one-element tuple holding the number of items
s.shape

(4,)

## 생성 시 인덱스 지정

In [111]:
# Supply the index explicitly: each name is stored under that person's role
role = ['Dad', 'Mom', 'Son', 'Dog']
labels = ['Mike', 'Marcia', 'Mikael', 'Bleu']
s = pd.Series(data = labels, index = role)
s

Dad      Mike
Mom    Marcia
Son    Mikael
Dog      Bleu
dtype: object

In [112]:
# Examine the index that was supplied at creation
s.index

Index(['Dad', 'Mom', 'Son', 'Dog'], dtype='object')

In [113]:
# Look up by index label: who is the Dad?
s['Dad']

'Mike'

## head, tail, take

In [114]:
# a nine-item Series: values 1..9 (np.arange excludes 10) labelled 'a'..'i'
s = pd.Series(np.arange(1, 10),
index = list('abcdefghi'))

In [115]:
# .head() with no argument shows the first five items
s.head()

a    1
b    2
c    3
d    4
e    5
dtype: int32

In [116]:
# The first three items.
# head() shows as many items as requested; the count may also be passed positionally.
print(s.head(3))
s.head(n = 3) # keyword form; s.head(3) is equivalent


a    1
b    2
c    3
dtype: int32


a    1
b    2
c    3
dtype: int32

In [117]:
# .tail() with no argument shows the last five items
s.tail()

e    5
f    6
g    7
h    8
i    9
dtype: int32

In [118]:
# the last three items
s.tail(3)

g    7
h    8
i    9
dtype: int32

In [119]:
# .take() selects items by zero-based position: here positions 1, 5 and 8
s.take([1, 5, 8])

b    2
f    6
i    9
dtype: int32

# 레이블과 포지션으로 값 가져오기
## [] 연산자를 사용하는 레이블 검색 (.ix[] 속성은 pandas 1.0에서 제거됨)

In [120]:
# Series with string labels 'a'..'e', used below to demonstrate lookups
s1 = pd.Series(np.arange(10, 15), index = list('abcde'))
s1

a    10
b    11
c    12
d    13
e    14
dtype: int32

In [121]:
# get the value with label 'a'
s1['a']

10

In [122]:
# get multiple items by passing a list of index labels
s1[['d', 'b']]

d    13
b    11
dtype: int32

In [123]:
# Get values by position (the 4th and 2nd items).
# s1 has string labels, so plain s1[[3, 1]] would depend on pandas' deprecated
# fallback that treats integer keys passed to [] as positions; .iloc asks for
# positional lookup explicitly and returns the same items.
s1.iloc[[3, 1]]

d    13
b    11
dtype: int32

In [124]:
# Series whose index labels are themselves integers (10..13),
# used to show that [] matches integer keys against labels here
s2 = pd.Series([1, 2, 3, 4], index = [10, 11, 12, 13])
s2


10    1
11    2
12    3
13    4
dtype: int64

In [125]:
# with an integer index, the keys 13 and 10 are matched as labels, not positions
s2[[13, 10]]

13    4
10    1
dtype: int64

## iloc[]을 사용하는 명시적 포지션 검색

In [126]:
# .iloc looks up explicitly by position: items 0 and 2
s1.iloc[[0, 2]]

a    10
c    12
dtype: int32

In [127]:
# .iloc is positional even when the index holds integers: positions 3 and 2
s2.iloc[[3, 2]]

13    4
12    3
dtype: int64

## .loc[]을 사용하는 명시적 레이블 검색

In [128]:
# .loc looks up explicitly by index label
s1.loc[['a', 'd']]

a    10
d    13
dtype: int32

In [129]:
# get the items whose index labels are 11 and 12 (.loc matches labels, not positions)
s2.loc[[11, 12]]

11    2
12    3
dtype: int64

In [130]:
# display s1 again for reference
s1

a    10
b    11
c    12
d    13
e    14
dtype: int32

## 서브셋으로 Series 슬라이싱

In [131]:
# Series used for the slicing examples: values 100..109 under integer labels 10..19
s = pd.Series(np.arange(100, 110), index = np.arange(10, 20))
s

10    100
11    101
12    102
13    103
14    104
15    105
16    106
17    107
18    108
19    109
dtype: int32

In [132]:
# slice by position: items at positions 1 through 5 (the stop value 6 is excluded)
s[1:6]

11    101
12    102
13    103
14    104
15    105
dtype: int32

In [133]:
# the same items selected through an explicit list of positions
s.iloc[[1, 2, 3, 4, 5]]

11    101
12    102
13    103
14    104
15    105
dtype: int32

In [134]:
# a step of 2 picks the items at positions 1, 3 and 5
s[1: 6: 2]

11    101
13    103
15    105
dtype: int32

In [135]:
# first five items by slicing; same result as .head(5)
s[:5]

10    100
11    101
12    102
13    103
14    104
dtype: int32

In [136]:
# from position 4 (zero-based, i.e. the fifth item) through the end
s[4:]

14    104
15    105
16    106
17    107
18    108
19    109
dtype: int32

In [137]:
# every other item within the first five positions (0, 2, 4)
s[ : 5 : 2]

10    100
12    102
14    104
dtype: int32

In [138]:
# every other item starting at position 4 (zero-based)
s[4::2]

14    104
16    106
18    108
dtype: int32

In [139]:
# a step of -1 walks the Series backwards, reversing it
s[::-1]

19    109
18    108
17    107
16    106
15    105
14    104
13    103
12    102
11    101
10    100
dtype: int32

In [140]:
# every other item going backwards from position 4 (positions 4, 2, 0)
s[4::-2]

14    104
12    102
10    100
dtype: int32

In [141]:
# -4: selects the last four items
s[-4:]

16    106
17    107
18    108
19    109
dtype: int32

In [142]:
# :-4 selects everything except the last four items
s[:-4]

10    100
11    101
12    102
13    103
14    104
15    105
dtype: int32

In [143]:
# the last four items minus the final one; equivalent to s.tail(4).head(3)
s[-4:-1]

16    106
17    107
18    108
dtype: int32

In [144]:
# Five integers labelled 'a'..'e'; the next two cells slice this Series
s = pd.Series(np.arange(5), index = list('abcde'))

In [145]:
# integer slice bounds select by position because the index holds characters
s[1 : 3]

b    1
c    2
dtype: int32

In [146]:
# string bounds slice by index label; unlike a positional slice, the end label 'd' is included
s['b':'d']

b    1
c    2
d    3
dtype: int32

## 인덱스 레이블을 통한 정렬 ★★★

In [147]:
# First Series for the alignment demo: labels in the order a, b
s1 = pd.Series([1, 2], index = ['a', 'b'])
s1

a    1
b    2
dtype: int64

In [148]:
# Second Series for the alignment demo: same labels, opposite order (b, a)
s2 = pd.Series([4, 3], index = ['b', 'a'])
s2

b    4
a    3
dtype: int64

In [149]:
# Addition pairs values by index label, not by position: a -> 1 + 3, b -> 2 + 4
s1 + s2

a    4
b    6
dtype: int64

In [150]:
# multiply all values in s1 by 2
s1 * 2

a    2
b    4
dtype: int64

In [151]:
# Series of the scalar 2 repeated across s1's index
t = pd.Series(2, s1.index)
t

a    2
b    2
dtype: int64

In [152]:
# multiply s1 by t; the result matches s1 * 2
s1 * t

a    2
b    4
dtype: int64

In [153]:
# A Series with labels b and c; it will be added to s1 (labels a and b)
s3 = pd.Series([5, 6], index = ['b', 'c'])
s3

b    5
c    6
dtype: int64

In [154]:
# s1 and s3 share only the label 'b'.
# Labels present in just one of the two Series ('a' and 'c') yield NaN.
s1 + s3

a    NaN
b    7.0
c    NaN
dtype: float64

In [155]:
# s1 with a duplicated label: two 'a' labels and one 'b'
s1 = pd.Series([1.0, 2.0, 3.0], index = ['a', 'a', 'b'])
s1

a    1.0
a    2.0
b    3.0
dtype: float64

In [156]:
# s2 with three 'a' labels and one 'c'
s2 = pd.Series([4.0, 5.0, 6.0, 7.0], index = ['a', 'a', 'c', 'a'])
s2

a    4.0
a    5.0
c    6.0
a    7.0
dtype: float64

In [157]:
# Results in 6 'a' index labels, and NaN for b and c.
# Duplicate labels are paired in every combination: 2 'a' rows x 3 'a' rows = 6 'a' rows.
s1 + s2

a    5.0
a    6.0
a    8.0
a    6.0
a    7.0
a    9.0
b    NaN
c    NaN
dtype: float64

## 불리언 선택

In [158]:
# which items have values >= 3? The comparison yields a boolean Series
s = pd.Series(np.arange(0, 5), index = list('abcde'))
logical_results = s >= 3
logical_results

a    False
b    False
c    False
d     True
e     True
dtype: bool

In [159]:
# boolean selection keeps only the items where the mask is True
s[logical_results]

d    3
e    4
dtype: int32

In [160]:
# the condition can also be written inline, without a separate mask variable.
# NOTE: this tests > 5, not the >= 3 used for logical_results; no value in s
# exceeds 5, so the selection is empty.
s[s > 5]

Series([], dtype: int32)

In [161]:
# left commented out because it throws an exception:
# s[s >= 2 and s < 5]   Python's `and` cannot combine element-wise conditions

In [162]:
# correct syntax: parenthesize each condition and combine them with &
s[(s >= 2) & (s < 5)]

c    2
d    3
e    4
dtype: int32

In [163]:
# are all items >= 0? .all() is True only if every entry of the mask is True
(s >= 0 ).all()

True

In [164]:
# any items < 2? Call .any() on the boolean mask itself.
# Filtering first (s[s < 2].any()) would instead test the truthiness of the
# selected values, and would wrongly report False if the only match were 0.
(s < 2).any()

True

In [165]:
# how many values < 2? Summing the boolean mask counts its True entries
(s < 2).sum()

2