#  <font color=red> Module_00_建立序列與序列的基本操作</font>

## 建立序列(Series)

In [None]:
import pandas as pd
import numpy as np

# Build a Series from a Python list.
# `int64` in the displayed output is the dtype of the elements.
# No index is passed, so the default index labels are used.
a = pd.Series([10, 11, 12, 13, 14])
a 

In [None]:
a[3] # this lookup goes by index label, not by position

In [None]:
b = pd.Series(['Mike', 'Marcia', 'Mikael', 'Bleu'])
b

In [None]:
c = pd.Series([2]*5)
c

In [None]:
d = pd.Series(list('abcde'))
d

---

In [None]:
# Build a Series from a dict.
# The dict keys are used as the index labels.
e = pd.Series({'Mike': 'Dad',
               'Marcia': 'Mom',
               'Mikael': 'Son',
               'Bleu': 'Best doggies ever'})
e

---

In [None]:
# Build a Series from a NumPy ndarray (here the integers 4..8).
f = pd.Series(np.arange(4, 9))
f

In [None]:
g = pd.Series(np.linspace(0, 9, 5))
g

In [None]:
# np.random.normal() with its default arguments draws from a normal
# distribution with mean 0 and standard deviation 1 (the bell curve).
# Probability of falling within one standard deviation: about 68%
# Probability of falling within two standard deviations: about 95%
# Probability of falling within three standard deviations: about 99.7%
# The seed is fixed first so the five draws are reproducible.
np.random.seed(12345)
h = pd.Series(np.random.normal(size = 5))
h

---

In [None]:
# Build a Series from a single scalar value.
i = pd.Series(2)
i

## 序列的屬性

In [None]:
a = pd.Series([1, 2, 3])
a

In [None]:
a.values

In [None]:
type(a.values)

In [None]:
a.index

In [None]:
# Number of elements; the built-in len() reports the same thing.
a.size

In [None]:
len(a)

In [None]:
a.shape

---

In [None]:
b = pd.Series({'Mike': 'Dad',
               'Marcia': 'Mom',
               'Mikael': 'Son',
               'Bleu': 'Best doggies ever'}, name = 'family')
b

In [None]:
b.values

In [None]:
b.index

In [None]:
b.size

In [None]:
b.shape

In [None]:
b.name

In [None]:
b.index.name = 'people' # the index itself can be given a name too

In [None]:
b

## 在序列建立時指定索引

In [None]:
# Two parallel lists: the labels to attach and the values they label.
role = ['Dad', 'Mom', 'Son', 'Dog']
people = ['Mike', 'Marcia', 'Mikael', 'Bleu']

# Passing `index` explicitly replaces the default integer labels.
a = pd.Series(data=people, index=role)
a

In [None]:
a.index

In [None]:
a['Dad']

## 使用 pd.Series物件.head() 與 pd.Series物件.tail() 與 pd.Series物件.take() 來查詢

In [None]:
a = pd.Series(np.arange(1, 10), index = list('abcdefghi'))
a

In [None]:
a.head()

In [None]:
a.head(3) # can also be written as a.head(n = 3)

In [None]:
a.tail()

In [None]:
a.tail(3)

In [None]:
a.take([1, 5, 8]) # note: take() selects by position, not by index label

## 利用 [ ] 運算子來查詢

In [None]:
a = pd.Series(np.arange(10, 15), index = list('abcde'))
a

In [None]:
# Look up by index label.
# A single label returns a single value.
a['b']

In [None]:
# To look up several labels, wrap them in a list.
# A list of labels returns a Series.
a[['d', 'e']] 

In [None]:
# Look up by position. Plain `a[0]` on this string-labelled Series relies on
# the positional fallback of `[]`, which pandas has deprecated; `.iloc` states
# the intent explicitly and returns the same element.
a.iloc[0]

In [None]:
# Look up several positions by wrapping them in a list. Integer keys passed to
# plain `[]` on a string-labelled Series use the deprecated positional
# fallback, so `.iloc` is used to select by position explicitly.
a.iloc[[1, 3]]

---

In [None]:
b = pd.Series([10 ,20, 30, 40], index = [2, 3, 4, 5])
b

In [None]:
b[2] # note: when the index labels are integers, `[]` looks up by label, not by position

In [None]:
b[[3, 2]]

## 以 .iloc[ ] 指明位置查詢

In [None]:
a = pd.Series(np.arange(10, 15), index = list('abcde'))
a

In [None]:
a.iloc[1]

In [None]:
a.iloc[[2, 3]]

In [None]:
a.iloc[[2, 8]] # deliberately raises (IndexError): position 8 is out of bounds for this 5-element Series

In [None]:
a.iloc['a'] # deliberately raises (TypeError): .iloc accepts positions only, not index labels

## 以 .loc[ ] 指明索引標籤查詢

In [None]:
s = pd.Series(np.arange(10, 15), index = list('abcde'))
s

In [None]:
t = pd.Series([10 ,20, 30, 40], index = [2, 3, 4, 5])
t

In [None]:
s.loc['b']

In [None]:
s.loc[['c', 'd']]

In [None]:
t.loc[[5, 2]]

In [None]:
s.loc[['a', 'f']]  # deliberately raises (KeyError): 'f' is not one of the index labels

In [None]:
s.loc[1] # deliberately raises (KeyError): .loc looks up labels, and 1 is not a label of s

## 把序列切割(slicing)成子集合

In [None]:
a = pd.Series(np.arange(100, 115), index = np.arange(10, 25))
a

In [None]:
# With the plain `[]` operator, an integer slice selects by position.
# NOTE: the index labels of `a` run from 10 to 24, so a[10:12] returns the
# elements at positions 10 and 11 (labels 20 and 21), not labels 10..12.
# Uncomment any line to try it:
# print(a[10:12])
# print(a[1:6]) 
# print(a[1:6:2])
# print(a[:5])
# print(a[4:])
# print(a[:5:2])
# print(a[4::2])
# print(a[::-1])
# print(a[4::-2])
# print(a[-4:])
# print(a[:-4])
# print(a[-4:-1])

In [None]:
# print(a.iloc[1:6])
# print(a.iloc[1:6:2])
# print(a.iloc[:5])
# print(a.iloc[4:])
# print(a.iloc[:5:2])
# print(a.iloc[4::2])
# print(a.iloc[::-1])
# print(a.iloc[4::-2])
# print(a.iloc[-4:])
# print(a.iloc[:-4])
# print(a.iloc[-4:-1])

In [None]:
# Slicing by index label with .loc includes the end label.
# print(a.loc[10:12]) 
# print(a.loc[10:18:2])
print(a.loc[:19])

---

In [None]:
a = pd.Series(np.arange(100, 110), index = np.arange(10, 20))
a

In [None]:
# A slice is a view: modifying it also modifies the original data.
# NOTE(review): this holds without pandas Copy-on-Write; with Copy-on-Write
# enabled (the default from pandas 3.0) the write does not propagate to `a`
# — confirm against the pandas version this notebook targets.
b = a[1:4] 
b

In [None]:
b[11] = 0
b

In [None]:
a 

---

In [None]:
a = pd.Series(np.arange(100, 110), index = np.arange(10, 20))
a

In [None]:
# NOTE(review): the "view" behaviour assumes pandas Copy-on-Write is off — confirm.
b = a.iloc[1:4] # this is a slice, so it is a view as well
b

In [None]:
b[11] = 0
b

In [None]:
a

---

In [None]:
a = pd.Series(np.arange(100, 110), index = np.arange(10, 20))
a

In [None]:
b = a.iloc[[1, 2, 3]] # selecting with a list returns a copy, so `a` is not affected by later writes to `b`
b

In [None]:
b[11] = 0
b

In [None]:
a

---

In [None]:
c = pd.Series(np.arange(0,5), index = ['a', 'b', 'c', 'd', 'e'])
c

In [None]:
c[1:3]

In [None]:
c['a':'c'] # slicing by index label includes the end label

In [None]:
c.loc['a':'c']

## 利用索引標籤實現對齊

In [None]:
a = pd.Series([1, 2], index = ['a', 'b'] )
a

In [None]:
b = pd.Series([4, 3], index = ['b', 'a'] )
b

In [None]:
c = a + b
c

In [None]:
# Works like broadcasting:
# conceptually pd.Series(2, a.index) is built first and then multiplied by a.
d = a*2 
d

---

In [None]:
e = pd.Series([5, 6], index = ['b', 'c'])
e

In [None]:
a + e # labels that cannot be aligned (present in only one operand) yield NaN

---

In [None]:
# Index labels do not have to be unique ('a' appears twice here).
f = pd.Series([1.0, 2.0, 3.0], index = ['a', 'a', 'c'])
f

In [None]:
g = pd.Series([4.0, 5.0, 6.0, 7.0], index = ['a', 'a', 'c', 'a'])
g

In [None]:
# Duplicate labels are combined as a Cartesian product: if f has n 'a' labels
# and g has m 'a' labels, the result has n*m 'a' entries.
f + g 

## 布林選擇

In [None]:
a = pd.Series(np.arange(0, 5), index = list('abcde'))
a

In [None]:
logical_results = a >= 3
logical_results

In [None]:
a[logical_results]

In [None]:
a[a > 1]

In [None]:
a[(a >= 2) and (a < 5)] # deliberately raises (ValueError): Python's `and` / `or` cannot combine boolean Series

In [None]:
a[(a >= 2) & (a < 5)] # use the element-wise operators & and | instead

In [None]:
(a >= 0).all()

In [None]:
(a < 2).any()

In [None]:
(a < 2).sum()

## 將序列重新索引

In [None]:
np.random.seed(123456)
a = pd.Series(np.random.randn(5))
a

In [None]:
# In-place: assigning to .index replaces the labels directly.
# The values stay where they are; only the labels change.
a.index = [4, 3, 2, 1, 0]
a

In [None]:
a.index = ['a', 'b', 'c', 'd', 'e'] 
a

---

In [None]:
# Application: two Series whose labels look alike but differ in type
# (d uses the integers 0, 1, 2; e uses the strings '0', '1', '2').
d = pd.Series([0, 1, 2], index = [0, 1, 2])
e = pd.Series([3, 4, 5], index = ['0', '1', '2'])
d

In [None]:
e

In [None]:
d + e

In [None]:
# Convert e's string labels to integers so they line up with d's labels.
# An alternative spelling is e.index = e.index.astype('int').
e.index = e.index.values.astype('int')
d + e

---

In [None]:
b = pd.Series(np.random.randn(4), index = ['a', 'b', 'c', 'd'])
b

In [None]:
# 'g' is not a label of b, so it has no value to carry over into c.
c = b.reindex(['a', 'c', 'g']) # reindex is not in-place: it returns a new Series
c

In [None]:
# Five random values labelled with single letters.
# NOTE(review): the labels are 'a', 'b', 'c', 'e', 'd' (e before d) — possibly
# a typo for 'abcde'; confirm whether the order is intentional.
d = pd.Series(np.random.randn(5), index = list('abced'))
d

In [None]:
e = d.reindex(['a', 'f'], fill_value = 0)
e

In [None]:
f = pd.Series(['red', 'green', 'blue'], index = [0, 3, 5])
f

In [None]:
# Reindex f (labels 0, 3, 5) onto 0..6; 'ffill' fills each new label with the
# value of the closest existing label before it.
g = f.reindex(np.arange(0, 7), method = 'ffill')
g

In [None]:
# Same reindex, but 'bfill' fills each new label with the value of the closest
# existing label after it; label 6 has none after it, so it stays NaN.
h = f.reindex(np.arange(0, 7), method = 'bfill')
h

## 原地修改序列

In [None]:
np.random.seed(123456)
a = pd.Series(np.random.randn(3), index = ['a', 'b', 'c'])
a

In [None]:
a['d'] = 100 # in-place: 'd' is not an existing label, so a new entry is added
a

In [None]:
a['d'] = -100 # in-place: 'd' exists now, so its value is overwritten
a

In [None]:
del a['a'] # in-place: removes the entry labelled 'a'
a

---

In [None]:
b = a.copy()
b

In [None]:
# NOTE(review): the "view" behaviour assumes pandas Copy-on-Write is off — confirm.
c = b[:2] # a slice of b, i.e. a view onto b's data
c

In [None]:
c['b'] = 0
c

In [None]:
b 

In [None]:
a

## 綜合應用

In [None]:
# Index of consecutive dates from 2016-04-01 through 2016-04-06.
dates = pd.date_range(start='2016-04-01', end='2016-04-06')
dates

In [None]:
temps1 = pd.Series([80, 82, 85, 90, 83, 87], index = dates)
temps1

In [None]:
temps1['2016-04-04']

---

In [None]:
temps2 = pd.Series([70, 75, 69, 83, 79, 77], index = dates)
temps2

In [None]:
# The subtraction aligns the two Series on their index labels first
# (both were built on the same `dates` index).
temp_diffs = temps1 - temps2
temp_diffs

In [None]:
# Third element by position. The index holds dates, so an integer key passed
# to plain `[]` relies on the deprecated positional fallback; `.iloc` selects
# by position explicitly and returns the same element.
temp_diffs.iloc[2]

In [None]:
temp_diffs.mean()

---

In [None]:
# Mapping of state name to a number; the keys become the index labels of `a`.
sdata = dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000)
a = pd.Series(data=sdata)
a

In [None]:
'Ohio' in a 

In [None]:
'california' in a

In [None]:
# Building a Series from another Series plus an explicit index aligns on the
# labels: 'california' has no entry in `a`, and 'Utah' is not in `states`.
states = ['california', 'Ohio', 'Oregon', 'Texas']
b = pd.Series(a, index = states)
b

In [None]:
pd.isnull(b) # can also be written as the method call b.isnull()

In [None]:
pd.notnull(b)

In [None]:
a + b 

In [None]:
b.name = 'population'
b

In [None]:
b.index.name = 'state'
b