In [2]:
# 建立 Series
'''
用 Series() 建立 Series
其中 data 可以是：
一個 ndarray
一個 dict
單一資料
'''

import numpy as np
import pandas as pd

arr = np.array(("Monkey D. Luffy", "Roronoa Zoro", "Nami",
                "Usopp", "Vinsmoke Sanji", "Tony Tony Chopper",
                "Nico Robin", "Franky", "Brook"))

print('轉換前: ', type(arr))
ser = pd.Series(arr, index = np.arange(9)+10)   # 預設的索引, 將 arange加 9個索引值
print('轉換後: ', type(ser))
print(ser)

轉換前:  <class 'numpy.ndarray'>
轉換後:  <class 'pandas.core.series.Series'>
10      Monkey D. Luffy
11         Roronoa Zoro
12                 Nami
13                Usopp
14       Vinsmoke Sanji
15    Tony Tony Chopper
16           Nico Robin
17               Franky
18                Brook
dtype: object


In [11]:
import pandas as pd

crew_dict = {
    "captain": "Monkey D. Luffy",
    "swordsman": "Roronoa Zoro",
    "navigator": "Nami",
    "sniper": "Usopp",
    "chef": "Vinsmoke Sanji",
    "doctor": "Tony Tony Chopper",
    "archaeologist": "Nico Robin",
    "shipwright": "Franky",
    "musician": "Brook"
}

# ser = pd.Series(crew_dict)                            # 會依照 dictionary的 key 排序
ser = pd.Series(crew_dict, index = crew_dict.keys())    # 會依照 original的 dic 排序
print(ser)
# print(crew_dict.keys())                               # 查看 keys
print("\n")
print(ser[[0, 4 , 5]])
print(ser[["captian", "sniper", "chef"]])               # 可用索引呼叫出來

captain            Monkey D. Luffy
swordsman             Roronoa Zoro
navigator                     Nami
sniper                       Usopp
chef                Vinsmoke Sanji
doctor           Tony Tony Chopper
archaeologist           Nico Robin
shipwright                  Franky
musician                     Brook
dtype: object


captain      Monkey D. Luffy
chef          Vinsmoke Sanji
doctor     Tony Tony Chopper
dtype: object
captian               NaN
sniper              Usopp
chef       Vinsmoke Sanji
dtype: object


In [17]:
import pandas as pd

crew_dict = {
    "captain": "Monkey D. Luffy",
    "swordsman": "Roronoa Zoro",
    "navigator": "Nami",
    "sniper": "Usopp",
    "chef": "Vinsmoke Sanji",
    "doctor": "Tony Tony Chopper",
    "archaeologist": "Nico Robin",
    "shipwright": "Franky",
    "musician": "Brook"
}

ser = pd.Series(crew_dict, index = crew_dict.keys()) 
print(type(ser))
print("\n")
print(type(ser.values))          # panda裡面的 series, 還是屬於 numpy
print("\n")
print(ser.index)                 # series 有 index 屬性
print("\n")
print(ser[:4])                   # series 可做 slicing
print("\n")

female_filter = ser.isin(["Nami", "Nico Robin"])         # 透過判斷條件進行布林篩選
print(ser[female_filter])

<class 'pandas.core.series.Series'>


<class 'numpy.ndarray'>


Index(['captain', 'swordsman', 'navigator', 'sniper', 'chef', 'doctor',
       'archaeologist', 'shipwright', 'musician'],
      dtype='object')


captain      Monkey D. Luffy
swordsman       Roronoa Zoro
navigator               Nami
sniper                 Usopp
dtype: object


navigator              Nami
archaeologist    Nico Robin
dtype: object


In [19]:
# NumPy 的函數也都適用
import pandas as pd

crew_age = {
    "Monkey D. Luffy": 19,
    "Roronoa Zoro": 21,
    "Nami": 20,
    "Usopp": 19,
    "Vinsmoke Sanji": 21,
    "Tony Tony Chopper": 17,
    "Nico Robin": 30,
    "Franky": 36,
    "Brook": 90
}

ser = pd.Series(crew_age)
print(ser - 2)               # 同樣適用 element-wise 運算
print("草帽海賊團的平均年齡：%.2f" % np.mean(ser))
print("草帽海賊團的年齡標準差：%.2f" % np.std(ser))

Brook                88
Franky               34
Monkey D. Luffy      17
Nami                 18
Nico Robin           28
Roronoa Zoro         19
Tony Tony Chopper    15
Usopp                17
Vinsmoke Sanji       19
dtype: int64
草帽海賊團的平均年齡：30.33
草帽海賊團的年齡標準差：21.88


In [27]:
# 建立 DataFrame
# data 是一個 dict
# 不要從 np.array轉換過來, 要注意布林形態
'''
用 DataFrame() 建立 DataFrame
其中 data 是：
一個 dict
一個 ndarray
'''

import pandas as pd

straw_hat_dict = {"name": ["Monkey D. Luffy", "Roronoa Zoro", "Nami", "Usopp", "Vinsmoke Sanji", "Tony Tony Chopper", "Nico Robin", "Franky", "Brook"],
                  "age": [19, 21, 20, 19, 21, 17, 30, 36, 90],
                  "is_male": [True, True, False, True, True, True, False, True, True]
}

# df = pd.DataFrame(straw_hat_dict)    # 欄標籤預設排序
df = pd.DataFrame(straw_hat_dict, columns = ["name", "age", "is_male"]) # 指定欄標籤排序
print(type(df))                      # 是一個 data.frame的 format
print("\n")
print(df.dtypes)                     # 可容許多個 data的 type, 所以是複數 types

df

<class 'pandas.core.frame.DataFrame'>


name       object
age         int64
is_male      bool
dtype: object


Unnamed: 0,name,age,is_male
0,Monkey D. Luffy,19,True
1,Roronoa Zoro,21,True
2,Nami,20,False
3,Usopp,19,True
4,Vinsmoke Sanji,21,True
5,Tony Tony Chopper,17,True
6,Nico Robin,30,False
7,Franky,36,True
8,Brook,90,True


In [31]:
# 匯入時使用 dictionary, 不要使用 np.array
# 避免造成布林形態錯誤

import numpy as np
import pandas as pd

arr = np.array([
    ["Monkey D. Luffy", 19, True],
    ["Roronoa Zoro", 21, True],
    ["Nami", 20, False],
    ["Usopp", 19, True],
    ["Vinsmoke Sanji", 21, True],
    ["Tony Tony Chopper", 17, True],
    ["Nico Robin", 30, False],
    ["Franky", 36, True],
    ["Brook", 90, True]
])

df = pd.DataFrame(arr, columns = ["name", "age", "is_male"])       # 所有字元都是以字串為主
print(df.dtypes)
df['age'] = df['age'].astype(int)                                  # 轉換成整數型態再輸出

df['is_male'] = df['is_male'].astype(bool)                         # 轉換成布林形態再輸出
print("\n")
print(df.dtypes)
df

name       object
age        object
is_male    object
dtype: object


name       object
age         int32
is_male      bool
dtype: object


Unnamed: 0,name,age,is_male
0,Monkey D. Luffy,19,True
1,Roronoa Zoro,21,True
2,Nami,20,True
3,Usopp,19,True
4,Vinsmoke Sanji,21,True
5,Tony Tony Chopper,17,True
6,Nico Robin,30,True
7,Franky,36,True
8,Brook,90,True


In [42]:
import pandas as pd

straw_hat_dict = {"name": ["Monkey D. Luffy", "Roronoa Zoro", "Nami", "Usopp", "Vinsmoke Sanji", "Tony Tony Chopper", "Nico Robin", "Franky", "Brook"],
                  "age": [19, 21, 20, 19, 21, 17, 30, 36, 90],
                  "is_male": [True, True, False, True, True, True, False, True, True]
}

df = pd.DataFrame(straw_hat_dict, columns = ["name", "age", "is_male"])      # 指定欄標籤排序
df['age_2_yr_ago'] = df['age'] - 2
df['fav_food'] = ["Meat", "Food matches wine", "Orange", "Fish", "Food matches black tea",
                  "Sweets", "Food matches coffee", "Food matches coke", "Milk"]     # 直接指派新增一個變數
# del df['is_male']          # 利用 del 刪除變數
ser = df.pop('is_male')      # 利用 .pop() 將變數刪除後指派給一個 Series
df.insert(0, 'favorite_food', 'fav_food')     # 利用 .insert() 指定變數新增的位置

print(df.name)
df

0      Monkey D. Luffy
1         Roronoa Zoro
2                 Nami
3                Usopp
4       Vinsmoke Sanji
5    Tony Tony Chopper
6           Nico Robin
7               Franky
8                Brook
Name: name, dtype: object


Unnamed: 0,favorite_food,name,age,age_2_yr_ago,fav_food
0,fav_food,Monkey D. Luffy,19,17,Meat
1,fav_food,Roronoa Zoro,21,19,Food matches wine
2,fav_food,Nami,20,18,Orange
3,fav_food,Usopp,19,17,Fish
4,fav_food,Vinsmoke Sanji,21,19,Food matches black tea
5,fav_food,Tony Tony Chopper,17,15,Sweets
6,fav_food,Nico Robin,30,28,Food matches coffee
7,fav_food,Franky,36,34,Food matches coke
8,fav_food,Brook,90,88,Milk
