# Pandas Data Frame

In [8]:
import pandas as pd

# Raw column values: one group label and one head count per row.
groups = ["非人", "一般人", "雲玩家", "作弊狗", "挑戰組"]
player = [9, 69, 29, 164, 87]

# Pair each column name with its list of values, then build the frame
# from that mapping.
ironmen_dict = dict(zip(("組別", "人員"), (groups, player)))
ironmen_df = pd.DataFrame(data=ironmen_dict)
ironmen_df

Unnamed: 0,組別,人員
0,非人,9
1,一般人,69
2,雲玩家,29
3,作弊狗,164
4,挑戰組,87


In [9]:
# Show the number of dimensions, the (rows, columns) shape and the dtype of
# each column, one item per line with a "---" divider between them.
print(
    ironmen_df.ndim,
    "---",
    ironmen_df.shape,
    "---",
    ironmen_df.dtypes,
    sep="\n",
)

2
---
(5, 2)
---
組別    object
人員     int64
dtype: object


In [10]:
# Column-wise sum: the numeric column is totalled, while the string column is
# concatenated into one long string.
print(ironmen_df.sum())
# Mean and median only make sense for numeric columns. numeric_only=True
# skips the string column explicitly; without it pandas >= 2.0 raises a
# TypeError instead of silently dropping that column.
print(ironmen_df.mean(numeric_only=True))  # average per numeric column
print(ironmen_df.median(numeric_only=True))  # median per numeric column
# Descriptive statistics (count, mean, std, min, quartiles, max).
print(ironmen_df.describe())

組別    非人一般人雲玩家作弊狗挑戰組
人員               358
dtype: object
人員    71.6
dtype: float64
人員    69.0
dtype: float64
               人員
count    5.000000
mean    71.600000
std     60.239522
min      9.000000
25%     29.000000
50%     69.000000
75%     87.000000
max    164.000000


In [11]:
import numpy as np
import pandas as pd
# Six groups and their ironmen counts; each list contains one missing value.
groups = ["Modern Web", "DevOps", np.nan, "Big Data", "Security", "自我挑戰組"]
ironmen = [59, 9, 19, 14, 6, np.nan]
ironmen_dict = dict(zip(("groups", "ironmen"), (groups, ironmen)))

# Build the data frame from the column mapping.
ironmen_df = pd.DataFrame(data=ironmen_dict)

# Boolean mask that is True where the group name is missing.
print(ironmen_df["groups"].isna())
# Boolean mask that is True where the ironmen count is NOT missing.
print(ironmen_df["ironmen"].notna())

0    False
1    False
2     True
3    False
4    False
5    False
Name: groups, dtype: bool
0     True
1     True
2     True
3     True
4     True
5    False
Name: ironmen, dtype: bool


In [12]:
# Strategy 1: drop every row that contains a missing value.
ironmen_df_na_dropped = ironmen_df.dropna()
print(ironmen_df_na_dropped, "---", sep="\n")

# Strategy 2: replace every missing value with 0.
ironmen_df_na_filled = ironmen_df.fillna(value=0)
print(ironmen_df_na_filled, "---", sep="\n")

# Strategy 3: replace missing values with a column-specific fill value.
fill_by_column = {"groups": "Cloud", "ironmen": 71}
ironmen_df_na_filled = ironmen_df.fillna(value=fill_by_column)
print(ironmen_df_na_filled)

       groups  ironmen
0  Modern Web     59.0
1      DevOps      9.0
3    Big Data     14.0
4    Security      6.0
---
       groups  ironmen
0  Modern Web     59.0
1      DevOps      9.0
2           0     19.0
3    Big Data     14.0
4    Security      6.0
5       自我挑戰組      0.0
---
       groups  ironmen
0  Modern Web     59.0
1      DevOps      9.0
2       Cloud     19.0
3    Big Data     14.0
4    Security      6.0
5       自我挑戰組     71.0


In [25]:
import pandas as pd

# Group names become the row labels; the counts fill the single column.
groups = ["Modern Web", "DevOps", "Cloud", "Big Data", "Security", "自我挑戰組"]
ironmen = [59, 9, 19, 14, 6, 77]

# Build the data frame with the group names as its index.
ironmen_df = pd.DataFrame({"ironmen": ironmen}, index=groups)

# Display the rows ordered by their index labels.
ironmen_df.sort_index()

Unnamed: 0,ironmen
Big Data,14
Cloud,19
DevOps,9
Modern Web,59
Security,6
自我挑戰組,77


In [26]:
import pandas as pd

# Group names become the row labels; the counts fill the single column.
groups = ["Modern Web", "DevOps", "Cloud", "Big Data", "Security", "自我挑戰組"]
ironmen = [59, 9, 19, 14, 6, 77]

# Build the data frame with the group names as its index.
ironmen_df = pd.DataFrame(data=ironmen, index=groups, columns=["ironmen"])

# Display the rows ordered by the values in the "ironmen" column.
ironmen_df.sort_values("ironmen")

Unnamed: 0,ironmen
Security,6
DevOps,9
Big Data,14
Cloud,19
Modern Web,59
自我挑戰組,77


In [15]:
# Build a 4x4 DataFrame of random floats with row labels a-d and column
# labels A-D.
import pandas as pd
import numpy as np

# Seed the global NumPy generator so the random values are the same on every
# run; otherwise the outputs of the indexing cells that follow cannot be
# reproduced after a kernel restart.
np.random.seed(42)
frame = pd.DataFrame(
    np.random.rand(4, 4),
    index=list("abcd"),
    columns=list("ABCD"),
)
frame

Unnamed: 0,A,B,C,D
a,0.829411,0.67644,0.277016,0.119568
b,0.302191,0.188958,0.802161,0.06583
c,0.915393,0.17033,0.376222,0.322793
d,0.045251,0.452647,0.293361,0.351375


In [16]:
# Terminology: horizontal lines are rows, vertical lines are columns.
# .loc is label-based and takes the row label first, then the column label,
# separated by a comma. Here: the value at row "a", column "A".
frame.loc["a", "A"]

0.8294114942321494

In [17]:
# Select all rows of the first two columns using a label slice from "A" to "B".
frame.loc[:, "A":"B"]

Unnamed: 0,A,B
a,0.829411,0.67644
b,0.302191,0.188958
c,0.915393,0.17033
d,0.045251,0.452647


In [18]:
# The selections above were contiguous. Lists of labels pick non-adjacent
# entries: here the first and fourth rows with the first and fourth columns.
frame.loc[["a", "d"], ["A", "D"]]

Unnamed: 0,A,D
a,0.829411,0.119568
d,0.045251,0.351375


In [19]:
# .iloc selects by integer position: the value at row 0, column 0.
frame.iloc[0, 0]

0.8294114942321494

In [20]:
# Select the first two rows (positions 0 and 1) across all columns.
frame.iloc[:2, :]

Unnamed: 0,A,B,C,D
a,0.829411,0.67644,0.277016,0.119568
b,0.302191,0.188958,0.802161,0.06583


In [21]:
# Select the first two rows together with the first two columns.
frame.iloc[:2, :2]

Unnamed: 0,A,B
a,0.829411,0.67644
b,0.302191,0.188958


In [22]:
# The selections above were contiguous. Lists of positions pick non-adjacent
# entries: here the first and fourth rows with the first and fourth columns.
frame.iloc[[0, 3], [0, 3]]

Unnamed: 0,A,D
a,0.829411,0.119568
d,0.045251,0.351375
