# Pandas（建於 NumPy 之上）

In [1]:
import pandas as pd

## Build Dataframe

In [2]:
# Column-oriented construction: every dict key becomes a column label and
# its list supplies that column's values, one entry per row.

df = pd.DataFrame(
    data={
        'Name': ['Amy', 'Ben', 'Albert'],
        'Score': [90, 50, 100],
    }
)
df

Unnamed: 0,Name,Score
0,Amy,90
1,Ben,50
2,Albert,100


## Build Dataframe

In [3]:
# Row-oriented construction: every dict is one record (row); the keys are
# matched up across records to form the columns.

df1 = pd.DataFrame(
    [
        {'Name': 'Mark', 'Score': 88},
        {'Name': 'Jason', 'Score': 34},
        {'Name': 'Wix', 'Score': 99},
    ]
)
df1

Unnamed: 0,Name,Score
0,Mark,88
1,Jason,34
2,Wix,99


## Get column

In [4]:
# Select one column as a Series.
# Attribute access only works when the label is a valid Python identifier
# that does not clash with an existing DataFrame attribute.

df.Name   # equivalent to df['Name']

0       Amy
1       Ben
2    Albert
Name: Name, dtype: object

In [5]:
# Select columns as a DataFrame: indexing with a list of labels yields a
# DataFrame rather than a Series.

df.loc[:, ['Name', 'Score']]

Unnamed: 0,Name,Score
0,Amy,90
1,Ben,50
2,Albert,100


## Get row

In [6]:
# Positional row slice: every row except the last one.
df.iloc[:-1]

Unnamed: 0,Name,Score
0,Amy,90
1,Ben,50


## Get particular value
- loc [ row label , column label ]（以標籤選取）
- iloc [ row position , column position ]（以整數位置選取，從 0 開始）

In [7]:
# .loc selects by label: ':' keeps every row and 'Score' picks that column,
# so the result is a Series. A single cell would be e.g. df.loc[0, 'Score'].
df.loc[:,'Score']

0     90
1     50
2    100
Name: Score, dtype: int64

## 條件篩選
- |（or）
- &（and）

In [8]:
# Keep rows whose Score is below 60 or above 95. Each comparison yields a
# boolean Series; the two masks are combined element-wise with `|`.
df.loc[df['Score'].lt(60) | df['Score'].gt(95)]

Unnamed: 0,Name,Score
1,Ben,50
2,Albert,100


## Add new value

In [9]:
# Add a column: the list supplies one value per existing row.

df = df.assign(Time=[40, 85, 30])
df

Unnamed: 0,Name,Score,Time
0,Amy,90,40
1,Ben,50,85
2,Albert,100,30


In [10]:
# Add a row: wrap the new record in a one-row DataFrame and concatenate it.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so
# pd.concat is used instead; ignore_index=True renumbers the rows 0..n-1.
# Note: re-running this cell appends the same record again.

df = pd.concat(
    [df, pd.DataFrame([{'Name': 'Rober', 'Score': 87, 'Time': 45}])],
    ignore_index=True,
)
df

Unnamed: 0,Name,Score,Time
0,Amy,90,40
1,Ben,50,85
2,Albert,100,30
3,Rober,87,45


## 合併 two dataframe

In [11]:
# Stack two frames vertically. Columns are aligned by name; a column that
# exists in only one frame ('Time' is missing from df1) is filled with NaN
# for the other frame's rows.

df2 = pd.concat([df, df1], ignore_index=True)
df2

Unnamed: 0,Name,Score,Time
0,Amy,90,40.0
1,Ben,50,85.0
2,Albert,100,30.0
3,Rober,87,45.0
4,Mark,88,
5,Jason,34,
6,Wix,99,


In [12]:
# Column labels differ, so they must be made identical before stacking.
# df4's own labels (38, 'red') are kept as a data row: they are captured in a
# one-row frame first, then that frame and df4 are relabeled with df3's
# column names.

df3 = pd.DataFrame([
    {'age': 23, 'color': 'white'},
    {'age': 33, 'color': 'yellow'},
])
df4 = pd.DataFrame([
    {38: 45, 'red': 'black'},
    {38: 67, 'red': 'green'},
])

# One-row frame whose values are df4's original column labels.
tmp = pd.DataFrame(df4.columns).T
tmp.columns = df3.columns
df4.columns = df3.columns

df5 = pd.concat([df3, tmp, df4], ignore_index=True)
df5

Unnamed: 0,age,color
0,23,white
1,33,yellow
2,38,red
3,45,black
4,67,green


## 更改 dataframe 值
- column name
- row name
- column value
- row value
- 特定row, column值

In [13]:
# Rename columns, approach 1: replace the whole set of labels at once
# (one label per existing column, in order).

df.columns = pd.Index(['Name', 'Score', 'Confidence'])
df

Unnamed: 0,Name,Score,Confidence
0,Amy,90,40
1,Ben,50,85
2,Albert,100,30
3,Rober,87,45


In [14]:
# Rename columns, approach 2: map only the labels that change.

df = df.rename({'Confidence': 'Time'}, axis='columns').reset_index(drop=True)
df

Unnamed: 0,Name,Score,Time
0,Amy,90,40
1,Ben,50,85
2,Albert,100,30
3,Rober,87,45


In [15]:
# Relabel the rows: assign one label per row to the index.

df.index = pd.Index(['Amy', 'Ben', 'Albert', 'Rober'])
df

Unnamed: 0,Name,Score,Time
Amy,Amy,90,40
Ben,Ben,50,85
Albert,Albert,100,30
Rober,Rober,87,45


In [16]:
# Replace a whole column's values (one value per row).
# df2's 'Time' column is float64 at this point because the earlier concat
# filled the missing entries with NaN. Since pandas 2.0,
# `df2.loc[:, 'Time'] = ...` writes into the existing column in place and
# keeps that float dtype; plain column assignment swaps in a new column, so
# the integers are stored and displayed as integers.
df2['Time'] = [20, 85, 60, 45, 30, 35, 21]
df2

Unnamed: 0,Name,Score,Time
0,Amy,90,20
1,Ben,50,85
2,Albert,100,60
3,Rober,87,45
4,Mark,88,30
5,Jason,34,35
6,Wix,99,21


In [17]:
# Overwrite one row: select it by index label and supply one value per
# column, in column order.

df2.loc[6] = ['Wax', 98, 23]
df2

Unnamed: 0,Name,Score,Time
0,Amy,90,20
1,Ben,50,85
2,Albert,100,60
3,Rober,87,45
4,Mark,88,30
5,Jason,34,35
6,Wax,98,23


## 刪除 row, column
- axis：0（row）, 1（column）

In [18]:
# Drop a column by label.

df2 = df2.drop(columns=['Time'])
df2

Unnamed: 0,Name,Score
0,Amy,90
1,Ben,50
2,Albert,100
3,Rober,87
4,Mark,88
5,Jason,34
6,Wax,98


In [19]:
# Drop the row labeled 3, then renumber the remaining rows from 0.

df2 = df2.drop(index=3).reset_index(drop=True)
df2

Unnamed: 0,Name,Score
0,Amy,90
1,Ben,50
2,Albert,100
3,Mark,88
4,Jason,34
5,Wax,98


## 設定

In [20]:
# Show up to 50 columns before pandas abbreviates wide frames with "...".

pd.options.display.max_columns = 50