# Pandas 基本使用

In [14]:
import pandas as pd
import numpy as np

## 1-創建資料表

### 1.1-(np.array)轉成(表格)

In [22]:
# Wrap a 2x4 NumPy array in a DataFrame; with no names given, columns are labelled 0..3.
arr = np.asarray([[1, 2, 3, 4], [9, 8, 7, 6]])
df = pd.DataFrame(data=arr)
df

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,9,8,7,6


### 1.2-重新定義(欄位名稱)

In [25]:
# Same 2x4 array, this time with explicit column names instead of the default 0..3.
arr = np.asarray([[1, 2, 3, 4], [9, 8, 7, 6]])
df = pd.DataFrame(data=arr, columns=["a", "b", "c", "d"])
df

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,9,8,7,6


### 1.3-載入csv檔

In [26]:
def create_table(file_path):
    """Read the CSV at ``file_path`` (decoded as UTF-8) and return it as a DataFrame."""
    table = pd.read_csv(file_path, encoding="utf-8")
    return table
# Load the TED talks CSV through create_table and display the resulting frame.
df = create_table("./data/dataset_Pandas/ted_main.csv")
df

Unnamed: 0,comments,description,...,url,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,...,https://www.ted.com/talks/ken_robinson_says_sc...,47227110
1,265,With the same humor and humanity he exuded in ...,...,https://www.ted.com/talks/al_gore_on_averting_...,3200520
2,124,New York Times columnist David Pogue takes aim...,...,https://www.ted.com/talks/david_pogue_says_sim...,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",...,https://www.ted.com/talks/majora_carter_s_tale...,1697550
4,593,You've never seen data presented like this. Wi...,...,https://www.ted.com/talks/hans_rosling_shows_t...,12005869
...,...,...,...,...,...
2545,17,"Between 2008 and 2016, the United States depor...",...,https://www.ted.com/talks/duarte_geraldino_wha...,450430
2546,6,How can you study Mars without a spaceship? He...,...,https://www.ted.com/talks/armando_azua_bustos_...,417470
2547,10,Science fiction visions of the future show us ...,...,https://www.ted.com/talks/radhika_nagpal_what_...,375647
2548,32,In an unmissable talk about race and politics ...,...,https://www.ted.com/talks/theo_e_j_wilson_a_bl...,419309


### 1.4-限制表格顯示的(欄位數)與(資料筆數)

In [27]:
def display_table(row, column):
    """Set the pandas display limits.

    row    -- value stored in the 'display.max_rows' option
    column -- value stored in the 'display.max_columns' option

    The options are global, so every DataFrame rendered afterwards is affected.
    """
    limits = (
        ('display.max_rows', row),
        ('display.max_columns', column),
    )
    for option_name, limit in limits:
        pd.set_option(option_name, limit)
    
# Set display.max_rows to 10 and display.max_columns to 4, then redisplay the table.
display_table(10,4)
df

Unnamed: 0,comments,description,...,url,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,...,https://www.ted.com/talks/ken_robinson_says_sc...,47227110
1,265,With the same humor and humanity he exuded in ...,...,https://www.ted.com/talks/al_gore_on_averting_...,3200520
2,124,New York Times columnist David Pogue takes aim...,...,https://www.ted.com/talks/david_pogue_says_sim...,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",...,https://www.ted.com/talks/majora_carter_s_tale...,1697550
4,593,You've never seen data presented like this. Wi...,...,https://www.ted.com/talks/hans_rosling_shows_t...,12005869
...,...,...,...,...,...
2545,17,"Between 2008 and 2016, the United States depor...",...,https://www.ted.com/talks/duarte_geraldino_wha...,450430
2546,6,How can you study Mars without a spaceship? He...,...,https://www.ted.com/talks/armando_azua_bustos_...,417470
2547,10,Science fiction visions of the future show us ...,...,https://www.ted.com/talks/radhika_nagpal_what_...,375647
2548,32,In an unmissable talk about race and politics ...,...,https://www.ted.com/talks/theo_e_j_wilson_a_bl...,419309


## 2-表格資訊

### 2.1-查看(表格)的大小

In [18]:
# shape is a (row_count, column_count) tuple.
# NOTE(review): the saved output (2, 4) matches the 2x4 demo frame, not the TED table
# loaded earlier in the notebook -- it looks stale; re-run top to bottom to confirm.
df.shape

(2, 4)

### 2.2-查看(表格)的欄位

In [19]:
# Print the index range, each column's non-null count and dtype, and the memory usage.
# NOTE(review): the saved output describes the 2x4 demo frame (columns a-d) -- likely stale.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 4 columns):
a    2 non-null int32
b    2 non-null int32
c    2 non-null int32
d    2 non-null int32
dtypes: int32(4)
memory usage: 96.0 bytes


### 2.3-查看(表格)的:
 * count = (該欄位)的資料(筆數)
 * mean  = (該欄位)的資料(平均數)
 * std   = (該欄位)的資料(標準差)
 * min   = (該欄位)的資料(最小值)
 * 25%   = (該欄位)的資料(第一四分位數)
 * 50%   = (該欄位)的資料(第二四分位數)(中位數)
 * 75%   = (該欄位)的資料(第三四分位數)
 * max   = (該欄位)的資料(最大值)

In [21]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
# NOTE(review): the saved output describes the 2x4 demo frame (columns a-d) -- likely stale.
df.describe()

Unnamed: 0,a,b,c,d
count,2.0,2.0,2.0,2.0
mean,5.0,5.0,5.0,5.0
std,5.656854,4.242641,2.828427,1.414214
min,1.0,2.0,3.0,4.0
25%,3.0,3.5,4.0,4.5
50%,5.0,5.0,5.0,5.0
75%,7.0,6.5,6.0,5.5
max,9.0,8.0,7.0,6.0


## 3-表格切割

### 3.1-column切割(單個)

In [34]:
# Indexing with a single column name returns that column as a Series.
df["comments"]

0       4553
1        265
2        124
3        200
4        593
        ... 
2545      17
2546       6
2547      10
2548      32
2549       8
Name: comments, Length: 2550, dtype: int64

### 3.2-column切割(多個)

In [35]:
# Indexing with a list of column names returns a DataFrame holding just those columns.
df[["comments", "description", "url"]]

Unnamed: 0,comments,description,url
0,4553,Sir Ken Robinson makes an entertaining and pro...,https://www.ted.com/talks/ken_robinson_says_sc...
1,265,With the same humor and humanity he exuded in ...,https://www.ted.com/talks/al_gore_on_averting_...
2,124,New York Times columnist David Pogue takes aim...,https://www.ted.com/talks/david_pogue_says_sim...
3,200,"In an emotionally charged talk, MacArthur-winn...",https://www.ted.com/talks/majora_carter_s_tale...
4,593,You've never seen data presented like this. Wi...,https://www.ted.com/talks/hans_rosling_shows_t...
...,...,...,...
2545,17,"Between 2008 and 2016, the United States depor...",https://www.ted.com/talks/duarte_geraldino_wha...
2546,6,How can you study Mars without a spaceship? He...,https://www.ted.com/talks/armando_azua_bustos_...
2547,10,Science fiction visions of the future show us ...,https://www.ted.com/talks/radhika_nagpal_what_...
2548,32,In an unmissable talk about race and politics ...,https://www.ted.com/talks/theo_e_j_wilson_a_bl...


### 3.3-row切割(單)

In [36]:
# iloc with one integer position returns that row as a Series (index = column names).
df.iloc[5]

comments                                                            672
description           Tony Robbins discusses the "invisible forces" ...
duration                                                           1305
event                                                           TED2006
film_date                                                    1138838400
                                            ...                        
speaker_occupation          Life coach; expert in leadership psychology
tags                  ['business', 'culture', 'entertainment', 'goal...
title                                              Why we do what we do
url                   https://www.ted.com/talks/tony_robbins_asks_wh...
views                                                          20685401
Name: 5, Length: 17, dtype: object

### 3.4-row切割(多)

In [31]:
# iloc with a slice returns rows at positions 10 through 14 (the end is exclusive).
df.iloc[10:15]

Unnamed: 0,comments,description,...,url,views
10,79,"Accepting his 2006 TED Prize, Cameron Sinclair...",...,https://www.ted.com/talks/cameron_sinclair_on_...,1211416
11,55,Jehane Noujaim unveils her 2006 TED Prize wish...,...,https://www.ted.com/talks/jehane_noujaim_inspi...,387877
12,71,"Accepting the 2006 TED Prize, Dr. Larry Brilli...",...,https://www.ted.com/talks/larry_brilliant_want...,693341
13,242,"Jeff Han shows off a cheap, scalable multi-tou...",...,https://www.ted.com/talks/jeff_han_demos_his_b...,4531020
14,99,"Nicholas Negroponte, founder of the MIT Media ...",...,https://www.ted.com/talks/nicholas_negroponte_...,358304


### 3.5-row切割(前5筆)

In [32]:
# First 5 rows of the table.
df.head(5)

Unnamed: 0,comments,description,...,url,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,...,https://www.ted.com/talks/ken_robinson_says_sc...,47227110
1,265,With the same humor and humanity he exuded in ...,...,https://www.ted.com/talks/al_gore_on_averting_...,3200520
2,124,New York Times columnist David Pogue takes aim...,...,https://www.ted.com/talks/david_pogue_says_sim...,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",...,https://www.ted.com/talks/majora_carter_s_tale...,1697550
4,593,You've never seen data presented like this. Wi...,...,https://www.ted.com/talks/hans_rosling_shows_t...,12005869


### 3.6-row切割(後5筆)

In [33]:
# Last 5 rows of the table.
df.tail(5)

Unnamed: 0,comments,description,...,url,views
2545,17,"Between 2008 and 2016, the United States depor...",...,https://www.ted.com/talks/duarte_geraldino_wha...,450430
2546,6,How can you study Mars without a spaceship? He...,...,https://www.ted.com/talks/armando_azua_bustos_...,417470
2547,10,Science fiction visions of the future show us ...,...,https://www.ted.com/talks/radhika_nagpal_what_...,375647
2548,32,In an unmissable talk about race and politics ...,...,https://www.ted.com/talks/theo_e_j_wilson_a_bl...,419309
2549,8,With more than half of the world population li...,...,https://www.ted.com/talks/karoliina_korppoo_ho...,391721


### 3.7-混合切割

In [38]:
# Combine both kinds of slicing: pick three columns first, then rows at positions 5 through 9.
df[ ["comments", "description", "duration"] ].iloc[5:10]

Unnamed: 0,comments,description,duration
5,672,"Tony Robbins discusses the ""invisible forces"" ...",1305
6,919,When two young Mormon missionaries knock on Ju...,992
7,46,Architect Joshua Prince-Ramus takes the audien...,1198
8,852,Philosopher Dan Dennett calls for religion -- ...,1485
9,900,"Pastor Rick Warren, author of ""The Purpose-Dri...",1262


## 4-資料過濾

### 4.1-過濾原理

In [51]:
# Take the first three rows as a small frame for demonstrating boolean masks.
df2 = df.iloc[:3]
df2

Unnamed: 0,comments,description,...,url,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,...,https://www.ted.com/talks/ken_robinson_says_sc...,47227110
1,265,With the same humor and humanity he exuded in ...,...,https://www.ted.com/talks/al_gore_on_averting_...,3200520
2,124,New York Times columnist David Pogue takes aim...,...,https://www.ted.com/talks/david_pogue_says_sim...,1636292


In [52]:
# Index with a list of booleans, one per row, to choose which rows survive.
df2[[False,True,False]]
# True: the row at that position is kept
# False: the row at that position is dropped

Unnamed: 0,comments,description,...,url,views
1,265,With the same humor and humanity he exuded in ...,...,https://www.ted.com/talks/al_gore_on_averting_...,3200520


### 4.2-創造(過濾陣列)

In [48]:
# Build a boolean mask: True where df["description"] contains the substring "Sir", else False.
# regex=False matches the text literally instead of treating it as a regular expression;
# na=False maps missing descriptions to False so the mask stays purely boolean and can be
# used for indexing without raising on NaN entries.
bool_filter = df["description"].str.contains("Sir", regex=False, na=False)
# Keep only the rows whose mask value is True.
df2 = df[bool_filter]
print("原始資料筆數: ",df.shape[0])
print("過濾後資料筆數: ",df2.shape[0])
print("包含 Sir 字串的資料:")
df2

原始資料筆數:  2550
過濾後資料筆數:  11


Unnamed: 0,comments,description,...,url,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,...,https://www.ted.com/talks/ken_robinson_says_sc...,47227110
15,325,Violinist Sirena Huang gives a technically bri...,...,https://www.ted.com/talks/sirena_huang_dazzles...,2702470
54,203,"Speaking as both an astronomer and ""a concerne...",...,https://www.ted.com/talks/martin_rees_asks_is_...,2121177
692,1234,"In this poignant, funny follow-up to his fable...",...,https://www.ted.com/talks/sir_ken_robinson_bri...,7266316
833,473,"In this talk from RSA Animate, Sir Ken Robinso...",...,https://www.ted.com/talks/ken_robinson_changin...,1854997
...,...,...,...,...,...
1502,634,Sir Ken Robinson outlines 3 principles crucial...,...,https://www.ted.com/talks/ken_robinson_how_to_...,6657858
1802,59,Sir Tim Berners-Lee invented the World Wide We...,...,https://www.ted.com/talks/tim_berners_lee_a_ma...,1054600
1978,64,"The founder of Sirius XM satellite radio, Mart...",...,https://www.ted.com/talks/martine_rothblatt_my...,1304737
2192,61,Trust: How do you earn it? Banks use credit sc...,...,https://www.ted.com/talks/shivani_siroya_a_sma...,1437353


In [56]:
# Invert the mask with ~ to keep only the rows whose description does NOT contain "Sir".
# (The frame is computed once; the original cell repeated this assignment before displaying it.)
df3 = df[~bool_filter]
print("原始資料筆數: ",df.shape[0])
print("過濾後資料筆數: ",df3.shape[0])
print("不包含 Sir 字串的資料:")
df3

原始資料筆數:  2550
過濾後資料筆數:  2539
不包含 Sir 字串的資料:


Unnamed: 0,comments,description,...,url,views
1,265,With the same humor and humanity he exuded in ...,...,https://www.ted.com/talks/al_gore_on_averting_...,3200520
2,124,New York Times columnist David Pogue takes aim...,...,https://www.ted.com/talks/david_pogue_says_sim...,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",...,https://www.ted.com/talks/majora_carter_s_tale...,1697550
4,593,You've never seen data presented like this. Wi...,...,https://www.ted.com/talks/hans_rosling_shows_t...,12005869
5,672,"Tony Robbins discusses the ""invisible forces"" ...",...,https://www.ted.com/talks/tony_robbins_asks_wh...,20685401
...,...,...,...,...,...
2545,17,"Between 2008 and 2016, the United States depor...",...,https://www.ted.com/talks/duarte_geraldino_wha...,450430
2546,6,How can you study Mars without a spaceship? He...,...,https://www.ted.com/talks/armando_azua_bustos_...,417470
2547,10,Science fiction visions of the future show us ...,...,https://www.ted.com/talks/radhika_nagpal_what_...,375647
2548,32,In an unmissable talk about race and politics ...,...,https://www.ted.com/talks/theo_e_j_wilson_a_bl...,419309


## 5-保存表格

In [57]:
# Write df2 to CSV as UTF-8 without the index column.
# Forward slashes work on every platform and avoid the invalid "\d" escape sequences that the
# backslash-separated path produced; this also matches the path style used when loading the data.
df2.to_csv("./data/dataset_Pandas/df2.csv", encoding = "utf-8", index = False)

## 6-刪除(column / row資料)

### 6.1-刪除column資料(單)

In [68]:
# Select three columns into df4, then remove "url" from df4 in place with del.
df4 = df[["comments", "description", "url"]]
del df4["url"]
df4

Unnamed: 0,comments,description
0,4553,Sir Ken Robinson makes an entertaining and pro...
1,265,With the same humor and humanity he exuded in ...
2,124,New York Times columnist David Pogue takes aim...
3,200,"In an emotionally charged talk, MacArthur-winn..."
4,593,You've never seen data presented like this. Wi...
...,...,...
2545,17,"Between 2008 and 2016, the United States depor..."
2546,6,How can you study Mars without a spaceship? He...
2547,10,Science fiction visions of the future show us ...
2548,32,In an unmissable talk about race and politics ...


### 6.2-刪除column資料(多)

In [70]:
# Select three columns, then drop two of them by name; drop returns a new frame.
df4 = (
    df[["comments", "description", "url"]]
    .drop(columns=["comments", "description"])
)
df4

Unnamed: 0,url
0,https://www.ted.com/talks/ken_robinson_says_sc...
1,https://www.ted.com/talks/al_gore_on_averting_...
2,https://www.ted.com/talks/david_pogue_says_sim...
3,https://www.ted.com/talks/majora_carter_s_tale...
4,https://www.ted.com/talks/hans_rosling_shows_t...
...,...
2545,https://www.ted.com/talks/duarte_geraldino_wha...
2546,https://www.ted.com/talks/armando_azua_bustos_...
2547,https://www.ted.com/talks/radhika_nagpal_what_...
2548,https://www.ted.com/talks/theo_e_j_wilson_a_bl...


### 6.3-刪除row資料(多)

In [71]:
# Select three columns, then drop the rows whose index labels are 0 through 9.
df4 = (
    df[["comments", "description", "url"]]
    .drop(index=range(10))
)
df4

Unnamed: 0,comments,description,url
10,79,"Accepting his 2006 TED Prize, Cameron Sinclair...",https://www.ted.com/talks/cameron_sinclair_on_...
11,55,Jehane Noujaim unveils her 2006 TED Prize wish...,https://www.ted.com/talks/jehane_noujaim_inspi...
12,71,"Accepting the 2006 TED Prize, Dr. Larry Brilli...",https://www.ted.com/talks/larry_brilliant_want...
13,242,"Jeff Han shows off a cheap, scalable multi-tou...",https://www.ted.com/talks/jeff_han_demos_his_b...
14,99,"Nicholas Negroponte, founder of the MIT Media ...",https://www.ted.com/talks/nicholas_negroponte_...
...,...,...,...
2545,17,"Between 2008 and 2016, the United States depor...",https://www.ted.com/talks/duarte_geraldino_wha...
2546,6,How can you study Mars without a spaceship? He...,https://www.ted.com/talks/armando_azua_bustos_...
2547,10,Science fiction visions of the future show us ...,https://www.ted.com/talks/radhika_nagpal_what_...
2548,32,In an unmissable talk about race and politics ...,https://www.ted.com/talks/theo_e_j_wilson_a_bl...
