## 查找與拷貝
#### 查找資料條件

In [15]:
# np.where(condition, x, y): element-wise selection - yields x where the condition is True
# and y elsewhere. (Called with the condition alone, np.where instead returns the indices
# of the elements that satisfy it.)

import numpy as np

data = np.array([1, 2, 3, 4])

print(np.where(data % 2 == 0, 0, 1)) # even -> 0, odd -> 1; behaves like an element-wise IIF
print(np.where(data >= 3, 1, 0)) # 1 where the value is at least 3, otherwise 0

[1 0 1 0]
[0 0 1 1]


In [16]:
# np.extract(condition, arr): returns the elements of arr for which the condition holds
# (the values themselves, not their indices).

import numpy as np

data = np.array([1, 2, 3, 4])

np.extract(data % 2 == 0, data)

array([2, 4])

In [19]:
# Exercise: look at the boolean mask first, then extract the values it selects

import numpy as np

data = np.array([1, 2, 3, 4])
print(data >= 3)
np.extract(data >= 3, data)

[False False  True  True]


array([3, 4])

In [20]:
# Practice: np.where with a condition only, compared with np.extract

import numpy as np

a = np.array([0, 30, 45, 60, 75, 90])
print('a > 15', a > 15)
b = np.where(a > 15) # condition-only form: a tuple holding the array of matching indices
print('b > 15顯示的位置', b)
print()
condition = np.mod(a, 2) == 0 # boolean mask marking the even values
print('condition', condition)
print()
b = np.where(condition) # b is rebound to the indices of the even values
print('根據T|F對應數據回傳位置', b)
print('boolean seletion', a[b]) # a[b] indexes a with the integer positions held in b
print('np.extract', np.extract(condition, a)) # same values, taken directly from the mask

a > 15 [False  True  True  True  True  True]
b > 15顯示的位置 (array([1, 2, 3, 4, 5]),)

condition [ True  True False  True False  True]

根據T|F對應數據回傳位置 (array([0, 1, 3, 5]),)
boolean seletion [ 0 30 60 90]
np.extract [ 0 30 60 90]


In [22]:
# Supplement: random numbers

# np is used below, so import it here instead of relying on an earlier cell having run.
import numpy as np
from numpy import random

# Ten integer draws from 1 to 6: low is inclusive, high is exclusive.
data = random.randint(low = 1, high = 7, size = (10, ))
print(data)

# How many values are greater than 3: map each element to 1 or 0 with np.where, then sum
np.where(data > 3, 1, 0).sum()

[1 3 5 3 3 3 6 6 2 2]


3

#### 陣列指派拷貝

In [31]:
import numpy as np

# Plain assignment copies nothing: b becomes a second name for the same array object as a.
a = np.arange(6)
print('a:', a)
print('id(a):', id(a))
b = a
print('b:', b)
print('id(b):', id(b))
print()

b.shape = 3, 2 # change the array's shape in place, through b
print('b改變型態後:b', b, id(b), '\n')
print('b改變型態後:a', a, id(a), '\n')
print()

b[0, 0] = 100 # write one element through b
print('b改變資料後:b', b, '\n')
print('b改變資料後:a', a, '\n')

a: [0 1 2 3 4 5]
id(a): 140463633015408
b: [0 1 2 3 4 5]
id(b): 140463633015408

b改變型態後:b [[0 1]
 [2 3]
 [4 5]] 140463633015408 

b改變型態後:a [[0 1]
 [2 3]
 [4 5]] 140463633015408 


b改變資料後:b [[100   1]
 [  2   3]
 [  4   5]] 

b改變資料後:a [[100   1]
 [  2   3]
 [  4   5]] 



#### 陣列淺層拷貝

In [32]:
# .view(): creates a new array object that looks at the same data. The two objects have
# different ids and each keeps its own shape, so reshaping one does NOT affect the other,
# but the values are shared, so a data change made through one IS seen through the other.

import numpy as np

a = np.arange(6)
print('a:', a)
print('id(a):', id(a))
b = a.view()
print('b:', b)
print('id(b):', id(b))
print()

b.shape = 3, 2 # only the view's shape changes
print('b改變型態後:b', b, id(b), '\n')
print('b改變型態後:a', a, id(a), '\n')
print()

b[0, 0] = 100 # written through the view, visible in a as well
print('b改變資料後:b', b, '\n')
print('b改變資料後:a', a, '\n')

a: [0 1 2 3 4 5]
id(a): 140463633015888
b: [0 1 2 3 4 5]
id(b): 140463633015984

b改變型態後:b [[0 1]
 [2 3]
 [4 5]] 140463633015984 

b改變型態後:a [0 1 2 3 4 5] 140463633015888 


b改變資料後:b [[100   1]
 [  2   3]
 [  4   5]] 

b改變資料後:a [100   1   2   3   4   5] 



#### 陣列深層拷貝

In [7]:
# How plain assignment affects variables
# .copy(): hands the array to another name by duplicating both its shape and its data; the two
# arrays live at different memory locations, so neither shape nor data changes are shared.

lst1 = [1, 2, 3, 4]
lst2 = lst1
lst2.append(5)
print(lst1)
print(lst2)

# Same location: both names refer to one list object
print(id(lst1), id(lst2))

[1, 2, 3, 4, 5]
[1, 2, 3, 4, 5]
140463628158608 140463628158608


In [8]:
# The fix: copy the list

lst1 = [1, 2, 3, 4]
lst2 = lst1.copy()
lst2.append(5)
print(lst1)
print(lst2)

# Different locations: lst2 is a separate list object, so the two ids no longer match
print(id(lst1), id(lst2))

[1, 2, 3, 4]
[1, 2, 3, 4, 5]
140463627217328 140463627846112


In [10]:
# numpy: plain assignment shares one array

import numpy as np

np1 = np.array([1, 2, 3, 4])
np2 = np1 # no copy: np2 is another name for the same array object
print(id(np1), id(np2))
np1[0] = 0 # the change is visible through np2 as well
print(id(np1), id(np2))
np1, np2

140463632832240 140463632832240
140463632832240 140463632832240


(array([0, 2, 3, 4]), array([0, 2, 3, 4]))

In [11]:
# numpy fix: take an independent copy

import numpy as np

np1 = np.array([1, 2, 3, 4])
np2 = np1.copy() # separate array object with its own data
print(id(np1), id(np2))
np1[0] = 0 # only np1 changes; np2 keeps the original values
print(id(np1), id(np2))
np1, np2

140463632832432 140463632831568
140463632832432 140463632831568


(array([0, 2, 3, 4]), array([1, 2, 3, 4]))

In [33]:
# Practice: .copy() gives b its own shape and its own data

import numpy as np

a = np.arange(6)
print('a:', a)
print('id(a):', id(a))
b = a.copy()
print('b:', b)
print('id(b):', id(b))
print()

b.shape = 3, 2 # only b is reshaped
print('b改變型態後:b', b, id(b), '\n')
print('b改變型態後:a', a, id(a), '\n')
print()

b[0, 0] = 100 # only b's data changes
print('b改變資料後:b', b, '\n')
print('b改變資料後:a', a, '\n')

a: [0 1 2 3 4 5]
id(a): 140463633016560
b: [0 1 2 3 4 5]
id(b): 140463633016656

b改變型態後:b [[0 1]
 [2 3]
 [4 5]] 140463633016656 

b改變型態後:a [0 1 2 3 4 5] 140463633016560 


b改變資料後:b [[100   1]
 [  2   3]
 [  4   5]] 

b改變資料後:a [0 1 2 3 4 5] 



In [35]:
# Question

import numpy as np

a = np.array([[1, 2, 3],
              [4, 5, 6]])

b = a.reshape(3, 2) # a shallow copy here: b is a new array object sharing a's data (same effect as the two lines below)

######################
# b = a.view()
# b.shape = 3, 2
######################

print('id(a):' , id(a))
print('id(b):' , id(b))
print()
print('a:', a)
print('b from a with reshape:', b)
print()
a[0, 0] = 0 # written through a, yet it shows up in b too because the data is shared
print('a改變資料後:a', a)
print('a改變資料後:b', b)
print('id(a):' , id(a))
print('id(b):' , id(b))

id(a): 140463633017712
id(b): 140463633017808

a: [[1 2 3]
 [4 5 6]]
b from a with reshape: [[1 2]
 [3 4]
 [5 6]]

a改變資料後:a [[0 2 3]
 [4 5 6]]
a改變資料後:b [[0 2]
 [3 4]
 [5 6]]
id(a): 140463633017712
id(b): 140463633017808


## 活用

In [3]:
# https://rate.bot.com.tw/xrt
# Downloaded by hand and saved locally

url = 'https://rate.bot.com.tw/xrt/flcsv/0/day' # NOTE(review): not used in this cell; the CSV is read from the local file below

import pandas as pd

money = pd.read_csv('ExchangeRate@202107011901.csv', index_col = False) # index_col = False: do not use the first column as the index
money

Unnamed: 0,Currency,Rate,Cash,Spot,Forward-10Days,Forward-30Days,Forward-60Days,Forward-90Days,Forward-120Days,Forward-150Days,...,Rate.1,Cash.1,Spot.1,Forward-10Days.1,Forward-30Days.1,Forward-60Days.1,Forward-90Days.1,Forward-120Days.1,Forward-150Days.1,Forward-180Days.1
0,USD,Buying,27.495,27.82,27.834,27.832,27.828,27.821,27.813,27.804,...,Selling,28.165,27.97,27.941,27.94,27.939,27.938,27.937,27.935,27.933
1,HKD,Buying,3.437,3.558,3.561,3.56,3.56,3.56,3.559,3.557,...,Selling,3.641,3.628,3.621,3.622,3.622,3.622,3.622,3.622,3.622
2,GBP,Buying,37.3,38.195,38.366,38.359,38.358,38.357,38.348,38.34,...,Selling,39.42,38.825,38.774,38.783,38.783,38.782,38.786,38.791,38.795
3,AUD,Buying,20.51,20.725,20.798,20.794,20.794,20.795,20.789,20.783,...,Selling,21.29,21.07,21.003,21.01,21.012,21.012,21.014,21.015,21.02
4,CAD,Buying,22.02,22.35,22.396,22.389,22.387,22.384,22.378,22.371,...,Selling,22.93,22.68,22.603,22.608,22.607,22.605,22.606,22.606,22.607
5,SGD,Buying,20.14,20.61,20.636,20.628,20.624,20.621,20.612,20.603,...,Selling,21.05,20.83,20.822,20.826,20.824,20.82,20.818,20.816,20.814
6,CHF,Buying,29.35,29.96,29.973,29.978,30.0,30.023,30.039,30.056,...,Selling,30.55,30.35,30.233,30.258,30.281,30.304,30.331,30.358,30.385
7,JPY,Buying,0.2411,0.2481,0.2487,0.2486,0.2487,0.2487,0.2487,0.2488,...,Selling,0.2539,0.2527,0.2528,0.2529,0.253,0.253,0.2531,0.2532,0.2533
8,ZAR,Buying,0.0,1.909,1.911,1.904,1.896,1.888,1.88,1.871,...,Selling,0.0,1.999,1.992,1.987,1.98,1.972,1.964,1.956,1.948
9,SEK,Buying,2.87,3.2,3.21,3.209,3.21,3.21,3.21,3.21,...,Selling,3.39,3.32,3.311,3.313,3.313,3.314,3.315,3.316,3.318


In [7]:
# Without downloading first: read the CSV straight from the URL

import pandas as pd

url = 'https://rate.bot.com.tw/xrt/flcsv/0/day'
money = pd.read_csv(url, index_col = False) # index_col = False: do not use the first column as the index
money

Unnamed: 0,幣別,匯率,現金,即期,遠期10天,遠期30天,遠期60天,遠期90天,遠期120天,遠期150天,...,匯率.1,現金.1,即期.1,遠期10天.1,遠期30天.1,遠期60天.1,遠期90天.1,遠期120天.1,遠期150天.1,遠期180天.1
0,USD,本行買入,29.385,29.71,29.72,29.69,29.64,29.59,29.51,29.445,...,本行賣出,30.055,29.86,29.827,29.808,29.76,29.715,29.665,29.6,29.52
1,HKD,本行買入,3.639,3.76,3.764,3.764,3.765,3.764,3.76,3.756,...,本行賣出,3.843,3.83,3.825,3.827,3.828,3.827,3.824,3.822,3.819
2,GBP,本行買入,35.43,36.325,36.081,36.062,36.042,36.012,35.97,35.928,...,本行賣出,37.55,36.955,36.494,36.502,36.487,36.455,36.427,36.399,36.37
3,AUD,本行買入,20.24,20.455,20.359,20.338,20.329,20.314,20.288,20.257,...,本行賣出,21.02,20.8,20.583,20.609,20.601,20.586,20.567,20.543,20.518
4,CAD,本行買入,22.52,22.85,22.842,22.826,22.805,22.777,22.741,22.705,...,本行賣出,23.43,23.18,23.05,23.05,23.029,23.005,22.975,22.946,22.917
5,SGD,本行買入,20.88,21.35,21.311,21.294,21.278,21.255,21.231,21.206,...,本行賣出,21.79,21.57,21.501,21.504,21.488,21.469,21.451,21.434,21.417
6,CHF,本行買入,30.27,30.88,30.689,30.708,30.736,30.766,30.789,30.812,...,本行賣出,31.47,31.27,30.949,30.986,31.014,31.045,31.078,31.11,31.143
7,JPY,本行買入,0.2126,0.2194,0.218,0.218,0.2181,0.2181,0.2181,0.2181,...,本行賣出,0.2254,0.2244,0.2221,0.2222,0.2224,0.2226,0.2227,0.2228,0.2229
8,ZAR,本行買入,0.0,1.821,1.819,1.812,1.806,1.798,1.79,1.782,...,本行賣出,0.0,1.911,1.901,1.897,1.889,1.881,1.874,1.867,1.859
9,SEK,本行買入,2.55,2.88,2.89,2.89,2.89,2.89,2.889,2.888,...,本行賣出,3.07,3.0,2.991,2.994,2.995,2.995,2.995,2.995,2.995


In [11]:
# Use pandas' web-scraping helper read_html

import pandas as pd

pd.read_html('https://rate.bot.com.tw/xrt')[0] # read_html returns a list of tables; [0] takes the first one

Unnamed: 0_level_0,幣別,幣別,現金匯率,現金匯率,Unnamed: 4_level_0,即期匯率,即期匯率,Unnamed: 7_level_0,遠期匯率,歷史匯率,歷史匯率,現金匯率,現金匯率,Unnamed: 13_level_0,即期匯率,即期匯率,Unnamed: 16_level_0
Unnamed: 0_level_1,幣別,Unnamed: 1_level_1,本行買入,本行賣出,本行買入,本行賣出,Unnamed: 6_level_1,本行買入,遠期匯率,歷史匯率,本行賣出,本行買入.1,本行賣出.1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,美金 (USD) 美金 (USD),29.385,30.055,29.71,29.86,查詢,查詢,29.385,30.055,29.71,29.86,,,,,,
1,港幣 (HKD) 港幣 (HKD),3.639,3.843,3.76,3.83,查詢,查詢,3.639,3.843,3.76,3.83,,,,,,
2,英鎊 (GBP) 英鎊 (GBP),35.33,37.45,36.225,36.855,查詢,查詢,35.33,37.45,36.225,36.855,,,,,,
3,澳幣 (AUD) 澳幣 (AUD),20.17,20.95,20.385,20.73,查詢,查詢,20.17,20.95,20.385,20.73,,,,,,
4,加拿大幣 (CAD) 加拿大幣 (CAD),22.47,23.38,22.8,23.13,查詢,查詢,22.47,23.38,22.8,23.13,,,,,,
5,新加坡幣 (SGD) 新加坡幣 (SGD),20.86,21.77,21.33,21.55,查詢,查詢,20.86,21.77,21.33,21.55,,,,,,
6,瑞士法郎 (CHF) 瑞士法郎 (CHF),30.24,31.44,30.85,31.24,查詢,查詢,30.24,31.44,30.85,31.24,,,,,,
7,日圓 (JPY) 日圓 (JPY),0.2121,0.2249,0.2189,0.2239,查詢,查詢,0.2121,0.2249,0.2189,0.2239,,,,,,
8,南非幣 (ZAR) 南非幣 (ZAR),-,-,1.821,1.911,查詢,查詢,-,-,1.821,1.911,,,,,,
9,瑞典幣 (SEK) 瑞典幣 (SEK),2.54,3.06,2.87,2.99,查詢,查詢,2.54,3.06,2.87,2.99,,,,,,


In [20]:
# Fetch data from the web with Python
# crawler
# https://data.taipei/#/dataset/detail?id=c6bc8aed-557d-41d5-bfb1-8da24f78f2fb

import requests as r
import json
import pandas as pd

# Bound the wait so an unresponsive server cannot hang the cell indefinitely.
data = r.get('https://tcgbusfs.blob.core.windows.net/dotapp/youbike/v2/youbike_immediate.json',
             timeout = 30)
# Stop on an HTTP error status instead of trying to parse an error page as JSON.
data.raise_for_status()
# The payload is a JSON array of records; each record becomes one row.
bike = pd.DataFrame(json.loads(data.text))
bike

Unnamed: 0,sno,sna,tot,sbi,sarea,mday,lat,lng,ar,sareaen,snaen,aren,bemp,act,srcUpdateTime,updateTime,infoTime,infoDate
0,500101001,YouBike2.0_捷運科技大樓站,29,2,大安區,2022-06-23 22:47:03,25.02605,121.54360,復興南路二段235號前,Daan Dist.,YouBike2.0_MRT Technology Bldg. Sta.,No.235， Sec. 2， Fuxing S. Rd.,27,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:47:03,2022-06-23
1,500101002,YouBike2.0_復興南路二段273號前,21,7,大安區,2022-06-23 22:45:37,25.02565,121.54357,復興南路二段273號西側,Daan Dist.,YouBike2.0_No.273， Sec. 2， Fuxing S. Rd.,No.273， Sec. 2， Fuxing S. Rd. (West),14,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:45:37,2022-06-23
2,500101003,YouBike2.0_國北教大實小東側門,16,16,大安區,2022-06-23 22:15:04,25.02429,121.54124,和平東路二段96巷7號,Daan Dist.,YouBike2.0_NTUE Experiment Elementary School (...,No. 7， Ln. 96， Sec. 2， Heping E. Rd,0,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:15:04,2022-06-23
3,500101004,YouBike2.0_和平公園東側,11,11,大安區,2022-06-23 22:04:03,25.02351,121.54282,和平東路二段118巷33號,Daan Dist.,YouBike2.0_Heping Park (East),No. 33， Ln. 118， Sec. 2， Heping E. Rd,0,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:04:03,2022-06-23
4,500101005,YouBike2.0_辛亥復興路口西北側,16,2,大安區,2022-06-23 22:36:04,25.02153,121.54299,復興南路二段368號,Daan Dist.,YouBike2.0_Xinhai Fuxing Rd. Intersection (Nor...,No. 368， Sec. 2， Fuxing S. Rd.,14,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:36:04,2022-06-23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1090,500119087,YouBike2.0_臺大總圖書館西南側,30,1,臺大專區,2022-06-23 20:11:38,25.01690,121.54031,臺大圖書館西南側,NTU Dist,YouBike2.0_NTU Main Library(Southwest),NTU Main Library(Southwest),29,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 20:11:38,2022-06-23
1091,500119088,YouBike2.0_臺大黑森林西側,20,9,臺大專區,2022-06-23 22:45:12,25.01995,121.54347,臺大霖澤館南側,NTU Dist,YouBike2.0_NTU Black Forest(West),NTU Tsai Lecture Hall(South),11,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:45:12,2022-06-23
1092,500119089,YouBike2.0_臺大獸醫館南側,10,2,臺大專區,2022-06-23 22:39:37,25.01791,121.54242,臺大獸醫系館南側,NTU Dist,YouBike2.0_NTU Dept. of Veterinary Medicine(So...,NTU Dept. of Veterinary Medicine(South),8,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:39:37,2022-06-23
1093,500119090,YouBike2.0_臺大新體育館東南側,40,0,臺大專區,2022-06-23 22:09:04,25.02112,121.53591,臺大體育館東側,NTU Dist,YouBike2.0_NTU Sports Center(Southeast),NTU Sports Center(East),40,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:09:04,2022-06-23


In [22]:
# Export the frame to a pickle file of our own
bike.to_pickle('youbike.pickle')

In [23]:
# Read the pickle back (unpickling can run code, so only load pickle files you trust)

import pandas as pd

ubike = pd.read_pickle('youbike.pickle')
ubike

Unnamed: 0,sno,sna,tot,sbi,sarea,mday,lat,lng,ar,sareaen,snaen,aren,bemp,act,srcUpdateTime,updateTime,infoTime,infoDate
0,500101001,YouBike2.0_捷運科技大樓站,29,2,大安區,2022-06-23 22:47:03,25.02605,121.54360,復興南路二段235號前,Daan Dist.,YouBike2.0_MRT Technology Bldg. Sta.,No.235， Sec. 2， Fuxing S. Rd.,27,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:47:03,2022-06-23
1,500101002,YouBike2.0_復興南路二段273號前,21,7,大安區,2022-06-23 22:45:37,25.02565,121.54357,復興南路二段273號西側,Daan Dist.,YouBike2.0_No.273， Sec. 2， Fuxing S. Rd.,No.273， Sec. 2， Fuxing S. Rd. (West),14,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:45:37,2022-06-23
2,500101003,YouBike2.0_國北教大實小東側門,16,16,大安區,2022-06-23 22:15:04,25.02429,121.54124,和平東路二段96巷7號,Daan Dist.,YouBike2.0_NTUE Experiment Elementary School (...,No. 7， Ln. 96， Sec. 2， Heping E. Rd,0,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:15:04,2022-06-23
3,500101004,YouBike2.0_和平公園東側,11,11,大安區,2022-06-23 22:04:03,25.02351,121.54282,和平東路二段118巷33號,Daan Dist.,YouBike2.0_Heping Park (East),No. 33， Ln. 118， Sec. 2， Heping E. Rd,0,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:04:03,2022-06-23
4,500101005,YouBike2.0_辛亥復興路口西北側,16,2,大安區,2022-06-23 22:36:04,25.02153,121.54299,復興南路二段368號,Daan Dist.,YouBike2.0_Xinhai Fuxing Rd. Intersection (Nor...,No. 368， Sec. 2， Fuxing S. Rd.,14,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:36:04,2022-06-23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1090,500119087,YouBike2.0_臺大總圖書館西南側,30,1,臺大專區,2022-06-23 20:11:38,25.01690,121.54031,臺大圖書館西南側,NTU Dist,YouBike2.0_NTU Main Library(Southwest),NTU Main Library(Southwest),29,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 20:11:38,2022-06-23
1091,500119088,YouBike2.0_臺大黑森林西側,20,9,臺大專區,2022-06-23 22:45:12,25.01995,121.54347,臺大霖澤館南側,NTU Dist,YouBike2.0_NTU Black Forest(West),NTU Tsai Lecture Hall(South),11,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:45:12,2022-06-23
1092,500119089,YouBike2.0_臺大獸醫館南側,10,2,臺大專區,2022-06-23 22:39:37,25.01791,121.54242,臺大獸醫系館南側,NTU Dist,YouBike2.0_NTU Dept. of Veterinary Medicine(So...,NTU Dept. of Veterinary Medicine(South),8,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:39:37,2022-06-23
1093,500119090,YouBike2.0_臺大新體育館東南側,40,0,臺大專區,2022-06-23 22:09:04,25.02112,121.53591,臺大體育館東側,NTU Dist,YouBike2.0_NTU Sports Center(Southeast),NTU Sports Center(East),40,1,2022-06-23 22:47:12,2022-06-23 22:47:51,2022-06-23 22:09:04,2022-06-23


In [25]:
# Check the installed pandas version

import pandas as pd
pd.__version__

'1.2.4'

## Pandas存取分析
#### 存取檔案

In [27]:
# Add date information: parse a YYYYMMDD string into a Timestamp

import pandas as pd
pd.to_datetime('20210701')

Timestamp('2021-07-01 00:00:00')

In [28]:
# Question: 3016-03-01
# A nanosecond-resolution Timestamp cannot represent a year this far ahead, so pandas may
# raise OutOfBoundsDatetime. The error is the point of this cell, so it is caught and
# printed rather than left to abort a Restart & Run All.

import pandas as pd

try:
    far_future = pd.to_datetime('30160301')
except pd.errors.OutOfBoundsDatetime as err:
    far_future = None # nothing to display; the message below is the result
    print(f'{type(err).__name__}: {err}')
far_future

OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 3016-03-01 00:00:00

In [31]:
# Following on, a workaround: the standard-library datetime parses the year 3016 without error

from datetime import datetime
datetime.strptime('30160301', '%Y%m%d')

datetime.datetime(3016, 3, 1, 0, 0)

In [32]:
# The year 2099 still works
import pandas as pd
pd.to_datetime('20991231')

Timestamp('2099-12-31 00:00:00')

#### 過濾分析

In [33]:
# Export to an Excel file

import requests as r
import json
import pandas as pd
# Bound the wait so an unresponsive server cannot hang the cell indefinitely.
data = r.get('https://tcgbusfs.blob.core.windows.net/dotapp/youbike/v2/youbike_immediate.json',
             timeout = 30)
# Stop on an HTTP error status instead of writing a spreadsheet built from an error page.
data.raise_for_status()
bike = pd.DataFrame(json.loads(data.text))
bike.to_excel('bike.xlsx', index = False) # index = False: leave the row index out of the sheet
bike

Unnamed: 0,sno,sna,tot,sbi,sarea,mday,lat,lng,ar,sareaen,snaen,aren,bemp,act,srcUpdateTime,updateTime,infoTime,infoDate
0,500101001,YouBike2.0_捷運科技大樓站,29,10,大安區,2022-06-23 23:17:38,25.02605,121.54360,復興南路二段235號前,Daan Dist.,YouBike2.0_MRT Technology Bldg. Sta.,No.235， Sec. 2， Fuxing S. Rd.,19,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:17:38,2022-06-23
1,500101002,YouBike2.0_復興南路二段273號前,21,5,大安區,2022-06-23 23:13:11,25.02565,121.54357,復興南路二段273號西側,Daan Dist.,YouBike2.0_No.273， Sec. 2， Fuxing S. Rd.,No.273， Sec. 2， Fuxing S. Rd. (West),16,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:13:11,2022-06-23
2,500101003,YouBike2.0_國北教大實小東側門,16,7,大安區,2022-06-23 23:10:16,25.02429,121.54124,和平東路二段96巷7號,Daan Dist.,YouBike2.0_NTUE Experiment Elementary School (...,No. 7， Ln. 96， Sec. 2， Heping E. Rd,9,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:10:16,2022-06-23
3,500101004,YouBike2.0_和平公園東側,11,4,大安區,2022-06-23 23:06:04,25.02351,121.54282,和平東路二段118巷33號,Daan Dist.,YouBike2.0_Heping Park (East),No. 33， Ln. 118， Sec. 2， Heping E. Rd,7,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:06:04,2022-06-23
4,500101005,YouBike2.0_辛亥復興路口西北側,16,1,大安區,2022-06-23 23:07:10,25.02153,121.54299,復興南路二段368號,Daan Dist.,YouBike2.0_Xinhai Fuxing Rd. Intersection (Nor...,No. 368， Sec. 2， Fuxing S. Rd.,15,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:07:10,2022-06-23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1090,500119087,YouBike2.0_臺大總圖書館西南側,30,1,臺大專區,2022-06-23 20:11:38,25.01690,121.54031,臺大圖書館西南側,NTU Dist,YouBike2.0_NTU Main Library(Southwest),NTU Main Library(Southwest),29,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 20:11:38,2022-06-23
1091,500119088,YouBike2.0_臺大黑森林西側,20,10,臺大專區,2022-06-23 23:03:38,25.01995,121.54347,臺大霖澤館南側,NTU Dist,YouBike2.0_NTU Black Forest(West),NTU Tsai Lecture Hall(South),10,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:03:38,2022-06-23
1092,500119089,YouBike2.0_臺大獸醫館南側,10,1,臺大專區,2022-06-23 23:01:37,25.01791,121.54242,臺大獸醫系館南側,NTU Dist,YouBike2.0_NTU Dept. of Veterinary Medicine(So...,NTU Dept. of Veterinary Medicine(South),9,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:01:37,2022-06-23
1093,500119090,YouBike2.0_臺大新體育館東南側,40,1,臺大專區,2022-06-23 23:04:03,25.02112,121.53591,臺大體育館東側,NTU Dist,YouBike2.0_NTU Sports Center(Southeast),NTU Sports Center(East),39,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:04:03,2022-06-23


In [35]:
# Filter the data (sarea is '中山區', i.e. Zhongshan District, and tot is at least 50)

bike[(bike['sarea'] == '中山區') & (bike['tot'] >= 50)] # & combines the two boolean masks; each comparison sits in its own parentheses

Unnamed: 0,sno,sna,tot,sbi,sarea,mday,lat,lng,ar,sareaen,snaen,aren,bemp,act,srcUpdateTime,updateTime,infoTime,infoDate
460,500107008,YouBike2.0_捷運劍南路站(2號出口),86,0,中山區,2022-06-23 22:54:04,25.08401,121.55535,敬業三路11號(植福路側對面),Zhongshan Dist,YouBike2.0_MRT Jiannan Rd. Station (Exit 2),No. 11， Jingye 3rd Rd. (Opposite),86,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 22:54:04,2022-06-23
475,500107023,YouBike2.0_興安華城,62,60,中山區,2022-06-23 23:13:11,25.05606,121.54175,興安街53-4號對側,Zhongshan Dist,YouBike2.0_Xing An Hua Cheng,No. 53-4， Xing'an St. (Opposite),1,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:13:11,2022-06-23
480,500107028,YouBike2.0_捷運松江南京站(7號出口),62,2,中山區,2022-06-23 23:08:38,25.05246,121.5332,松江路119號前,Zhongshan Dist,YouBike2.0_MRT Songjiang Nanjing Sta. (Exit 7),No. 119， Songjiang Rd.,60,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:08:38,2022-06-23
487,500107035,YouBike2.0_捷運中山國小站(2號出口),59,22,中山區,2022-06-23 23:16:38,25.0625,121.52629,捷運中山國小站(2號出口)後方,Zhongshan Dist,YouBike2.0_MRT Zhongshan Elementary School Sta...,MRT Zhongshan Elementary School Sta. (Exit 2) ...,36,1,2022-06-23 23:18:11,2022-06-23 23:18:51,2022-06-23 23:16:38,2022-06-23


In [37]:
# Practice: load the CSV from the web and show the first three rows

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)
gap1.head(3)

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
0,Afghanistan,1952,8425333.0,Asia,28.801,779.445314
1,Afghanistan,1957,9240934.0,Asia,30.332,820.85303
2,Afghanistan,1962,10267083.0,Asia,31.997,853.10071


In [40]:
# Following on: a slice works as well, without head

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)
gap1[:3]

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
0,Afghanistan,1952,8425333.0,Asia,28.801,779.445314
1,Afghanistan,1957,9240934.0,Asia,30.332,820.85303
2,Afghanistan,1962,10267083.0,Asia,31.997,853.10071


In [41]:
# Practice: keep only the rows for the year 2002 and check the resulting shape

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)

y2002 = gap1[gap1['year'] == 2002]
y2002.shape

(142, 6)

In [44]:
# Following on: attribute (dot) access to the column works as well

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)

y2002 = gap1[gap1.year == 2002]
y2002.shape

(142, 6)

In [45]:
# Exercise: the rows for Asia in the year 2002

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)

gap1[(gap1['continent'] == 'Asia') & (gap1['year'] == 2002)] # both conditions must hold

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
10,Afghanistan,2002,25268400.0,Asia,42.129,726.734055
94,Bahrain,2002,656397.0,Asia,74.795,23403.55927
106,Bangladesh,2002,135656800.0,Asia,62.013,1136.39043
226,Cambodia,2002,12926710.0,Asia,56.752,896.226015
298,China,2002,1280400000.0,Asia,72.028,3119.280896
670,Hong Kong China,2002,6762476.0,Asia,81.495,30209.01516
706,India,2002,1034173000.0,Asia,62.879,1746.769454
718,Indonesia,2002,211060000.0,Asia,68.588,2873.91287
730,Iran,2002,66907830.0,Asia,69.451,9240.761975
742,Iraq,2002,24001820.0,Asia,57.046,4390.717312


In [49]:
# Practice: keep the rows whose coverage exceeds 50

import pandas as pd

data = dict(
    name     = ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    year     = [2012, 2012, 2013, 2014, 2014],
    reports  = [4, 24, 31, 2, 3],
    coverage = [25, 94, 57, 62, 70],
)
df = pd.DataFrame(data = data,
                  index = ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'])

# Name the boolean mask first, then use it to select rows.
high_coverage = df['coverage'] > 50
df[high_coverage]

Unnamed: 0,name,year,reports,coverage
Pima,Molly,2012,24,94
Santa Cruz,Tina,2013,31,57
Maricopa,Jake,2014,2,62
Yuma,Amy,2014,3,70


In [50]:
# Practice: combine two conditions - coverage above 50 and fewer than 4 reports

import pandas as pd

data = dict(
    name     = ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    year     = [2012, 2012, 2013, 2014, 2014],
    reports  = [4, 24, 31, 2, 3],
    coverage = [25, 94, 57, 62, 70],
)
df = pd.DataFrame(data = data,
                  index = ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'])

# One named mask per condition; & keeps the rows where both are True.
well_covered = df['coverage'] > 50
few_reports = df['reports'] < 4
df[well_covered & few_reports]

Unnamed: 0,name,year,reports,coverage
Maricopa,Jake,2014,2,62
Yuma,Amy,2014,3,70


In [51]:
# Practice: a small frame with missing first names

import pandas as pd
import numpy as np

# Column order is spelled out once and handed to the constructor.
column_order = ['first_name', 'nationality', 'age']

raw_data = {
    'first_name': ['Jason', 'Molly', np.nan, np.nan, np.nan],
    'nationality': ['USA', 'USA', 'France', 'UK', 'UK'],
    'age': [42, 52, 36, 24, 70],
}
df = pd.DataFrame(raw_data, columns = column_order)

df

Unnamed: 0,first_name,nationality,age
0,Jason,USA,42
1,Molly,USA,52
2,,France,36
3,,UK,24
4,,UK,70


In [52]:
american = (df['nationality'] == 'USA') # boolean mask: nationality is USA
elderly = (df['age'] > 50) # boolean mask: age above 50
df[(american | elderly)] # | keeps the rows where either mask is True

Unnamed: 0,first_name,nationality,age
0,Jason,USA,42
1,Molly,USA,52
4,,UK,70


#### in與排序

In [2]:
# Practice_1: load the full data set and display it

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)
gap1

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
0,Afghanistan,1952,8425333.0,Asia,28.801,779.445314
1,Afghanistan,1957,9240934.0,Asia,30.332,820.853030
2,Afghanistan,1962,10267083.0,Asia,31.997,853.100710
3,Afghanistan,1967,11537966.0,Asia,34.020,836.197138
4,Afghanistan,1972,13079460.0,Asia,36.088,739.981106
...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418.0,Africa,62.351,706.157306
1700,Zimbabwe,1992,10704340.0,Africa,60.377,693.420786
1701,Zimbabwe,1997,11404948.0,Africa,46.809,792.449960
1702,Zimbabwe,2002,11926563.0,Africa,39.989,672.038623


In [3]:
# Following on: which countries are there?
gap1['country'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
       'Australia', 'Austria', 'Bahrain', 'Bangladesh', 'Belgium',
       'Benin', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
       'Canada', 'Central African Republic', 'Chad', 'Chile', 'China',
       'Colombia', 'Comoros', 'Congo Dem. Rep.', 'Congo Rep.',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Czech Republic',
       'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Ethiopia',
       'Finland', 'France', 'Gabon', 'Gambia', 'Germany', 'Ghana',
       'Greece', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Haiti',
       'Honduras', 'Hong Kong China', 'Hungary', 'Iceland', 'India',
       'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy',
       'Jamaica', 'Japan', 'Jordan', 'Kenya', 'Korea Dem. Rep.',
       'Korea Rep.', 'Kuwait', 'Lebanon',

In [4]:
# Which continents are there?

gap1['continent'].unique()

array(['Asia', 'Europe', 'Africa', 'Americas', 'Oceania'], dtype=object)

In [9]:
# isin(): True for each value that is found among the candidates passed to isin.

gap1[gap1['continent'].isin(['Asia', 'Africa', 'Oceania'])]

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
0,Afghanistan,1952,8425333.0,Asia,28.801,779.445314
1,Afghanistan,1957,9240934.0,Asia,30.332,820.853030
2,Afghanistan,1962,10267083.0,Asia,31.997,853.100710
3,Afghanistan,1967,11537966.0,Asia,34.020,836.197138
4,Afghanistan,1972,13079460.0,Asia,36.088,739.981106
...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418.0,Africa,62.351,706.157306
1700,Zimbabwe,1992,10704340.0,Africa,60.377,693.420786
1701,Zimbabwe,1997,11404948.0,Africa,46.809,792.449960
1702,Zimbabwe,2002,11926563.0,Africa,39.989,672.038623


In [11]:
# Following on: for a "not in" test, put ~ in front of the isin expression.

gap1[~gap1['continent'].isin(['Asia', 'Africa', 'Oceania'])]

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
12,Albania,1952,1282697.0,Europe,55.230,1601.056136
13,Albania,1957,1476505.0,Europe,59.280,1942.284244
14,Albania,1962,1728137.0,Europe,64.820,2312.888958
15,Albania,1967,1984060.0,Europe,66.220,2760.196931
16,Albania,1972,2263554.0,Europe,67.690,3313.422188
...,...,...,...,...,...,...
1639,Venezuela,1987,17910182.0,Americas,70.190,9883.584648
1640,Venezuela,1992,20265563.0,Americas,71.150,10733.926310
1641,Venezuela,1997,22374398.0,Americas,72.146,10165.495180
1642,Venezuela,2002,24287670.0,Americas,72.766,8605.047831


In [14]:
# Practice_2: a list argument is checked against every cell, whatever its column

import pandas as pd

# Values to look for anywhere in the frame.
wanted = [1, 3, 12, 'a']

df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})
x = df.isin(wanted)
print(x)

       A      B
0   True   True
1  False  False
2   True  False


In [15]:
# Practice_3: with a dict, each column is checked against its own list of candidates

import pandas as pd

df2 = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 4, 7]})

# Column label -> values accepted in that column.
per_column = {'A': [1, 3], 'B': [4, 7, 12]}
x2 = df2.isin(per_column)
print(x2)

       A      B
0   True  False
1  False   True
2   True   True


In [16]:
# Practice_4: a dict key that names no column ('C') is ignored, and a column with no key ('B') is all False

import pandas as pd

df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})

# Column label -> values accepted in that column.
per_column = {'A': [1, 3], 'C': [4, 7, 12]}
x3 = df.isin(per_column)
print(x3)

       A      B
0   True  False
1  False  False
2   True  False


In [17]:
# Practice_5: with a DataFrame argument, a cell matches only when the value at the same row label and column label is equal

import pandas as pd

df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})

other_columns = {'A': [1, 3, 3, 2], 'B': ['e', 'f', 'f', 'e']}
other = pd.DataFrame(other_columns)

x4 = df.isin(other)
print(x4)

       A      B
0   True  False
1  False  False
2   True   True


In [18]:
# Practice_6: the other frame shares no column labels with df, so nothing matches

# Imported here like the neighbouring practice cells, so this cell does not depend on an earlier one.
import pandas as pd

df = pd.DataFrame({'A':[1, 2, 3],
                   'B':['a', 'b', 'f']})

other = pd.DataFrame({'C':[1, 3, 3, 2],
                      'D':['e', 'f', 'f', 'e']})
x5 = df.isin(other)
print(x5)

       A      B
0  False  False
1  False  False
2  False  False


In [19]:
# Practice_7: keep two years, then sort the result

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)

# Years to keep
years = [1952, 2007]

gap1[gap1['year'].isin(years)].sort_values(by = 'year') # finish by sorting the kept rows by year

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
0,Afghanistan,1952,8425333.0,Asia,28.801,779.445314
672,Hungary,1952,9504000.0,Europe,64.030,5263.673816
684,Iceland,1952,147962.0,Europe,72.490,7267.688428
1344,Sierra Leone,1952,2143249.0,Africa,30.331,879.787736
696,India,1952,372000000.0,Asia,37.373,546.565749
...,...,...,...,...,...,...
779,Italy,2007,58147733.0,Europe,80.546,28569.719700
791,Jamaica,2007,2780132.0,Americas,72.567,7320.880262
803,Japan,2007,127467972.0,Asia,82.603,31656.068060
695,Iceland,2007,301931.0,Europe,81.757,36180.789190


In [23]:
# Practice_8: confirm that only the requested years remain after filtering

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)

# Years to keep
years = [1952, 2007]

gap1[gap1['year'].isin(years)]['year'].unique()

array([1952, 2007])

In [24]:
# Practice_9: ~ flips the isin result, giving True for the cells NOT among the candidates

import pandas as pd

# Values to look for anywhere in the frame.
wanted = [1, 3, 12, 'a']

df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})
found = df.isin(wanted)
x = ~found
print(x)

       A      B
0  False  False
1   True   True
2  False   True


In [25]:
# Practice_10: negated per-column membership test

import pandas as pd

df2 = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 4, 7]})

# Column label -> values accepted in that column.
per_column = {'A': [1, 3], 'B': [4, 7, 12]}
found = df2.isin(per_column)
x2 = ~found
print(x2)

       A      B
0  False   True
1   True  False
2  False  False


In [26]:
# Practice_11: negated test with a dict key ('C') that names no column

import pandas as pd

df2 = pd.DataFrame({'A':[1, 2, 3],
                    'B':[1, 4, 7]})

# Test df2, the frame built in this cell, not a df left over from an earlier cell.
x3 = ~df2.isin({'A':[1, 3],
                'C':[4, 7, 12]})
print(x3)

       A     B
0  False  True
1   True  True
2  False  True


In [27]:
# Practice_12 (values are compared position by position: same row label and same column label)

# Imported here like the neighbouring practice cells, so this cell does not depend on an earlier one.
import pandas as pd

df = pd.DataFrame({'A':[1, 2, 3],
                   'B':['a', 'b', 'f']})
other = pd.DataFrame({'A':[1, 3, 3, 2],
                      'B':['e', 'f', 'f', 'e']})
x4 = ~df.isin(other)
print(x4)

       A      B
0  False   True
1   True   True
2  False  False


In [28]:
# Practice_13: no shared column labels, so nothing matches and the negation is True everywhere

# Imported here like the neighbouring practice cells, so this cell does not depend on an earlier one.
import pandas as pd

df = pd.DataFrame({'A':[1, 2, 3],
                   'B':['a', 'b', 'f']})
other = pd.DataFrame({'C':[1, 3, 3, 2],
                      'D':['e', 'f', 'f', 'e']})
x5 = ~df.isin(other)
print(x5)

      A     B
0  True  True
1  True  True
2  True  True


In [29]:
# Practice_14
# sort_values(): sorts the rows by the contents of the chosen column.

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)

# Years to keep
years = [1952, 2007]

gap1[gap1['year'].isin(years)].sort_values(by = 'year', ascending = False) # ascending = True sorts small to large; False, as here, sorts large to small

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
1703,Zimbabwe,2007,12311143.0,Africa,43.487,469.709298
671,Hong Kong China,2007,6980412.0,Asia,82.208,39724.978670
1139,Nigeria,2007,135031164.0,Africa,46.859,2013.977305
575,Germany,2007,82400996.0,Europe,79.406,32170.374420
1415,South Africa,2007,43997828.0,Africa,49.339,9269.657808
...,...,...,...,...,...,...
768,Italy,1952,47666000.0,Europe,65.940,4931.404155
780,Jamaica,1952,1426095.0,Americas,58.530,2898.530881
792,Japan,1952,86459025.0,Asia,63.030,3216.956347
804,Jordan,1952,607914.0,Asia,43.158,1546.907807


In [30]:
# Practice_14-1
# sort_index(): sorts the rows by their index labels.

import pandas as pd

data_url = 'http://bit.ly/2cLzoxH'
gap1 = pd.read_csv(data_url)

# Years to keep
years = [1952, 2007]

gap1[gap1['year'].isin(years)].sort_index().reset_index(drop = False) # renumber the index; drop = False keeps the old labels in an 'index' column

Unnamed: 0,index,country,year,pop,continent,lifeExp,gdpPercap
0,0,Afghanistan,1952,8425333.0,Asia,28.801,779.445314
1,11,Afghanistan,2007,31889923.0,Asia,43.828,974.580338
2,12,Albania,1952,1282697.0,Europe,55.230,1601.056136
3,23,Albania,2007,3600523.0,Europe,76.423,5937.029526
4,24,Algeria,1952,9279525.0,Africa,43.077,2449.008185
...,...,...,...,...,...,...,...
279,1679,Yemen Rep.,2007,22211743.0,Asia,62.698,2280.769906
280,1680,Zambia,1952,2672000.0,Africa,42.038,1147.388831
281,1691,Zambia,2007,11746035.0,Africa,42.384,1271.211593
282,1692,Zimbabwe,1952,3080907.0,Africa,48.451,406.884115


In [32]:
# Practice_15: a random frame whose row labels and column names are deliberately out of order

import pandas as pd
import numpy as np

row_labels = [1, 4, 6, 2, 3, 5, 9, 8, 0, 7]
col_labels = ['col2', 'col1']

# One standard-normal draw per cell.
values = np.random.randn(10, 2)
df1 = pd.DataFrame(values, index = row_labels, columns = col_labels)
df1

Unnamed: 0,col2,col1
1,-0.642394,0.622027
4,0.13657,0.366777
6,0.248301,-2.099851
2,-1.859496,0.477345
3,1.280491,-1.181904
5,-0.987486,0.690861
9,0.360633,0.342356
8,-0.234155,-1.462059
0,0.400894,-0.94797
7,2.19883,-0.288763


In [33]:
# Practice_15-1: sort_index() puts the rows in ascending label order

import pandas as pd
import numpy as np

row_labels = [1, 4, 6, 2, 3, 5, 9, 8, 0, 7]
col_labels = ['col2', 'col1']

values = np.random.randn(10, 2)
df1 = pd.DataFrame(values, index = row_labels, columns = col_labels)

# A new frame is returned; df1 keeps its original row order.
df2 = df1.sort_index()
df2

Unnamed: 0,col2,col1
0,1.526465,-2.597338
1,0.163265,1.50169
2,-0.731363,-0.098907
3,0.647095,-0.097934
4,0.689025,-1.870898
5,-1.676187,-0.8758
6,0.141365,1.666005
7,-0.510417,0.669876
8,0.014523,1.464536
9,0.459408,-1.481462


In [34]:
# Practice_15-2
# sort_index(ascending = False): rows ordered by index label, largest first.

import pandas as pd
import numpy as np

# A fixed seed makes the numbers identical on every run of this cell;
# RandomState keeps the seed local instead of touching numpy's global state.
df1 = pd.DataFrame(np.random.RandomState(0).randn(10, 2),
                   index = [1, 4, 6, 2, 3, 5, 9, 8, 0, 7],
                   columns = ['col2', 'col1'])

# sort_index() returns a new frame; df1 keeps its shuffled row order.
df3 = df1.sort_index(ascending = False)
df3

Unnamed: 0,col2,col1
9,-0.801303,-0.713181
8,1.262156,-1.122009
7,0.528101,-0.746381
6,0.285043,-1.965731
5,-1.218886,-0.264742
4,0.478546,0.301502
3,-2.327684,-0.301214
2,-0.728783,0.300184
1,0.419616,0.431435
0,0.140031,0.63862


In [35]:
# Practice_15-3
# sort_index() sorts the row labels by default; with axis = 1 it sorts the
# column labels instead.

import pandas as pd
import numpy as np

# A fixed seed makes the numbers identical on every run of this cell;
# RandomState keeps the seed local instead of touching numpy's global state.
df1 = pd.DataFrame(np.random.RandomState(0).randn(10, 2),
                   index = [1, 4, 6, 2, 3, 5, 9, 8, 0, 7],
                   columns = ['col2', 'col1'])

# Columns are rearranged left-to-right by label ('col1' before 'col2');
# the row order is left as it was.
df4 = df1.sort_index(axis = 1)
df4

Unnamed: 0,col1,col2
1,-0.630264,0.620089
4,-1.376721,0.052666
6,-0.959511,-0.492965
2,-0.206816,-0.514086
3,-0.552335,0.151082
5,-0.198885,0.614087
9,2.063589,1.559098
8,-0.221976,0.757721
0,-0.053728,0.306088
7,0.648954,0.973473


In [37]:
# Practice_16
# Download the IMDb ratings CSV and display the resulting frame.

import pandas as pd

url = 'http://bit.ly/imdbratings'
df1 = pd.read_csv(filepath_or_buffer = url)
df1

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."
...,...,...,...,...,...,...
974,7.4,Tootsie,PG,Comedy,116,"[u'Dustin Hoffman', u'Jessica Lange', u'Teri G..."
975,7.4,Back to the Future Part III,PG,Adventure,118,"[u'Michael J. Fox', u'Christopher Lloyd', u'Ma..."
976,7.4,Master and Commander: The Far Side of the World,PG-13,Action,138,"[u'Russell Crowe', u'Paul Bettany', u'Billy Bo..."
977,7.4,Poltergeist,PG,Horror,114,"[u'JoBeth Williams', u""Heather O'Rourke"", u'Cr..."


In [1]:
# Practice_16-1
# Sort one column, selected with attribute access (df1.title), in ascending
# order. The result is a Series that keeps the original row labels.

import pandas as pd

url = 'http://bit.ly/imdbratings'
df1 = pd.read_csv(filepath_or_buffer = url)
df2 = df1.title.sort_values(ascending = True)
df2

542     (500) Days of Summer
5               12 Angry Men
201         12 Years a Slave
698                127 Hours
110    2001: A Space Odyssey
               ...          
955         Zero Dark Thirty
677                   Zodiac
615               Zombieland
526                     Zulu
864                    [Rec]
Name: title, Length: 979, dtype: object

In [2]:
# Practice_16-2 (sorting a single column)
# Same sort as before, with the column selected by bracket lookup
# (df1['title']).

import pandas as pd

url = 'http://bit.ly/imdbratings'
df1 = pd.read_csv(filepath_or_buffer = url)
df3 = df1['title'].sort_values(ascending = True)
df3

542     (500) Days of Summer
5               12 Angry Men
201         12 Years a Slave
698                127 Hours
110    2001: A Space Odyssey
               ...          
955         Zero Dark Thirty
677                   Zodiac
615               Zombieland
526                     Zulu
864                    [Rec]
Name: title, Length: 979, dtype: object

In [40]:
# Practice_16-3
# Selecting with double brackets yields a DataFrame rather than a Series, so
# sort_values() needs `by` to name the column.
# NOTE: relies on `df1` already being loaded by an earlier cell.

title_frame = df1[['title']]
title_frame.sort_values(by = 'title')

Unnamed: 0,title
542,(500) Days of Summer
5,12 Angry Men
201,12 Years a Slave
698,127 Hours
110,2001: A Space Odyssey
...,...
955,Zero Dark Thirty
677,Zodiac
615,Zombieland
526,Zulu


In [3]:
# Practice_16-4
# Sort the title column in descending order.

import pandas as pd

url = 'http://bit.ly/imdbratings'
df1 = pd.read_csv(filepath_or_buffer = url)
titles = df1.title
df4 = titles.sort_values(ascending = False)
df4

864                    [Rec]
526                     Zulu
615               Zombieland
677                   Zodiac
955         Zero Dark Thirty
               ...          
110    2001: A Space Odyssey
698                127 Hours
201         12 Years a Slave
5               12 Angry Men
542     (500) Days of Summer
Name: title, Length: 979, dtype: object

In [4]:
# Practice_17 (by = column)
# Reload the IMDb ratings frame and display it unsorted.

import pandas as pd

url = 'http://bit.ly/imdbratings'
df1 = pd.read_csv(filepath_or_buffer = url)
df1

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."
...,...,...,...,...,...,...
974,7.4,Tootsie,PG,Comedy,116,"[u'Dustin Hoffman', u'Jessica Lange', u'Teri G..."
975,7.4,Back to the Future Part III,PG,Adventure,118,"[u'Michael J. Fox', u'Christopher Lloyd', u'Ma..."
976,7.4,Master and Commander: The Far Side of the World,PG-13,Action,138,"[u'Russell Crowe', u'Paul Bettany', u'Billy Bo..."
977,7.4,Poltergeist,PG,Horror,114,"[u'JoBeth Williams', u""Heather O'Rourke"", u'Cr..."


In [5]:
# Practice_17-1
# Sort the whole frame by the 'title' column, ascending.

import pandas as pd

url = 'http://bit.ly/imdbratings'
df1 = pd.read_csv(filepath_or_buffer = url)
df2 = df1.sort_values(by = 'title', ascending = True)
df2

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
542,7.8,(500) Days of Summer,PG-13,Comedy,95,"[u'Zooey Deschanel', u'Joseph Gordon-Levitt', ..."
5,8.9,12 Angry Men,NOT RATED,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
201,8.1,12 Years a Slave,R,Biography,134,"[u'Chiwetel Ejiofor', u'Michael Kenneth Willia..."
698,7.6,127 Hours,R,Adventure,94,"[u'James Franco', u'Amber Tamblyn', u'Kate Mara']"
110,8.3,2001: A Space Odyssey,G,Mystery,160,"[u'Keir Dullea', u'Gary Lockwood', u'William S..."
...,...,...,...,...,...,...
955,7.4,Zero Dark Thirty,R,Drama,157,"[u'Jessica Chastain', u'Joel Edgerton', u'Chri..."
677,7.7,Zodiac,R,Crime,157,"[u'Jake Gyllenhaal', u'Robert Downey Jr.', u'M..."
615,7.7,Zombieland,R,Comedy,88,"[u'Jesse Eisenberg', u'Emma Stone', u'Woody Ha..."
526,7.8,Zulu,UNRATED,Drama,138,"[u'Stanley Baker', u'Jack Hawkins', u'Ulla Jac..."


In [6]:
# Practice_17-2
# Sort the whole frame by 'duration', longest first.

import pandas as pd

url = 'http://bit.ly/imdbratings'
df1 = pd.read_csv(filepath_or_buffer = url)
df3 = df1.sort_values(ascending = False, by = 'duration')
df3

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
476,7.8,Hamlet,PG-13,Drama,242,"[u'Kenneth Branagh', u'Julie Christie', u'Dere..."
157,8.2,Gone with the Wind,G,Drama,238,"[u'Clark Gable', u'Vivien Leigh', u'Thomas Mit..."
78,8.4,Once Upon a Time in America,R,Crime,229,"[u'Robert De Niro', u'James Woods', u'Elizabet..."
142,8.3,Lagaan: Once Upon a Time in India,PG,Adventure,224,"[u'Aamir Khan', u'Gracy Singh', u'Rachel Shell..."
445,7.9,The Ten Commandments,APPROVED,Adventure,220,"[u'Charlton Heston', u'Yul Brynner', u'Anne Ba..."
...,...,...,...,...,...,...
293,8.1,Duck Soup,PASSED,Comedy,68,"[u'Groucho Marx', u'Harpo Marx', u'Chico Marx']"
88,8.4,The Kid,NOT RATED,Comedy,68,"[u'Charles Chaplin', u'Edna Purviance', u'Jack..."
258,8.1,The Cabinet of Dr. Caligari,UNRATED,Crime,67,"[u'Werner Krauss', u'Conrad Veidt', u'Friedric..."
338,8.0,Battleship Potemkin,UNRATED,History,66,"[u'Aleksandr Antonov', u'Vladimir Barsky', u'G..."


In [7]:
# Practice_17-3
# Sort by several columns: `by` takes a list, and rows are ordered by
# 'star_rating' first, with 'duration' breaking ties.

import pandas as pd

url = 'http://bit.ly/imdbratings'
columns = ['star_rating', 'duration']
df1 = pd.read_csv(filepath_or_buffer = url)
df4 = df1.sort_values(by = columns, ascending = True)
df4

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
938,7.4,Alice in Wonderland,G,Animation,75,"[u'Kathryn Beaumont', u'Ed Wynn', u'Richard Ha..."
948,7.4,Frances Ha,R,Comedy,86,"[u'Greta Gerwig', u'Mickey Sumner', u'Adam Dri..."
966,7.4,The Simpsons Movie,PG-13,Animation,87,"[u'Dan Castellaneta', u'Julie Kavner', u'Nancy..."
947,7.4,Eraserhead,UNRATED,Drama,89,"[u'Jack Nance', u'Charlotte Stewart', u'Allen ..."
971,7.4,Death at a Funeral,R,Comedy,90,"[u'Matthew Macfadyen', u'Peter Dinklage', u'Ew..."
...,...,...,...,...,...,...
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"


#### 處理日期資訊

In [8]:
# date_range(): build a run of dates; `periods` is how many days to generate.
# Write dates as '20210701' rather than '07/01/2021' so there is no doubt
# about how the string is read.

import pandas as pd

pd.date_range('20210701', periods = 10)

DatetimeIndex(['2021-07-01', '2021-07-02', '2021-07-03', '2021-07-04',
               '2021-07-05', '2021-07-06', '2021-07-07', '2021-07-08',
               '2021-07-09', '2021-07-10'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# The dates can also be shown one per line by iterating over the range.

import pandas as pd

dates = pd.date_range(start = '20210701', periods = 10)
for d in dates:
    print(d)

2021-07-01 00:00:00
2021-07-02 00:00:00
2021-07-03 00:00:00
2021-07-04 00:00:00
2021-07-05 00:00:00
2021-07-06 00:00:00
2021-07-07 00:00:00
2021-07-08 00:00:00
2021-07-09 00:00:00
2021-07-10 00:00:00


In [10]:
# Continuing from the previous cell: each timestamp can be reformatted as a
# compact 'YYYYMMDD' string with strftime().

import pandas as pd

dates = pd.date_range(start = '20210701', periods = 10)
for d in dates:
    compact = d.strftime('%Y%m%d')
    print(d, '--->', compact)

2021-07-01 00:00:00 ---> 20210701
2021-07-02 00:00:00 ---> 20210702
2021-07-03 00:00:00 ---> 20210703
2021-07-04 00:00:00 ---> 20210704
2021-07-05 00:00:00 ---> 20210705
2021-07-06 00:00:00 ---> 20210706
2021-07-07 00:00:00 ---> 20210707
2021-07-08 00:00:00 ---> 20210708
2021-07-09 00:00:00 ---> 20210709
2021-07-10 00:00:00 ---> 20210710


In [11]:
# bdate_range(): like date_range(), but yields business days only
# (Saturdays and Sundays are skipped).

import pandas as pd

business_days = pd.bdate_range(start = '20210701', periods = 10)
for d in business_days:
    compact = d.strftime('%Y%m%d')
    print(d, '--->', compact)

2021-07-01 00:00:00 ---> 20210701
2021-07-02 00:00:00 ---> 20210702
2021-07-05 00:00:00 ---> 20210705
2021-07-06 00:00:00 ---> 20210706
2021-07-07 00:00:00 ---> 20210707
2021-07-08 00:00:00 ---> 20210708
2021-07-09 00:00:00 ---> 20210709
2021-07-12 00:00:00 ---> 20210712
2021-07-13 00:00:00 ---> 20210713
2021-07-14 00:00:00 ---> 20210714


In [12]:
# date_range() and bdate_range() both accept `freq`, the step between dates.
# 'W' (upper case) is the documented weekly alias and matches the other cells;
# the lower-case 'w' used before is not the canonical spelling.
# Passing `freq` replaces bdate_range()'s business-day default, which is why
# the dates printed below are Sundays rather than working days.

import pandas as pd

for d in pd.bdate_range(start = '20210701', end = '20211231', freq = 'W'):
    print(d, '--->', d.strftime('%Y%m%d'))

2021-07-04 00:00:00 ---> 20210704
2021-07-11 00:00:00 ---> 20210711
2021-07-18 00:00:00 ---> 20210718
2021-07-25 00:00:00 ---> 20210725
2021-08-01 00:00:00 ---> 20210801
2021-08-08 00:00:00 ---> 20210808
2021-08-15 00:00:00 ---> 20210815
2021-08-22 00:00:00 ---> 20210822
2021-08-29 00:00:00 ---> 20210829
2021-09-05 00:00:00 ---> 20210905
2021-09-12 00:00:00 ---> 20210912
2021-09-19 00:00:00 ---> 20210919
2021-09-26 00:00:00 ---> 20210926
2021-10-03 00:00:00 ---> 20211003
2021-10-10 00:00:00 ---> 20211010
2021-10-17 00:00:00 ---> 20211017
2021-10-24 00:00:00 ---> 20211024
2021-10-31 00:00:00 ---> 20211031
2021-11-07 00:00:00 ---> 20211107
2021-11-14 00:00:00 ---> 20211114
2021-11-21 00:00:00 ---> 20211121
2021-11-28 00:00:00 ---> 20211128
2021-12-05 00:00:00 ---> 20211205
2021-12-12 00:00:00 ---> 20211212
2021-12-19 00:00:00 ---> 20211219
2021-12-26 00:00:00 ---> 20211226


In [13]:
# Practice
# Ten daily dates, then ten weekly dates from the same start, then the weekly
# dates wrapped in a Series.

import pandas as pd

first_day = '20181010'
separator = "-------------"

a = pd.date_range(first_day, periods = 10)
print(a)
print(separator)

# `a` is rebound here: from now on it holds the weekly range.
a = pd.date_range(first_day, periods = 10, freq = 'W')
print(a)
print(separator)

s1 = pd.Series(a)
print(s1)

DatetimeIndex(['2018-10-10', '2018-10-11', '2018-10-12', '2018-10-13',
               '2018-10-14', '2018-10-15', '2018-10-16', '2018-10-17',
               '2018-10-18', '2018-10-19'],
              dtype='datetime64[ns]', freq='D')
-------------
DatetimeIndex(['2018-10-14', '2018-10-21', '2018-10-28', '2018-11-04',
               '2018-11-11', '2018-11-18', '2018-11-25', '2018-12-02',
               '2018-12-09', '2018-12-16'],
              dtype='datetime64[ns]', freq='W-SUN')
-------------
0   2018-10-14
1   2018-10-21
2   2018-10-28
3   2018-11-04
4   2018-11-11
5   2018-11-18
6   2018-11-25
7   2018-12-02
8   2018-12-09
9   2018-12-16
dtype: datetime64[ns]


In [14]:
# Demonstrates the standard-library datetime module.
# `pd.datetime` was only a deprecated alias of datetime.datetime and no longer
# exists in pandas 2.x, so the class is imported from the datetime module and
# used directly. (Year 3016 is also beyond what a nanosecond pd.Timestamp can
# represent, so a plain datetime is the right type here.)

from datetime import datetime

import pandas as pd

start = datetime(3016, 3, 1)

# Parsing the same date from a 'YYYYMMDD' string gives an equal datetime.
datetime.strptime('30160301', '%Y%m%d')

  after removing the cwd from sys.path.


datetime.datetime(3016, 3, 1, 0, 0)

In [15]:
# Practice
# Build ranges between two explicit timestamps: every calendar day (a),
# business days only (b), and month-end dates (c).

import pandas as pd

start = pd.to_datetime('20181010')
end = pd.to_datetime('20181225')
a = pd.date_range(start, end)
b = pd.bdate_range(start, end)

# The offset object is used instead of the 'M' string alias: newer pandas
# releases deprecate that spelling, while MonthEnd() means the same thing on
# every version. Only month ends inside [start, end] are produced.
c = pd.date_range(start, end, freq = pd.offsets.MonthEnd())
c

DatetimeIndex(['2018-10-31', '2018-11-30'], dtype='datetime64[ns]', freq='M')

In [16]:
# Practice
# Month-end dates for the whole of 2021.

import pandas as pd

# The offset object is used instead of the 'M' string alias: newer pandas
# releases deprecate that spelling, while MonthEnd() means the same thing on
# every version.
month_ends = pd.date_range(start = '20210101', end = '20211231',
                           freq = pd.offsets.MonthEnd())
month_ends

DatetimeIndex(['2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31'],
              dtype='datetime64[ns]', freq='M')

## HW：開啟檔案計算各種資訊
#### 開啟 HW13__Data.csv（aapl 股價資料），列出收盤價與成交量；加權平均代表以成交量作為權重

In [18]:
import pandas as pd

# Read the homework price table from the working directory and display it.
aapl = pd.read_csv(filepath_or_buffer = 'HW13__Data.csv')
aapl

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2008-10-14,116.26,116.40,103.14,104.08,70749800,104.08
1,2008-10-13,104.55,110.53,101.02,110.26,54967000,110.26
2,2008-10-10,85.70,100.00,85.00,96.80,79260700,96.80
3,2008-10-09,93.35,95.80,86.60,88.74,57763700,88.74
4,2008-10-08,85.91,96.33,85.68,89.79,78847900,89.79
...,...,...,...,...,...,...,...
6076,1984-09-13,27.50,27.62,27.50,27.50,7429600,3.14
6077,1984-09-12,26.87,27.00,26.12,26.12,4773600,2.98
6078,1984-09-11,26.62,27.37,26.62,26.87,5444000,3.07
6079,1984-09-10,26.50,26.62,25.87,26.37,2346400,3.01


In [19]:
# Summary statistics for the closing price: describe() reports max, min and
# mean, and its 50% row is the median.
# NOTE: relies on `aapl` already being loaded by an earlier cell.

close_prices = aapl[['Close']]
close_prices.describe()

Unnamed: 0,Close
count,6081.0
mean,46.798619
std,33.947235
min,12.94
25%,24.69
50%,38.13
75%,53.61
max,199.83


In [20]:
# Weighted average of the closing price, computed with numpy and using the
# traded volume of each row as its weight.
# NOTE: relies on `aapl` already being loaded by an earlier cell.

import numpy as np

np.average(aapl['Close'], weights = aapl['Volume'])

63.11373661448603