In [1]:
import requests
import datetime
import pandas as pd
from datetime import timedelta

# Target the previous calendar day, formatted as YYYYMMDD for the `date` query parameter.
# NOTE(review): the previous calendar day may not be a trading day — confirm how the
# endpoint responds in that case before relying on the result.
date = datetime.datetime.now() - timedelta(days=1)
datestr = date.strftime("%Y%m%d")

# Fetch the daily report as JSON.
# A timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing JSON/Key error
# in a later cell.
res = requests.get(
    'http://www.twse.com.tw/exchangeReport/MI_INDEX?response=json&date={}&type=ALLBUT0999'.format(datestr),
    timeout=30,
)
res.raise_for_status()

In [46]:
# Decode the JSON body once (each res.json() call re-parses the whole response) and
# build the frame from the 'data9' rows using the 'fields9' entries as column names.
payload = res.json()
df = pd.DataFrame(payload['data9'], columns=payload['fields9'])
print(len(df))
df.head()

1125


Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,50,元大台灣50,5700896,6513,770243301,135.25,135.65,134.5,135.25,<p style= color:red>+</p>,0.55,135.25,51,135.3,70,0.0
1,51,元大中型100,36956,35,1752458,47.49,47.5,47.22,47.45,<p style= color:red>+</p>,0.13,47.34,27,47.45,1,0.0
2,52,富邦科技,920103,814,116518108,126.8,127.5,125.85,126.85,<p style= color:red>+</p>,1.05,126.75,17,126.85,15,0.0
3,53,元大電子,47216,40,3159293,68.8,68.8,65.65,66.2,<p style= color:red>+</p>,0.6,66.2,1,66.25,1,0.0
4,54,元大台商50,21006,9,640952,30.52,30.63,30.49,30.49,<p style= color:green>-</p>,0.05,30.51,53,30.7,3,0.0


In [47]:
# Map every distinct raw value of the 漲跌(+/-) column to a plain symbol:
# '-' if the value contains a minus sign, otherwise '+' if it contains a plus sign,
# otherwise 'X'.
up_down_dict = {
    raw: '-' if '-' in raw else ('+' if '+' in raw else 'X')
    for raw in set(df['漲跌(+/-)'])
}

up_down_dict

{'<p style= color:red>+</p>': '+',
 '<p> </p>': 'X',
 '<p>X</p>': 'X',
 '<p style= color:green>-</p>': '-'}

In [48]:
# Rewrite the 漲跌(+/-) column, swapping each raw value for its symbol from up_down_dict.
df['漲跌(+/-)'] = df['漲跌(+/-)'].replace(to_replace=up_down_dict)
print(df.shape[0])
df.head(5)

1125


Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,50,元大台灣50,5700896,6513,770243301,135.25,135.65,134.5,135.25,+,0.55,135.25,51,135.3,70,0.0
1,51,元大中型100,36956,35,1752458,47.49,47.5,47.22,47.45,+,0.13,47.34,27,47.45,1,0.0
2,52,富邦科技,920103,814,116518108,126.8,127.5,125.85,126.85,+,1.05,126.75,17,126.85,15,0.0
3,53,元大電子,47216,40,3159293,68.8,68.8,65.65,66.2,+,0.6,66.2,1,66.25,1,0.0
4,54,元大台商50,21006,9,640952,30.52,30.63,30.49,30.49,-,0.05,30.51,53,30.7,3,0.0


In [55]:
# Keep only rows that have an opening price for the day ('--' marks a missing one),
# then renumber the rows from 0.
df = df.loc[df['開盤價'].ne('--')].reset_index(drop=True)
print(df.shape[0])
df.head(5)

1114


Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,50,元大台灣50,5700896,6513,770243301,135.25,135.65,134.5,135.25,+,0.55,135.25,51,135.3,70,0.0
1,51,元大中型100,36956,35,1752458,47.49,47.5,47.22,47.45,+,0.13,47.34,27,47.45,1,0.0
2,52,富邦科技,920103,814,116518108,126.8,127.5,125.85,126.85,+,1.05,126.75,17,126.85,15,0.0
3,53,元大電子,47216,40,3159293,68.8,68.8,65.65,66.2,+,0.6,66.2,1,66.25,1,0.0
4,54,元大台商50,21006,9,640952,30.52,30.63,30.49,30.49,-,0.05,30.51,53,30.7,3,0.0


In [56]:
# Cast every cell to str and remove the thousands separator "," so numeric text can be parsed.
# A per-column ``.str.replace`` is used instead of ``DataFrame.applymap``, which is
# deprecated in recent pandas releases.
df = df.astype(str)
df = df.apply(lambda col: col.str.replace(',', '', regex=False))

# Use the 證券代號 column as the index.
df = df.set_index('證券代號')


def _to_numeric_or_keep(col):
    """Return ``col`` converted to a numeric dtype, or ``col`` unchanged if parsing fails.

    This reproduces ``pd.to_numeric(col, errors='ignore')`` without relying on the
    deprecated ``errors='ignore'`` option: a column containing any non-numeric text
    (for example names, '+'/'-' markers or '--' placeholders) is left as-is.
    Use ``pd.to_numeric(col, errors='coerce')`` instead if unparsable cells should
    become NaN.
    """
    try:
        return pd.to_numeric(col)
    except (ValueError, TypeError):
        return col


# Convert each column from text to numbers where every value in it parses.
df = df.apply(_to_numeric_or_keep)
df.head()

Unnamed: 0_level_0,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
證券代號,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
50,元大台灣50,5700896,6513,770243301,135.25,135.65,134.5,135.25,+,0.55,135.25,51,135.3,70,0.0
51,元大中型100,36956,35,1752458,47.49,47.5,47.22,47.45,+,0.13,47.34,27,47.45,1,0.0
52,富邦科技,920103,814,116518108,126.8,127.5,125.85,126.85,+,1.05,126.75,17,126.85,15,0.0
53,元大電子,47216,40,3159293,68.8,68.8,65.65,66.2,+,0.6,66.2,1,66.25,1,0.0
54,元大台商50,21006,9,640952,30.52,30.63,30.49,30.49,-,0.05,30.51,53,30.7,3,0.0


In [57]:
# Remove columns that carry no data:
#   axis='columns' evaluates each column for NaN values, and
#   how='all' drops a column only when every value in it is NaN.
# (Earlier approach: df = df[df.columns[df.isnull().sum() != len(df)]])

df.dropna(axis='columns', how='all', inplace=True)
print(df.shape[0])
df.head(5)

1114


Unnamed: 0_level_0,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
證券代號,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
50,元大台灣50,5700896,6513,770243301,135.25,135.65,134.5,135.25,+,0.55,135.25,51,135.3,70,0.0
51,元大中型100,36956,35,1752458,47.49,47.5,47.22,47.45,+,0.13,47.34,27,47.45,1,0.0
52,富邦科技,920103,814,116518108,126.8,127.5,125.85,126.85,+,1.05,126.75,17,126.85,15,0.0
53,元大電子,47216,40,3159293,68.8,68.8,65.65,66.2,+,0.6,66.2,1,66.25,1,0.0
54,元大台商50,21006,9,640952,30.52,30.63,30.49,30.49,-,0.05,30.51,53,30.7,3,0.0


In [58]:
# Print a summary of the frame: index range, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1114 entries, 0050 to 9958
Data columns (total 15 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   證券名稱     1114 non-null   object 
 1   成交股數     1114 non-null   int64  
 2   成交筆數     1114 non-null   int64  
 3   成交金額     1114 non-null   int64  
 4   開盤價      1114 non-null   float64
 5   最高價      1114 non-null   float64
 6   最低價      1114 non-null   float64
 7   收盤價      1114 non-null   float64
 8   漲跌(+/-)  1114 non-null   object 
 9   漲跌價差     1114 non-null   float64
 10  最後揭示買價   1114 non-null   object 
 11  最後揭示買量   1114 non-null   int64  
 12  最後揭示賣價   1114 non-null   object 
 13  最後揭示賣量   1114 non-null   int64  
 14  本益比      1114 non-null   float64
dtypes: float64(6), int64(5), object(4)
memory usage: 139.2+ KB


In [59]:
# Select rows whose 收盤價 divided by 開盤價 is greater than 1.05.
df.loc[df['收盤價'].div(df['開盤價']).gt(1.05)]

Unnamed: 0_level_0,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
證券代號,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2331,精英,85891844,30674,2933490058,32.35,35.5,31.95,34.5,+,2.2,34.5,316,34.55,12,90.79
2338,光罩,101380210,36094,5410510525,51.6,54.2,50.3,54.2,+,4.85,54.0,2,54.20,473,35.89
2465,麗臺,8235693,4244,255494756,29.5,32.85,28.8,32.8,+,2.9,32.75,4,32.80,58,0.0
2477,美隆電,8463705,3514,298881127,32.65,35.5,32.55,35.5,+,3.2,35.5,126,--,0,25.18
2727,王品,5672459,4597,1012092665,171.5,183.0,169.0,180.5,+,10.0,180.0,57,180.50,2,39.41
3545,敦泰,73160232,41135,8686023140,115.5,123.0,114.0,123.0,+,11.0,122.5,16,123.00,1903,50.83
4571,鈞興-KY,736503,614,131911177,174.5,184.5,174.5,184.5,+,11.5,184.5,6,185.00,13,37.58
6243,迅杰,4565772,1928,81969049,17.2,18.75,16.9,18.4,+,0.9,18.3,3,18.40,60,0.0
8482,商億-KY,388001,239,32839889,82.0,89.2,81.8,89.2,+,8.1,89.2,22,--,0,14.5
