## Reference

[1] [超簡單台股每日爬蟲教學](https://www.finlab.tw/%E8%B6%85%E7%B0%A1%E5%96%AE%E5%8F%B0%E8%82%A1%E6%AF%8F%E6%97%A5%E7%88%AC%E8%9F%B2%E6%95%99%E5%AD%B8/)

***

# Part I: Get the Taiwan Stock Daily Dataset

 - 到台灣證券交易所抓取資料
 - 可以利用已經寫好的套件庫去抓取。

----
 - [臺灣證券交易所](https://www.twse.com.tw/zh/)
 - [twstock套件](https://github.com/mlouielu/twstock)
 

In [1]:
# loading package

import requests
import pandas as pd
import io
import re
import time
import datetime


In [201]:
## data from TWSE
# Request the MI_INDEX report for 2019-07-04 as CSV text.
page_url = 'http://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date=' + '20190704' + '&type=ALLBUT0999'
# A timeout keeps a stalled connection from hanging the kernel forever;
# raise_for_status() surfaces HTTP errors here instead of while parsing.
page = requests.get(page_url, timeout=30)
page.raise_for_status()
# One list entry per line of the CSV response.
use_text = page.text.splitlines()

In [115]:
use_text

['"108年07月04日 大盤統計資訊"',
 '"指數","收盤指數","漲跌(+/-)","漲跌點數","漲跌百分比(%)",',
 '"寶島股價指數","12,340.56","+","38.83","0.32",',
 '"發行量加權股價指數","10,775.90","+","32.13","0.30",',
 '"臺灣公司治理100指數","6,066.85","+","14.85","0.25",',
 '"臺灣50指數","8,034.43","+","26.22","0.33",',
 '"臺灣中型100指數","8,042.26","+","37.19","0.46",',
 '"臺灣資訊科技指數","10,341.46","+","57.15","0.56",',
 '"臺灣發達指數","8,403.95","+","19.94","0.24",',
 '"臺灣高股息指數","6,272.83","+","10.28","0.16",',
 '"臺灣永續指數","5,219.55","+","16.43","0.32",',
 '"臺灣就業99指數","6,357.14","+","16.02","0.25",',
 '"臺灣高薪100指數","5,999.15","-","2.55","-0.04",',
 '"未含金融指數","9,034.51","+","31.02","0.34",',
 '"未含電子指數","15,214.17","+","29.64","0.20",',
 '"未含金融電子指數","12,822.76","+","34.15","0.27",',
 '"小型股300指數","6,518.18","+","27.92","0.43",',
 '"漲升股利150指數","6,140.52","+","13.88","0.23",',
 '"漲升股利100指數","6,139.02","+","7.62","0.12",',
 '"藍籌30指數","5,696.67","+","8.10","0.14",',
 '"工業菁英30指數","5,913.31","+","5.43","0.09",',
 '"電子菁英30指數","5,768.84","+","1.00","0.02",',
 '"低波動股利精選30指數","

In [116]:
# example of enumerate: each iteration yields (position, item)
for i, data in enumerate([2,4,6]):
    # print the position and the item on separate lines
    print(i, data, sep='\n')

0
2
1
4
2
6


In [117]:
# Header line that opens the per-security table inside the CSV response.
stock_header = '"證券代號","證券名稱","成交股數","成交筆數","成交金額","開盤價","最高價","最低價","收盤價","漲跌(+/-)","漲跌價差","最後揭示買價","最後揭示買量","最後揭示賣價","最後揭示賣量","本益比",'

for i, text in enumerate(use_text):
    # search correct row data at i_th row with text
    if text == stock_header:
        initial_point = i
        break  # stop at the first match, same as crawler() below
else:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise ValueError('per-security header line not found in the TWSE response')
        

In [118]:
initial_point

170

In [119]:
use_text[initial_point]

'"證券代號","證券名稱","成交股數","成交筆數","成交金額","開盤價","最高價","最低價","收盤價","漲跌(+/-)","漲跌價差","最後揭示買價","最後揭示買量","最後揭示賣價","最後揭示賣量","本益比",'

In [120]:
# rows from the header line onward (the header was at index 170 for this response)
use_text[initial_point:]

['"證券代號","證券名稱","成交股數","成交筆數","成交金額","開盤價","最高價","最低價","收盤價","漲跌(+/-)","漲跌價差","最後揭示買價","最後揭示買量","最後揭示賣價","最後揭示賣量","本益比",',
 '="0050","元大台灣50","5,777,556","1,447","473,795,392","81.80","82.20","81.80","82.00","+","0.50","82.00","372","82.05","24","0.00",',
 '="0051","元大中型100","7,000","7","228,750","32.55","32.70","32.55","32.70","+","0.15","32.60","4","32.70","3","0.00",',
 '="0052","富邦科技","103,100","8","5,592,954","54.20","54.25","54.10","54.25","+","0.35","54.10","55","54.35","55","0.00",',
 '="0053","元大電子","4,000","4","139,730","34.94","34.94","34.92","34.92","+","0.31","34.68","30","34.83","2","0.00",',
 '="0054","元大台商50","3,000","3","67,350","22.45","22.45","22.45","22.45","+","0.01","22.46","45","22.61","10","0.00",',
 '="0055","元大MSCI金融","16,164","13","296,642","18.35","18.38","18.30","18.37","+","0.16","18.33","30","18.36","8","0.00",',
 '="0056","元大高股息","4,262,698","1,839","114,828,853","26.79","26.99","26.79","26.93","+","0.15","26.92","816","26.93","127","0.00",',
 '="0057","

In [121]:
# step 1: drop the last character of every line and end it with "\n"
# step 2: the resulting lines are the raw material for the table

[line[:-1] + '\n' for line in use_text[initial_point:]]
# layout: header line + "\n" + data lines

['"證券代號","證券名稱","成交股數","成交筆數","成交金額","開盤價","最高價","最低價","收盤價","漲跌(+/-)","漲跌價差","最後揭示買價","最後揭示買量","最後揭示賣價","最後揭示賣量","本益比"\n',
 '="0050","元大台灣50","5,777,556","1,447","473,795,392","81.80","82.20","81.80","82.00","+","0.50","82.00","372","82.05","24","0.00"\n',
 '="0051","元大中型100","7,000","7","228,750","32.55","32.70","32.55","32.70","+","0.15","32.60","4","32.70","3","0.00"\n',
 '="0052","富邦科技","103,100","8","5,592,954","54.20","54.25","54.10","54.25","+","0.35","54.10","55","54.35","55","0.00"\n',
 '="0053","元大電子","4,000","4","139,730","34.94","34.94","34.92","34.92","+","0.31","34.68","30","34.83","2","0.00"\n',
 '="0054","元大台商50","3,000","3","67,350","22.45","22.45","22.45","22.45","+","0.01","22.46","45","22.61","10","0.00"\n',
 '="0055","元大MSCI金融","16,164","13","296,642","18.35","18.38","18.30","18.37","+","0.16","18.33","30","18.36","8","0.00"\n',
 '="0056","元大高股息","4,262,698","1,839","114,828,853","26.79","26.99","26.79","26.93","+","0.15","26.92","816","26.93","127","0.00"\n',
 '=

In [122]:
# step 3: concatenate the lines and wrap the string in a file-like object
io.StringIO(''.join(line[:-1] + '\n' for line in use_text[initial_point:]))

<_io.StringIO at 0x116b74678>

In [123]:
# step 4: let pd.read_csv parse the comma-separated text into a table
csv_text = ''.join(line[:-1] + '\n' for line in use_text[initial_point:])
pd.read_csv(io.StringIO(csv_text))

# ''.join(...) concatenates the individual lines into one string

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,"=""0050""",元大台灣50,5777556,1447,473795392,81.80,82.20,81.80,82.00,+,0.50,82.00,372,82.05,24,0.00
1,"=""0051""",元大中型100,7000,7,228750,32.55,32.70,32.55,32.70,+,0.15,32.60,4,32.70,3,0.00
2,"=""0052""",富邦科技,103100,8,5592954,54.20,54.25,54.10,54.25,+,0.35,54.10,55,54.35,55,0.00
3,"=""0053""",元大電子,4000,4,139730,34.94,34.94,34.92,34.92,+,0.31,34.68,30,34.83,2,0.00
4,"=""0054""",元大台商50,3000,3,67350,22.45,22.45,22.45,22.45,+,0.01,22.46,45,22.61,10,0.00
5,"=""0055""",元大MSCI金融,16164,13,296642,18.35,18.38,18.30,18.37,+,0.16,18.33,30,18.36,8,0.00
6,"=""0056""",元大高股息,4262698,1839,114828853,26.79,26.99,26.79,26.93,+,0.15,26.92,816,26.93,127,0.00
7,"=""0057""",富邦摩台,21000,3,1087750,51.75,51.80,51.75,51.80,+,0.15,51.70,152,51.90,40,0.00
8,"=""0058""",富邦發達,20000,2,937200,46.86,46.86,46.86,46.86,+,0.09,46.81,1,46.93,20,0.00
9,"=""0059""",富邦金融,3000,3,140230,46.67,46.78,46.67,46.78,+,0.34,46.80,20,46.89,20,0.00


In [124]:
# step 5: clean the security-code column, e.g. ="0050" -> 0050

test_df = pd.read_csv(io.StringIO(''.join(line[:-1] + '\n' for line in use_text[initial_point:])))
# Vectorised string methods remove the quote and equals characters and,
# unlike a per-element lambda, do not crash on missing values.
test_df['證券代號'] = (
    test_df['證券代號']
    .str.replace('"', '', regex=False)
    .str.replace('=', '', regex=False)
)


In [125]:
test_df

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,0050,元大台灣50,5777556,1447,473795392,81.80,82.20,81.80,82.00,+,0.50,82.00,372,82.05,24,0.00
1,0051,元大中型100,7000,7,228750,32.55,32.70,32.55,32.70,+,0.15,32.60,4,32.70,3,0.00
2,0052,富邦科技,103100,8,5592954,54.20,54.25,54.10,54.25,+,0.35,54.10,55,54.35,55,0.00
3,0053,元大電子,4000,4,139730,34.94,34.94,34.92,34.92,+,0.31,34.68,30,34.83,2,0.00
4,0054,元大台商50,3000,3,67350,22.45,22.45,22.45,22.45,+,0.01,22.46,45,22.61,10,0.00
5,0055,元大MSCI金融,16164,13,296642,18.35,18.38,18.30,18.37,+,0.16,18.33,30,18.36,8,0.00
6,0056,元大高股息,4262698,1839,114828853,26.79,26.99,26.79,26.93,+,0.15,26.92,816,26.93,127,0.00
7,0057,富邦摩台,21000,3,1087750,51.75,51.80,51.75,51.80,+,0.15,51.70,152,51.90,40,0.00
8,0058,富邦發達,20000,2,937200,46.86,46.86,46.86,46.86,+,0.09,46.81,1,46.93,20,0.00
9,0059,富邦金融,3000,3,140230,46.67,46.78,46.67,46.78,+,0.34,46.80,20,46.89,20,0.00


In [126]:
# step 1: loading time package
import datetime
import time

In [127]:
# step 2: now time
datetime.datetime.now()

datetime.datetime(2019, 7, 4, 14, 37, 14, 862964)

In [128]:
# step 3: now time transfer to string
str(datetime.datetime.now())

'2019-07-04 14:37:15.952038'

In [129]:
# step 4-1: split the string on the space into [date part, time part]
str(datetime.datetime.now()).split(' ')

['2019-07-04', '14:37:18.232310']

In [130]:
# step 4-2: pick one part of the split string by index
print(str(datetime.datetime.now()).split(' ')[0]) # index 0: the date part
print(str(datetime.datetime.now()).split(' ')[1]) # index 1: the time part

2019-07-04
14:37:19.985807


In [131]:
# step 5: So, we try again to remove the '-'.
str(datetime.datetime.now()).split(' ')[0].split('-')

['2019', '07', '04']

In [132]:
# step 6: Then combine to string
''.join(str(datetime.datetime.now()).split(' ')[0].split('-'))

'20190704'

In [2]:
# define a function

def crawler(date_time, timeout=30):
    """Download the TWSE MI_INDEX CSV report for one date and return its per-security table.

    Parameters
    ----------
    date_time : str
        Date text inserted verbatim into the request URL, e.g. ``'20190704'``.
    timeout : float, optional
        Seconds handed to ``requests.get`` so a stalled connection cannot
        block forever. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Everything from the per-security header line to the end of the
        response, parsed as CSV, with ``"`` and ``=`` removed from the
        ``證券代號`` column (``="0050"`` becomes ``0050``).

    Raises
    ------
    ValueError
        If the response contains no per-security header line (presumably
        the case for non-trading days -- confirm against the live service).
    requests.HTTPError
        If the server answers with an HTTP error status.
    """
    # Exact header line that opens the per-security table in the response.
    header = '"證券代號","證券名稱","成交股數","成交筆數","成交金額","開盤價","最高價","最低價","收盤價","漲跌(+/-)","漲跌價差","最後揭示買價","最後揭示買量","最後揭示賣價","最後揭示賣量","本益比",'

    page_url = 'http://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date=' + date_time +'&type=ALLBUT0999'
    page = requests.get(page_url, timeout=timeout)
    page.raise_for_status()
    use_text = page.text.splitlines()

    # Index of the first header line; a missing header is reported explicitly
    # rather than surfacing later as an unbound local variable.
    try:
        initial_point = use_text.index(header)
    except ValueError:
        raise ValueError('no per-security table in the TWSE response for ' + str(date_time)) from None

    # Each line loses its last character (a trailing comma on the header and
    # data lines) and gets a newline so the text parses as regular CSV.
    csv_text = ''.join(text[:-1] + '\n' for text in use_text[initial_point:])
    test_df = pd.read_csv(io.StringIO(csv_text))

    # Vectorised clean-up of the security code; tolerates missing values.
    test_df['證券代號'] = (
        test_df['證券代號']
        .str.replace('"', '', regex=False)
        .str.replace('=', '', regex=False)
    )
    return test_df

In [3]:
# get date_time

def trans_date(date_time):
    """Return the date part of ``str(date_time)`` with the dashes removed.

    The text before the first space of ``str(date_time)`` is kept and every
    ``-`` in it is dropped, so ``2019-07-04 14:37:15`` becomes ``20190704``.
    """
    date_part = str(date_time).split(' ')[0]
    return date_part.replace('-', '')


def parse_n_day(start_date, n, delay=5):
    """Crawl the ``n`` calendar days preceding ``start_date``, one request per day.

    Parameters
    ----------
    start_date : datetime.datetime
        Reference point; crawling starts with the day before it and walks
        backwards one day at a time.
    n : int
        Number of calendar days to try.
    delay : float, optional
        Seconds to sleep before every request. Defaults to 5.

    Returns
    -------
    dict
        Maps the ``trans_date`` text of each day (e.g. ``'20190703'``) to the
        table returned by ``crawler``. Days whose download or parsing raised
        an exception are reported on stdout and left out.
    """
    df_dict = {}
    now_date = start_date

    for _ in range(n):
        time.sleep(delay)
        now_date = now_date - datetime.timedelta(days = 1) # minus 1 days (period for 1 day)

        try:
            date_key = trans_date(now_date)
            df = crawler(date_key)
        except Exception as exc:
            # Only ordinary errors are skipped; KeyboardInterrupt and
            # SystemExit propagate so a long crawl can still be stopped.
            print('Fails at: ' + str(now_date) + ' (' + repr(exc) + ')')
            continue

        print("Current date: " + date_key)
        df_dict[date_key] = df # save the dataset
        print('Successful!!')

    return df_dict


In [4]:
result_dict = parse_n_day(datetime.datetime.now(), 60)

Current date: 20190703
Successful!!
Current date: 20190702
Successful!!
Current date: 20190701
Successful!!
Fails at: 2019-06-30 22:31:50.067342
Fails at: 2019-06-29 22:31:50.067342
Current date: 20190628
Successful!!
Current date: 20190627
Successful!!
Current date: 20190626
Successful!!
Current date: 20190625
Successful!!
Current date: 20190624
Successful!!
Fails at: 2019-06-23 22:31:50.067342
Fails at: 2019-06-22 22:31:50.067342
Current date: 20190621
Successful!!
Current date: 20190620
Successful!!
Current date: 20190619
Successful!!
Current date: 20190618
Successful!!
Current date: 20190617
Successful!!
Fails at: 2019-06-16 22:31:50.067342
Fails at: 2019-06-15 22:31:50.067342
Current date: 20190614
Successful!!
Current date: 20190613
Successful!!
Current date: 20190612
Successful!!
Current date: 20190611
Successful!!
Current date: 20190610
Successful!!
Fails at: 2019-06-09 22:31:50.067342
Fails at: 2019-06-08 22:31:50.067342
Fails at: 2019-06-07 22:31:50.067342
Current date: 20190

In [5]:
result_dict.keys()

dict_keys(['20190703', '20190702', '20190701', '20190628', '20190627', '20190626', '20190625', '20190624', '20190621', '20190620', '20190619', '20190618', '20190617', '20190614', '20190613', '20190612', '20190611', '20190610', '20190606', '20190605', '20190604', '20190603', '20190531', '20190530', '20190529', '20190528', '20190527', '20190524', '20190523', '20190522', '20190521', '20190520', '20190517', '20190516', '20190515', '20190514', '20190513', '20190510', '20190509', '20190508', '20190507', '20190506'])

In [6]:
result_dict['20190701']

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,0050,元大台灣50,13771395,5806,1137417652,82.05,82.80,82.05,82.65,+,1.75,82.65,125,82.70,408,0.00
1,0051,元大中型100,48263,27,1575525,32.60,32.66,32.60,32.66,+,0.56,32.66,19,32.69,2,0.00
2,0052,富邦科技,115100,18,6320374,54.20,54.95,54.20,54.95,+,1.85,54.95,3,55.00,6,0.00
3,0053,元大電子,97000,21,3401140,34.69,35.15,34.69,35.10,+,1.09,35.03,2,35.10,1,0.00
4,0054,元大台商50,35023,17,790253,22.31,22.67,22.31,22.67,+,0.36,22.67,1,22.68,1,0.00
5,0055,元大MSCI金融,23017,15,421131,18.34,18.34,18.27,18.30,+,0.07,18.30,2,18.32,17,0.00
6,0056,元大高股息,10905721,3876,293604941,26.90,26.96,26.87,26.95,+,0.36,26.94,37,26.95,349,0.00
7,0057,富邦摩台,21000,3,1099200,52.35,52.35,52.20,52.20,+,1.00,52.15,50,52.35,41,0.00
8,0058,富邦發達,29000,7,1373450,47.35,47.38,47.30,47.38,+,0.60,47.29,50,47.42,20,0.00
9,0059,富邦金融,7000,6,326760,46.68,46.70,46.67,46.67,X,0.00,46.59,35,46.72,1,0.00


In [8]:
# write each daily table to "<date>.csv" in the working directory

for key, daily_df in result_dict.items():
    daily_df.to_csv(str(key) + '.csv')
    

## Library: twstock

In [155]:
## data from library
from twstock import Stock
stock = Stock('2330')

In [157]:
type(stock)

twstock.stock.Stock

In [159]:
# list the attributes and methods this object provides
dir(stock)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_month_year_iter',
 'capacity',
 'change',
 'close',
 'continuous',
 'data',
 'date',
 'fetch',
 'fetch_31',
 'fetch_from',
 'fetcher',
 'high',
 'low',
 'ma_bias_ratio',
 'ma_bias_ratio_pivot',
 'moving_average',
 'open',
 'price',
 'raw_data',
 'sid',
 'transaction',
 'turnover']

In [163]:
stock_price_31 = stock.fetch_31()

In [164]:
type(stock_price_31)

list

In [165]:
stock_price_31

[Data(date=datetime.datetime(2019, 5, 22, 0, 0), capacity=36289034, turnover=8669172710, open=236.5, high=240.5, low=235.5, close=238.0, change=4.0, transaction=14019),
 Data(date=datetime.datetime(2019, 5, 23, 0, 0), capacity=62258627, turnover=14379543985, open=233.5, high=233.5, low=230.0, close=230.0, change=-8.0, transaction=24974),
 Data(date=datetime.datetime(2019, 5, 24, 0, 0), capacity=38226789, turnover=8866253632, open=230.0, high=234.0, low=230.0, close=233.0, change=3.0, transaction=14586),
 Data(date=datetime.datetime(2019, 5, 27, 0, 0), capacity=37447033, turnover=8697538216, open=234.0, high=235.0, low=231.0, close=231.0, change=-2.0, transaction=13895),
 Data(date=datetime.datetime(2019, 5, 28, 0, 0), capacity=99322033, turnover=22910765567, open=232.0, high=232.0, low=230.5, close=230.5, change=-0.5, transaction=10122),
 Data(date=datetime.datetime(2019, 5, 29, 0, 0), capacity=32260236, turnover=7385029780, open=228.0, high=230.5, low=227.0, close=229.5, change=-1.0, 

In [166]:
stock_price_31[0]

Data(date=datetime.datetime(2019, 5, 22, 0, 0), capacity=36289034, turnover=8669172710, open=236.5, high=240.5, low=235.5, close=238.0, change=4.0, transaction=14019)

# Part II: Practice the High Frequency Dataset

FXCM 的模擬帳戶提供高頻資料的 API 獲取外匯資料

 - Step1:開啟[免費模擬帳戶](https://www.fxcm.com/uk/forex-trading-demo/)
 - Step2:創造 API Token

In [189]:
# loading FXCM simulation (demo) account

import os

import fxcmpy

# The API token is a credential, so it is read from the environment rather
# than stored in the notebook, where it would leak with every shared copy.
API_key = os.environ.get('FXCM_API_TOKEN')
if not API_key:
    raise RuntimeError('Set the FXCM_API_TOKEN environment variable to your FXCM demo API token')
api = fxcmpy.fxcmpy(access_token = API_key, server='demo')



In [190]:
from fxcmpy import fxcmpy_tick_data_reader as tdr

In [191]:
print(tdr.get_available_symbols())

('AUDCAD', 'AUDCHF', 'AUDJPY', 'AUDNZD', 'CADCHF', 'EURAUD', 'EURCHF', 'EURGBP', 'EURJPY', 'EURUSD', 'GBPCHF', 'GBPJPY', 'GBPNZD', 'GBPUSD', 'GBPCHF', 'GBPJPY', 'GBPNZD', 'NZDCAD', 'NZDCHF', 'NZDJPY', 'NZDUSD', 'USDCAD', 'USDCHF', 'USDJPY')


In [192]:
start = datetime.datetime(2018,2,1)
end = datetime.datetime(2018,2,2)

In [193]:
print(start)
print(end)

2018-02-01 00:00:00
2018-02-02 00:00:00


In [194]:
dr = tdr('EURJPY', start, end)

In [195]:
dr.get_raw_data().info()

<class 'pandas.core.frame.DataFrame'>
Index: 2889838 entries, 01/28/2018 22:00:46.433 to 02/02/2018 21:59:00.215
Data columns (total 2 columns):
Bid    float64
Ask    float64
dtypes: float64(2)
memory usage: 66.1+ MB


In [198]:
dr.get_data().info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2889838 entries, 2018-01-28 22:00:46.433000 to 2018-02-02 21:59:00.215000
Data columns (total 2 columns):
Bid    float64
Ask    float64
dtypes: float64(2)
memory usage: 66.1 MB


In [199]:
dr.get_data().head()

Unnamed: 0,Bid,Ask
2018-01-28 22:00:46.433,135.033,135.128
2018-01-28 22:00:59.133,135.022,135.12
2018-01-28 22:01:29.177,135.034,135.124
2018-01-28 22:01:57.034,135.029,135.12
2018-01-28 22:02:41.678,135.019,135.115
