## 从雅虎财经网站获取股票数据

In [2]:
import urllib
import os
import datetime
import pandas as pd
import numpy as np

In [2]:
# Download the complete price history for one stock.
def retrive_stock_data(stockid, folder):
    """Download the full CSV table for ``stockid`` into ``folder``.

    The file is stored as ``<folder>/<code>.csv``, where ``<code>`` is the part
    of ``stockid`` before the first dot (``'600690.ss'`` -> ``600690.csv``).
    ``folder`` is created, including missing parent directories, when it does
    not exist yet.

    Args:
        stockid: Ticker including the exchange suffix, e.g. ``'600690.ss'``.
        folder: Directory that receives the CSV file.

    Raises:
        Any error raised by ``os.makedirs`` or by the underlying
        ``urlretrieve`` call is propagated unchanged.
    """
    # Function-scope import so the cell runs on both Python 3
    # (urllib.request / urllib.parse) and Python 2 (flat urllib module).
    try:
        from urllib.request import urlretrieve
        from urllib.parse import urlencode
    except ImportError:  # Python 2
        from urllib import urlretrieve, urlencode

    print('downloading %s to %s' % (stockid, folder))
    # Build the query with urlencode (as update_stock_data does) so unusual
    # characters in the ticker are escaped instead of corrupting the URL.
    # NOTE(review): legacy Yahoo endpoint - confirm it is still being served.
    url = 'http://table.finance.yahoo.com/table.csv?%s' % urlencode([('s', stockid)])
    fname = os.path.join(folder, '%s.csv' % stockid.split('.')[0])
    if not os.path.isdir(folder):
        # makedirs (not mkdir) so a nested target such as 'data/yahoo' works.
        os.makedirs(folder)
    urlretrieve(url, fname)

In [3]:
# Fetch the complete history for ticker 600690.ss into the yahoo-data folder.
retrive_stock_data(stockid='600690.ss', folder='yahoo-data')

downloading 600690.ss to yahoo-data


In [32]:
# Update the stock data: download everything when no local file exists,
# otherwise fetch only the rows from the latest stored date onwards.
def update_stock_data(stockid, folder):
    """Bring ``<folder>/<code>.csv`` up to date for ``stockid``.

    ``<code>`` is the part of ``stockid`` before the first dot. Behaviour:

    * No local file, or a file with no data rows: delegate to
      ``retrive_stock_data`` for a full download.
    * Latest stored date is less than two days before today: print a
      message and leave the file untouched.
    * Otherwise: download the rows from the latest stored date up to today,
      merge them into the stored table, and rewrite the CSV sorted from
      newest to oldest.

    Args:
        stockid: Ticker including the exchange suffix, e.g. ``'600690.ss'``.
        folder: Directory holding the per-stock CSV files.

    Raises:
        Errors from the download or from parsing either CSV are propagated;
        the temporary download file is removed in every case.
    """
    # Function-scope import so the cell runs on both Python 3
    # (urllib.request / urllib.parse) and Python 2 (flat urllib module).
    try:
        from urllib.request import urlretrieve
        from urllib.parse import urlencode
    except ImportError:  # Python 2
        from urllib import urlretrieve, urlencode

    fname = os.path.join(folder, '%s.csv' % stockid.split('.')[0])
    if not os.path.exists(fname):
        retrive_stock_data(stockid, folder)
        return

    data = pd.read_csv(fname, index_col='Date', parse_dates=True)
    if data.empty:
        # A header-only file has no "latest date" to extend from, so fall
        # back to a full download instead of failing on an empty index.
        retrive_stock_data(stockid, folder)
        return

    # Take the maximum instead of the first row so the result does not
    # depend on the row order of the stored file.
    last_date = data.index.max()
    today = pd.Timestamp(datetime.date.today())
    if today - last_date < pd.Timedelta(days=2):
        print('Nothing to update. %s last date is %s.' % (stockid, last_date))
        return

    print('updatting %s to from %s to %s' % (stockid, last_date.date(), today.date()))
    # a/b/c carry the start date and d/e/f the end date; the month fields are
    # sent zero-based (month - 1).
    query = [
        ('a', last_date.month - 1),
        ('b', last_date.day),
        ('c', last_date.year),
        ('d', today.month - 1),
        ('e', today.day),
        ('f', today.year),
        ('s', stockid),
    ]
    url = 'http://table.finance.yahoo.com/table.csv?%s' % urlencode(query)
    temp_file = fname + '.tmp'
    try:
        urlretrieve(url, temp_file)
        update_data = pd.read_csv(temp_file, index_col='Date', parse_dates=True)
    finally:
        # Never leave the temporary download behind, even on failure.
        if os.path.exists(temp_file):
            os.unlink(temp_file)

    # pd.concat replaces DataFrame.append, which newer pandas no longer has.
    merged = pd.concat([data, update_data])
    # The requested range starts at last_date itself, so the response is
    # expected to repeat rows that are already stored; keep one row per date,
    # preferring the freshly downloaded one.
    merged = merged[~merged.index.duplicated(keep='last')]
    merged = merged.sort_index(ascending=False)
    merged.to_csv(fname, mode='w')

In [33]:
# Refresh the local CSV for ticker 600690.ss inside the yahoo-data folder.
update_stock_data(stockid='600690.ss', folder='yahoo-data')

Nothing to update. 600690.ss last date is 2015-12-22 00:00:00.


### 获取所有的股票列表

In [41]:
# Load the (name, id) listing from SH.txt; the file has no header row.
# `id` is read as `str` so codes keep their leading zeros. The builtin
# replaces the legacy `np.string0` alias, which is not available in current
# NumPy releases.
sh = pd.read_csv(
    'SH.txt',
    header=None,
    names=['name', 'id'],
    dtype={'id': str},
    skipinitialspace=True,
)
# Append the '.ss' suffix so ids match the ticker form passed to the
# download helpers (e.g. '600690.ss').
sh['id'] = sh['id'] + '.ss'
sh.head()

Unnamed: 0,name,id
0,浦发银行,600000.ss
1,邯郸钢铁,600001.ss
2,齐鲁石化,600002.ss
3,ST东北高,600003.ss
4,白云机场,600004.ss


In [42]:
# Load the (name, id) listing from SZ.txt; the file has no header row.
# `id` is read as `str` so codes such as '000001' keep their leading zeros.
# The builtin replaces the legacy `np.string0` alias, which is not available
# in current NumPy releases.
sz = pd.read_csv(
    'SZ.txt',
    header=None,
    names=['name', 'id'],
    dtype={'id': str},
    skipinitialspace=True,
)
# Append the '.sz' exchange suffix to every id.
sz['id'] = sz['id'] + '.sz'
sz.head()

Unnamed: 0,name,id
0,平安银行,000001.sz
1,万科A,000002.sz
2,PT金田A,000003.sz
3,国农科技,000004.sz
4,世纪星源,000005.sz


In [43]:
# Stack the two listings into one table (original row labels are kept),
# then peek at the ids of the first few rows.
slist = pd.concat(objs=[sh, sz])
slist['id'].head().values

array(['600000.ss', '600001.ss', '600002.ss', '600003.ss', '600004.ss'], dtype=object)

In [47]:
# Show the first rows whose id begins with '002'.
starts_with_002 = slist['id'].str.startswith('002')
slist[starts_with_002].head()

Unnamed: 0,name,id
509,新和成,002001.sz
510,鸿达兴业,002002.sz
511,伟星股份,002003.sz
512,华邦健康,002004.sz
513,德豪润达,002005.sz


## 数据分析

In [64]:
# Project-local helper module; its source is not part of this notebook.
import stock_analysis as sa

# Call the module's amplitude() with interval=10 and keep the result for the
# filtering cell that follows.
# NOTE(review): judging by the printed report, this appears to rank every
# stock in yahoo-data by its amplitude over a 10-period window - confirm
# against the stock_analysis source.
amp = sa.amplitude(interval=10)

head 5 recent amplitude in period of 10 for all stocks in yahoo-data till 2015-12-25 00:02:31.553774:
         id       amp
453  002468  1.610959
529  002545  1.516767
639  002655  1.462879
201  002202  1.441441
285  002287  1.418274
tail 5 recent ripples in period of 10 for all stocks in yahoo-data till 2015-12-25 00:02:31.553774:
         id       amp
651  002667 -1.214078
739  002761 -1.222222
542  002558 -1.268108
69   002070 -1.291734
175  002176 -1.431384


In [66]:
# Keep only the rows whose 'amp' value is strictly below 1.3.
below_threshold = amp['amp'] < 1.3
amp[below_threshold]

Unnamed: 0,id,amp
646,002662,1.296159
88,002089,1.295930
613,002629,1.294355
18,002019,1.290202
223,002224,1.288203
199,002200,1.287500
434,002449,1.287404
743,002766,1.287093
518,002534,1.282877
412,002427,1.282583
