## 通过sina获取实时交易数据

### 利用爬虫库

In [1]:
from urllib.request import urlopen
import pandas as pd
# Display tuning for wide / long frames.
pd.options.display.expand_frame_repr = False  # do not wrap columns onto new lines
pd.options.display.max_rows = 5000  # maximum number of rows to display

# URLs used in this notebook
# Data endpoint queried by stock code: http://hq.sinajs.cn/list=sz000001,sh000001
# Regular quote page: http://finance.sina.com.cn/realstock/company/sh600000/nc.shtml

### 构建网址

In [2]:
# Sample codes; the list includes delisted, suspended and newly listed stocks.
stock_code_list = [
    'sh600000',
    'sz000002',
    'sh600002',
    'sh600003',
    'sh600610',
    'sh600145',
    'sh603982',
]
# The endpoint takes every requested code as one comma-separated list.
codes_param = ','.join(stock_code_list)
url = 'http://hq.sinajs.cn/list=' + codes_param
print(url)

http://hq.sinajs.cn/list=sh600000,sz000002,sh600002,sh600003,sh600610,sh600145,sh603982


### 抓取数据

In [3]:
# Download the raw quote text. The context manager closes the connection even
# if reading or decoding fails, and the timeout keeps the cell from hanging
# indefinitely when the server does not answer.
# NOTE(review): the endpoint may reject requests that lack a Referer header;
# if this call starts failing, confirm and send one via urllib.request.Request.
with urlopen(url, timeout=10) as response:
    content = response.read().decode('gbk')  # the response bytes are decoded as GBK
print(type(content))
print(content)

<class 'str'>
var hq_str_sh600000="浦发银行,11.320,11.320,11.160,11.340,11.120,11.150,11.160,40000391,447828812.000,93641,11.150,52600,11.140,198185,11.130,596402,11.120,787000,11.110,4795,11.160,74250,11.170,103072,11.180,73300,11.190,137473,11.200,2019-05-22,15:00:01,00";
var hq_str_sz000002="万 科Ａ,27.400,27.520,27.360,27.590,27.100,27.360,27.370,20464814,560169485.340,12322,27.360,69500,27.350,14170,27.340,15148,27.330,15400,27.320,33400,27.370,1300,27.380,14500,27.390,60226,27.400,34200,27.410,2019-05-22,15:00:03,00";
var hq_str_sh600002="齐鲁石化,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,2019-05-10,11:45:01,-3";
var hq_str_sh600003="ST东北高,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,2019-05-10,11:45:01,-3";
var hq_str_sh600610="*ST毅达,0.000,3.260,3.260,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.0

### 整理数据

In [4]:
# Trim leading/trailing whitespace and newlines so the split below does not
# produce an empty trailing entry.
content = content.strip()
# Break the response into lines on the newline character.
data_line = content.split(sep='\n')
print(data_line)

['var hq_str_sh600000="浦发银行,11.320,11.320,11.160,11.340,11.120,11.150,11.160,40000391,447828812.000,93641,11.150,52600,11.140,198185,11.130,596402,11.120,787000,11.110,4795,11.160,74250,11.170,103072,11.180,73300,11.190,137473,11.200,2019-05-22,15:00:01,00";', 'var hq_str_sz000002="万 科Ａ,27.400,27.520,27.360,27.590,27.100,27.360,27.370,20464814,560169485.340,12322,27.360,69500,27.350,14170,27.340,15148,27.330,15400,27.320,33400,27.370,1300,27.380,14500,27.390,60226,27.400,34200,27.410,2019-05-22,15:00:03,00";', 'var hq_str_sh600002="齐鲁石化,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,2019-05-10,11:45:01,-3";', 'var hq_str_sh600003="ST东北高,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,2019-05-10,11:45:01,-3";', 'var hq_str_sh600610="*ST毅达,0.000,3.260,3.260,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.000,0,0.0

In [5]:
# Drop the JavaScript variable prefix and split every response line into its
# comma-separated fields. A separate name is used so that `data_line` keeps
# holding the raw text lines and this cell can be re-run safely.
data_rows = [line.replace('var hq_str_', '').split(',') for line in data_line]
df = pd.DataFrame(data_rows)

# In the sample response fields 1-29 are numeric, field 0 is the code/name text
# and fields 30-32 are date, time and a status flag. Convert the numeric fields
# explicitly: passing dtype='float' to the constructor raises on pandas >= 2.0
# because the text columns cannot be cast. Unparseable values become NaN.
numeric_cols = list(df.columns[1:30])
if numeric_cols:
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce').astype('float')
print(df)

                0      1      2      3      4      5      6      7           8             9   ...     23        24     25       26     27        28     29          30        31    32
0   sh600000="浦发银行  11.32  11.32  11.16  11.34  11.12  11.15  11.16  40000391.0  4.478288e+08  ...  11.17  103072.0  11.18  73300.0  11.19  137473.0  11.20  2019-05-22  15:00:01  00";
1   sz000002="万 科Ａ  27.40  27.52  27.36  27.59  27.10  27.36  27.37  20464814.0  5.601695e+08  ...  27.38   14500.0  27.39  60226.0  27.40   34200.0  27.41  2019-05-22  15:00:03  00";
2   sh600002="齐鲁石化   0.00   0.00   0.00   0.00   0.00   0.00   0.00         0.0  0.000000e+00  ...   0.00       0.0   0.00      0.0   0.00       0.0   0.00  2019-05-10  11:45:01  -3";
3  sh600003="ST东北高   0.00   0.00   0.00   0.00   0.00   0.00   0.00         0.0  0.000000e+00  ...   0.00       0.0   0.00      0.0   0.00       0.0   0.00  2019-05-10  11:45:01  -3";
4  sh600610="*ST毅达   0.00   3.26   3.26   0.00   0.00   0.00   0.00         0.0 

### 细整理数据，赋予列名

In [6]:
# Field 0 looks like 'sh600000="<name>': split it into [code, name] pieces.
code_and_name = df[0].str.split('="')
df[0] = code_and_name
df['stock_code'] = code_and_name.str[0].str.strip()
df['stock_name'] = code_and_name.str[-1].str.strip()
# Fields 30 and 31 hold the date and the time; join them and parse as a timestamp.
df['candle_end_time'] = pd.to_datetime(df[30] + ' ' + df[31])

print(df)

                   0      1      2      3      4      5      6      7           8             9  ...       26     27        28     29          30        31    32  stock_code  stock_name     candle_end_time
0   [sh600000, 浦发银行]  11.32  11.32  11.16  11.34  11.12  11.15  11.16  40000391.0  4.478288e+08  ...  73300.0  11.19  137473.0  11.20  2019-05-22  15:00:01  00";    sh600000        浦发银行 2019-05-22 15:00:01
1   [sz000002, 万 科Ａ]  27.40  27.52  27.36  27.59  27.10  27.36  27.37  20464814.0  5.601695e+08  ...  60226.0  27.40   34200.0  27.41  2019-05-22  15:00:03  00";    sz000002        万 科Ａ 2019-05-22 15:00:03
2   [sh600002, 齐鲁石化]   0.00   0.00   0.00   0.00   0.00   0.00   0.00         0.0  0.000000e+00  ...      0.0   0.00       0.0   0.00  2019-05-10  11:45:01  -3";    sh600002        齐鲁石化 2019-05-10 11:45:01
3  [sh600003, ST东北高]   0.00   0.00   0.00   0.00   0.00   0.00   0.00         0.0  0.000000e+00  ...      0.0   0.00       0.0   0.00  2019-05-10  11:45:01  -3";    sh600003   

In [7]:
# Descriptive names for the positional fields that are kept.
# NOTE(review): in the sample output field 9 is roughly field 8 times the price,
# so 'amount' appears to hold a share count and 'volume' a turnover value.
# Confirm the naming matches what downstream code expects.
rename_dict = {
    1: 'open',
    2: 'pre_close',
    3: 'close',
    4: 'high',
    5: 'low',
    6: 'buy1',
    7: 'sell1',
    8: 'amount',
    9: 'volume',
    32: 'status',
}
df = df.rename(columns=rename_dict)
# The last field still carries the closing '";' of the JavaScript statement.
df['status'] = df['status'].str.strip('";')

print(df)

                   0   open  pre_close  close   high    low   buy1  sell1      amount        volume  ...       26     27        28     29          30        31  status  stock_code  stock_name     candle_end_time
0   [sh600000, 浦发银行]  11.32      11.32  11.16  11.34  11.12  11.15  11.16  40000391.0  4.478288e+08  ...  73300.0  11.19  137473.0  11.20  2019-05-22  15:00:01      00    sh600000        浦发银行 2019-05-22 15:00:01
1   [sz000002, 万 科Ａ]  27.40      27.52  27.36  27.59  27.10  27.36  27.37  20464814.0  5.601695e+08  ...  60226.0  27.40   34200.0  27.41  2019-05-22  15:00:03      00    sz000002        万 科Ａ 2019-05-22 15:00:03
2   [sh600002, 齐鲁石化]   0.00       0.00   0.00   0.00   0.00   0.00   0.00         0.0  0.000000e+00  ...      0.0   0.00       0.0   0.00  2019-05-10  11:45:01      -3    sh600002        齐鲁石化 2019-05-10 11:45:01
3  [sh600003, ST东北高]   0.00       0.00   0.00   0.00   0.00   0.00   0.00         0.0  0.000000e+00  ...      0.0   0.00       0.0   0.00  2019-05-10  1

In [8]:
# Keep only the named columns, in presentation order; the remaining
# positional fields are dropped.
column_order = [
    'stock_code', 'stock_name', 'candle_end_time',
    'open', 'high', 'low', 'close', 'pre_close',
    'amount', 'volume', 'buy1', 'sell1', 'status',
]
df = df[column_order]
print(df)

  stock_code stock_name     candle_end_time   open   high    low  close  pre_close      amount        volume   buy1  sell1 status
0   sh600000       浦发银行 2019-05-22 15:00:01  11.32  11.34  11.12  11.16      11.32  40000391.0  4.478288e+08  11.15  11.16     00
1   sz000002       万 科Ａ 2019-05-22 15:00:03  27.40  27.59  27.10  27.36      27.52  20464814.0  5.601695e+08  27.36  27.37     00
2   sh600002       齐鲁石化 2019-05-10 11:45:01   0.00   0.00   0.00   0.00       0.00         0.0  0.000000e+00   0.00   0.00     -3
3   sh600003      ST东北高 2019-05-10 11:45:01   0.00   0.00   0.00   0.00       0.00         0.0  0.000000e+00   0.00   0.00     -3
4   sh600610      *ST毅达 2019-05-22 15:29:59   0.00   0.00   0.00   3.26       3.26         0.0  0.000000e+00   0.00   0.00     03
5   sh600145      *ST新亿 2019-05-22 15:29:59   0.00   0.00   0.00   1.87       1.87         0.0  0.000000e+00   0.00   0.00     03
6   sh603982        N泉峰 2019-05-22 15:00:01  11.75  14.10  11.75  14.10       9.79     834

### 备注
退市、停牌：开盘价为0，可用 `df[(df['open'] - 0).abs() < 0.000001]` 筛选

正常：status = 00

退市：status = -3, pre_close == 0

停牌：status = 03, pre_close != 0


新上市：pre_close为发行价

除权： pre_close为除权后的收盘价