In [1]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import tushare as ts
import warnings;warnings.simplefilter('ignore')

### 1 简单金融应用和分析

In [None]:
hs300 = ts.get_k_data('hs300',start='2015-01-01',end='2017-06-30')

In [None]:
hs300.set_index('date',inplace=True)
hs300.head()

In [None]:
hs300['close'].plot(figsize=(8,5),grid=True,title='HS300 Close Price')

In [None]:
# 计算每天的连续收益，必背！！！
hs300['return'] = np.log(hs300['close']/hs300['close'].shift(1))

In [None]:
hs300[['close','return']].plot(subplots=True,figsize=(10,8),grid=True)

In [None]:
#计算股价的移动平均，必背！！！
hs300['SMA20'] = hs300['close'].rolling(window=20).mean()
hs300.tail()
# rolling(windoe=xx) 滚动xx个窗口 https://baijiahao.baidu.com/s?id=1622798772654712959&wfr=spider&for=pc

In [None]:
#用talib计算移动平均，talib是一个技术分析的package
import talib as ta
hs300['SMA20_talib'] = ta.SMA(np.array(hs300['close']),20)

In [None]:
hs300[['close','SMA20']].plot(figsize=(8,6))

In [None]:
hs300['SMA60']=hs300['close'].rolling(window=60,min_periods=0).mean()
hs300[['close','SMA60']].plot(figsize=(8,6))
# min_periods=0即最小移动为0天，可以补全前几个空值

In [None]:
import math
hs300['Mov_Vol'] = hs300['return'].rolling(window=252,min_periods=0).std()* math.sqrt(252)
hs300.head()

In [None]:
hs300[['close','Mov_Vol','return']].plot(figsize=(8,6),subplots=True,grid=True)

### 2 爬取tushare数据并进行选股条件分析

从估值角度选取股票，即在行业分类汇总的基础上，选取被低估的股票（低PE，PB)

tushare官网：http://tushare.org/index.html

In [2]:
hs300 = ts.get_hs300s() #获得hs300的股票列表
hs300

Unnamed: 0,date,code,name,weight
0,2020-07-31,600000,浦发银行,0.73
1,2020-07-31,600004,白云机场,0.09
2,2020-07-31,600009,上海机场,0.39
3,2020-07-31,600010,包钢股份,0.16
4,2020-07-31,600011,华能国际,0.12
...,...,...,...,...
295,2020-07-31,300413,芒果超媒,0.29
296,2020-07-31,300433,蓝思科技,0.26
297,2020-07-31,300498,温氏股份,0.64
298,2020-07-31,300601,康泰生物,0.45


In [3]:
hs300 = hs300['code'].tolist() #将series/df转化成list
hs300

['600000',
 '600004',
 '600009',
 '600010',
 '600011',
 '600015',
 '600016',
 '600018',
 '600019',
 '600025',
 '600027',
 '600028',
 '600029',
 '600030',
 '600031',
 '600036',
 '600038',
 '600048',
 '600050',
 '600061',
 '600066',
 '600068',
 '600085',
 '600089',
 '600104',
 '600109',
 '600111',
 '600115',
 '600118',
 '600170',
 '600176',
 '600177',
 '600183',
 '600188',
 '600196',
 '600208',
 '600219',
 '600221',
 '600233',
 '600271',
 '600276',
 '600297',
 '600299',
 '600309',
 '600332',
 '600340',
 '600346',
 '600352',
 '600362',
 '600369',
 '600372',
 '600383',
 '600390',
 '600398',
 '600406',
 '600436',
 '600438',
 '600482',
 '600487',
 '600489',
 '600498',
 '600516',
 '600519',
 '600522',
 '600547',
 '600570',
 '600583',
 '600585',
 '600588',
 '600606',
 '600637',
 '600655',
 '600660',
 '600674',
 '600690',
 '600703',
 '600705',
 '600741',
 '600745',
 '600760',
 '600795',
 '600809',
 '600837',
 '600848',
 '600867',
 '600886',
 '600887',
 '600893',
 '600900',
 '600919',
 '600926',

In [4]:
#获取基本面信息
stock_basics = ts.get_stock_basics()
stock_basics

Unnamed: 0_level_0,name,industry,area,pe,outstanding,totals,totalAssets,liquidAssets,fixedAssets,reserved,...,bvps,pb,timeToMarket,undp,perundp,rev,profit,gpr,npr,holders
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
688556,N高测,专用机械,山东,118.78,0.37,1.62,13.33,10.17,1.72,1.38,...,5.92,8.29,20200807,1.54,0.95,0.00,0.00,37.65,9.37,34814.0
605066,N天正,电气设备,浙江,31.50,0.71,4.01,21.50,16.49,3.17,0.66,...,3.94,3.66,20200807,4.41,1.10,0.00,0.00,29.48,8.69,72150.0
688011,新光光电,元器件,黑龙江,0.00,0.41,1.00,13.16,11.43,1.08,10.23,...,12.22,4.14,20190722,0.88,0.88,-49.58,-127.55,54.53,-11.86,9134.0
600480,凌云股份,汽车配件,河北,0.00,7.63,7.65,141.48,79.21,29.18,24.96,...,5.91,1.54,20030815,13.18,1.72,-21.58,-318.50,15.31,-4.83,40013.0
002786,银宝山新,专用机械,深圳,455.84,3.80,3.81,46.61,31.93,9.24,1.74,...,2.34,4.07,20151223,2.80,0.73,28.43,105.66,14.81,0.22,31945.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
688185,康希诺,生物制药,天津,0.00,0.00,0.00,17.36,7.30,0.83,16.20,...,0.00,0.00,0,-3.91,0.00,0.00,0.00,91.49,-573.33,0.0
688229,博睿数据,软件服务,北京,0.00,0.00,0.00,2.34,2.26,0.04,0.05,...,0.00,0.00,0,1.50,0.00,0.00,0.00,74.73,12.33,0.0
688286,敏芯股份,元器件,江苏,0.00,0.00,0.00,3.29,2.60,0.40,1.30,...,0.00,0.00,0,1.08,0.00,0.00,0.00,32.98,10.77,0.0
688313,仕佳光子,通信设备,河南,0.00,0.00,0.00,9.94,5.50,3.62,2.43,...,0.00,0.00,0,0.13,0.00,0.00,0.00,25.96,8.69,0.0


In [5]:
stock_basics.reset_index(inplace=True)
stock_basics
#reset_index()重置df的索引

Unnamed: 0,code,name,industry,area,pe,outstanding,totals,totalAssets,liquidAssets,fixedAssets,...,bvps,pb,timeToMarket,undp,perundp,rev,profit,gpr,npr,holders
0,688556,N高测,专用机械,山东,118.78,0.37,1.62,13.33,10.17,1.72,...,5.92,8.29,20200807,1.54,0.95,0.00,0.00,37.65,9.37,34814.0
1,605066,N天正,电气设备,浙江,31.50,0.71,4.01,21.50,16.49,3.17,...,3.94,3.66,20200807,4.41,1.10,0.00,0.00,29.48,8.69,72150.0
2,688011,新光光电,元器件,黑龙江,0.00,0.41,1.00,13.16,11.43,1.08,...,12.22,4.14,20190722,0.88,0.88,-49.58,-127.55,54.53,-11.86,9134.0
3,600480,凌云股份,汽车配件,河北,0.00,7.63,7.65,141.48,79.21,29.18,...,5.91,1.54,20030815,13.18,1.72,-21.58,-318.50,15.31,-4.83,40013.0
4,002786,银宝山新,专用机械,深圳,455.84,3.80,3.81,46.61,31.93,9.24,...,2.34,4.07,20151223,2.80,0.73,28.43,105.66,14.81,0.22,31945.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3963,688185,康希诺,生物制药,天津,0.00,0.00,0.00,17.36,7.30,0.83,...,0.00,0.00,0,-3.91,0.00,0.00,0.00,91.49,-573.33,0.0
3964,688229,博睿数据,软件服务,北京,0.00,0.00,0.00,2.34,2.26,0.04,...,0.00,0.00,0,1.50,0.00,0.00,0.00,74.73,12.33,0.0
3965,688286,敏芯股份,元器件,江苏,0.00,0.00,0.00,3.29,2.60,0.40,...,0.00,0.00,0,1.08,0.00,0.00,0.00,32.98,10.77,0.0
3966,688313,仕佳光子,通信设备,河南,0.00,0.00,0.00,9.94,5.50,3.62,...,0.00,0.00,0,0.13,0.00,0.00,0.00,25.96,8.69,0.0


In [6]:
# loc[a,b]a选取行标签,b列标签
# 数据过滤 isin()
data1 = stock_basics.loc[stock_basics['code'].isin(hs300),['code','name','industry','pb','pe','esp','rev','profit',]]
data1.columns = ['代码','名称','行业','PE','PB','EPS','收入%','利润%']
data1

Unnamed: 0,代码,名称,行业,PE,PB,EPS,收入%,利润%
11,601216,君正集团,化工原料,4.48,34.90,0.075,45.80,-12.97
32,600893,航发动力,航空,4.00,334.02,0.038,3.34,2106.25
44,300601,康泰生物,生物制药,24.52,305.31,0.400,7.71,0.63
58,002236,大华股份,电器仪表,4.18,52.50,0.106,-19.47,0.50
87,600038,中直股份,航空,4.70,186.53,0.088,-18.24,-34.12
...,...,...,...,...,...,...,...,...
3816,300059,东方财富,证券,9.35,62.12,0.110,102.31,126.48
3842,300433,蓝思科技,元器件,5.52,38.16,0.201,44.60,1010.53
3852,600297,广汇汽车,汽车服务,0.87,0.00,-0.049,-31.36,-149.58
3869,601788,光大证券,证券,2.17,23.81,0.239,-17.62,-16.20


In [None]:
# 方法2：不重置index，通过loc直接进行索引定位
stock_basics = ts.get_stock_basics()
data1 = stock_basics.loc[hs300,['code','name','industry','pb','pe','esp','rev','profit',]]
data1.columns = ['代码','名称','行业','PE','PB','EPS','收入%','利润%']
data1

In [7]:
stock_profit = ts.get_profit_data(2017,1)
stock_profit

[Getting data:]############################################################

Unnamed: 0,code,name,roe,net_profit_ratio,gross_profit_rate,net_profits,eps,business_income,bips
0,600423,ST柳化,215.12,10.30,-1.3970,40.6863,0.1018,394.6887,0.9883
1,600306,*ST商城,71.97,71.48,17.3030,171.9346,0.9651,240.5080,1.3501
2,000717,韶钢松山,26.83,3.19,6.2247,168.1588,0.0695,5268.7636,2.1776
3,600133,东湖高新,26.81,55.22,11.6396,686.8356,1.0828,1243.7266,1.9609
4,002893,华通热力,25.83,21.81,32.2225,112.7380,1.2526,516.6846,5.7409
...,...,...,...,...,...,...,...,...,...
3536,300740,御家汇,,9.11,51.3119,24.4876,0.2040,268.6035,2.2383
3537,300746,汉嘉设计,,6.91,26.7241,7.3503,0.0465,106.2728,0.6734
3538,603321,梅轮电梯,,6.16,28.8689,5.8602,0.0254,95.1162,0.4135
3539,300848,美瑞新材,,,,,,,


In [8]:
data2 = stock_profit.loc[stock_profit['code'].isin(hs300),['code','roe','gross_profit_rate','net_profit_ratio',]]
data2.columns=['代码','ROE','毛利率','净利率']
data2

Unnamed: 0,代码,ROE,毛利率,净利率
19,002027,12.20,67.5988,45.72
22,600309,11.26,39.2307,19.94
26,002714,10.72,40.4656,34.55
34,300136,9.58,29.6922,28.43
35,002304,9.52,61.2324,36.14
...,...,...,...,...
3328,603156,,45.8056,21.72
3355,601066,,51.2235,38.59
3483,601066,,51.2235,38.59
3495,601838,,50.8416,41.20


In [9]:
stock_growth = ts.get_growth_data(2017,1)
data3 = stock_growth.loc[stock_growth['code'].isin(hs300),['code','nprg',]]
data3.columns = ['代码','NI增长率']
data3

[Getting data:]#######################################################

Unnamed: 0,代码,NI增长率
1,002352,22009.0595
20,601225,3805.2698
28,002601,3002.9102
33,000725,2443.3145
35,002624,2275.0723
...,...,...
3221,300142,
3236,600893,
3247,000723,
3259,000768,


In [17]:
from functools import reduce
import pandas as pd

#数据合并
merge = lambda x,y: pd.merge(x,y,how='left',on='代码')
data = reduce(merge,[data1,data2,data3])
data.drop_duplicates(inplace=True) #去除重复列code
data
#reduce(function,sequence)顺序迭代调用function

Unnamed: 0,代码,名称,行业,PE,PB,EPS,收入%,利润%,ROE,毛利率,净利率,NI增长率
0,601216,君正集团,化工原料,4.48,34.90,0.075,45.80,-12.97,3.75,40.3669,28.90,85.8170
1,600893,航发动力,航空,4.00,334.02,0.038,3.34,2106.25,-0.56,18.3001,-3.30,
3,300601,康泰生物,生物制药,24.52,305.31,0.400,7.71,0.63,3.21,86.9504,19.26,-54.7311
4,002236,大华股份,电器仪表,4.18,52.50,0.106,-19.47,0.50,3.36,39.7464,10.82,28.7268
5,600038,中直股份,航空,4.70,186.53,0.088,-18.24,-34.12,1.02,12.7824,3.40,27.9821
...,...,...,...,...,...,...,...,...,...,...,...,...
311,300059,东方财富,证券,9.35,62.12,0.110,102.31,126.48,0.77,57.4585,42.19,-14.3836
312,300433,蓝思科技,元器件,5.52,38.16,0.201,44.60,1010.53,1.54,24.9153,5.36,33.2010
313,600297,广汇汽车,汽车服务,0.87,0.00,-0.049,-31.36,-149.58,3.79,10.5030,2.95,42.2225
314,601788,光大证券,证券,2.17,23.81,0.239,-17.62,-16.20,1.22,31.6875,33.92,1.5761


估值系数（烟蒂）：PE*PB，烟蒂越低越被低估

In [32]:
data['估值系数'] = data['PE']*data['PB']
data = round(data,2)
# round() 方法返回浮点数x的四舍五入值。x -- 数值表达式。n -- 数值表达式，表示从小数点位数。
data.head(5)

Unnamed: 0,代码,NI增长率,名称,行业,PE,PB,EPS,收入%,利润%,ROE,毛利率,净利率,估值系数
0,2352,22009.06,顺丰控股,仓储物流,6.97,82.98,0.21,39.59,-28.16,3.62,20.1,5.0,578.37
1,601225,3805.27,陕西煤业,煤炭开采,1.26,8.17,0.24,27.28,-15.08,7.13,55.72,21.78,10.29
2,2601,3002.91,龙蟒佰利,化工原料,3.59,18.59,0.64,17.98,1.84,4.53,39.77,24.89,66.74
3,725,2443.31,京东方Ａ,元器件,1.9,73.54,0.02,-2.17,-46.12,,,,139.73
4,2624,2275.07,完美世界,影视音像,6.85,28.31,0.32,26.09,26.41,4.28,56.27,17.03,193.92


In [37]:
data_filter = data.loc[(data['估值系数']<60) &(data['ROE']>5),
                       ['代码','名称','PE','PB','估值系数','ROE','收入%']]
print(data_filter)
print('筛选结果共 %d 只个股' %len(data_filter))

         代码    名称     PE     PB   估值系数   ROE    收入%
1    601225  陕西煤业   1.26   8.17  10.29  7.13  27.28
50   003816  中国广核   1.67  24.87  41.53  5.76   6.76
71   002466  天齐锂业   5.93   0.00   0.00  7.73 -27.57
74   600340  华夏幸福   1.37   5.87   8.04  5.64  89.64
111  000963  华东医药   3.53  10.37  36.61  6.95 -11.41
158  601318  中国平安   2.02  13.38  27.03  5.60 -13.22
169  601888  中国中免  21.72   0.00   0.00  5.40 -44.23
172  600398  海澜之家   2.06  23.09  47.57  9.13 -36.80
173  600115  东方航空   1.21   0.00   0.00  5.62 -48.58
215  000625  长安汽车   1.14  20.15  22.97  5.22 -27.76
245  600177   雅戈尔   1.18   7.59   8.96  5.47  51.07
筛选结果共 11 只个股


### 2.2 先进行数据分类

In [38]:
# apply() 函数则会遍历每一个元素，对元素运行指定的 function
def map_func(x):
    if x['ROE']>5:
        return '成长'
    elif x['ROE']>=0:
        return '低成长'
    elif x['ROE']<0:
        return '亏损'
data['成长性'] = data.apply(map_func,axis=1)
data.head(5)

Unnamed: 0,代码,NI增长率,名称,行业,PE,PB,EPS,收入%,利润%,ROE,毛利率,净利率,估值系数,成长性
0,2352,22009.06,顺丰控股,仓储物流,6.97,82.98,0.21,39.59,-28.16,3.62,20.1,5.0,578.37,低成长
1,601225,3805.27,陕西煤业,煤炭开采,1.26,8.17,0.24,27.28,-15.08,7.13,55.72,21.78,10.29,成长
2,2601,3002.91,龙蟒佰利,化工原料,3.59,18.59,0.64,17.98,1.84,4.53,39.77,24.89,66.74,低成长
3,725,2443.31,京东方Ａ,元器件,1.9,73.54,0.02,-2.17,-46.12,,,,139.73,
4,2624,2275.07,完美世界,影视音像,6.85,28.31,0.32,26.09,26.41,4.28,56.27,17.03,193.92,低成长


In [42]:
#通过成长性进行分类
def gropu_func(df):
    #返回df，按照烟蒂系数排序的前三位
    return df.sort_values(['估值系数'],ascending=True)[:2]
data_grouped = data.groupby('成长性').apply(gropu_func)
data_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,代码,NI增长率,名称,行业,PE,PB,EPS,收入%,利润%,ROE,毛利率,净利率,估值系数,成长性
成长性,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
亏损,256,600583,-115.79,海油工程,石油开采,0.96,0.0,-0.07,28.33,-16.74,-0.74,-7.71,-12.17,0.0,亏损
亏损,258,300142,,沃森生物,生物制药,25.8,0.0,-0.01,-36.34,-146.78,-0.92,55.84,-34.0,0.0,亏损
低成长,76,600297,42.22,广汇汽车,汽车服务,0.87,0.0,-0.05,-31.36,-149.58,3.79,10.5,2.95,0.0,低成长
低成长,34,600028,140.25,中国石化,石油加工,0.68,0.0,-0.16,-22.59,-234.0,2.28,19.9,2.85,0.0,低成长
成长,173,600115,5.4,东方航空,空运,1.21,0.0,-0.24,-48.58,-296.06,5.62,12.44,11.48,0.0,成长
成长,169,601888,6.25,中国中免,旅游服务,21.72,0.0,-0.06,-44.23,-105.21,5.4,30.14,12.42,0.0,成长


In [43]:
#通过行业进行分类
data_grouped = data.groupby('行业').apply(gropu_func)
data_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,代码,NI增长率,名称,行业,PE,PB,EPS,收入%,利润%,ROE,毛利率,净利率,估值系数,成长性
行业,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
IT设备,163,600271,8.75,航天信息,IT设备,3.01,0.00,-0.23,-26.68,-182.79,4.03,15.29,7.20,0.00,低成长
IT设备,239,000977,-40.76,浪潮信息,IT设备,4.88,99.17,0.11,15.90,47.84,1.77,11.18,1.83,483.95,低成长
专用机械,139,002371,14.84,北方华创,专用机械,17.54,973.92,0.05,32.49,33.00,0.05,33.30,0.38,17082.56,低成长
中成药,129,600332,18.82,白云山,中成药,2.32,12.41,0.73,-5.93,-15.85,2.71,39.37,9.16,28.79,低成长
中成药,137,000538,15.66,云南白药,中成药,3.55,27.11,1.00,10.53,-34.48,4.40,30.53,12.27,96.24,低成长
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
食品,121,603288,20.49,海天味业,食品,27.16,76.60,0.50,7.17,9.17,8.88,44.69,24.35,2080.46,成长
饲料,241,002157,-41.79,正邦科技,饲料,6.39,18.23,0.36,37.37,318.63,2.08,12.18,2.67,116.49,低成长
饲料,208,000876,-6.03,新 希 望,饲料,5.35,22.76,0.39,26.87,144.13,2.96,8.25,4.37,121.77,低成长
黄金,11,601899,1250.45,紫金矿业,黄金,3.77,40.51,0.04,24.52,18.93,3.45,14.13,5.44,152.72,低成长


In [None]:
# 解释上面的代码块：
# data_growth = data[data['成长性'] == '成长'].sort_values(['估值系数'],ascending=True)
# data_growth.head()