In [9]:
import datetime
import matplotlib.pyplot as plt

# pandas_datareader爬取A股数据

In [10]:
import pandas_datareader.data as web

# Yahoo Finance symbol to download.
# Exchange suffixes on Yahoo: ".SZ" = Shenzhen, ".SS" = Shanghai.
TICKER = "300783.SZ"

start = datetime.datetime(2015, 1, 1)  # first day of the requested period
end = datetime.date.today()            # last day of the requested period (today)

stock = web.DataReader(TICKER, "yahoo", start, end)                    # price history
stock_hegu = web.DataReader(TICKER, "yahoo-actions", start, end)       # corporate actions (original note: share consolidation)
stock_fenhong = web.DataReader(TICKER, "yahoo-dividends", start, end)  # dividends
stock_hangqing = web.get_quote_yahoo(TICKER).T                         # quote data for this symbol, transposed

In [0]:
# Line chart of the `Close` column of the downloaded price history.
plt.plot(stock["Close"])

In [0]:
# Show the summary produced by `describe()` for `stock` as the cell output.
stock.describe()

## pandas_profiling看全盘数据分析

In [0]:
import pandas_profiling   # pandas-profiling: generates a detailed data report; never referenced by name here, presumably imported for the `profile_report` method used below — TODO confirm
# Build a report titled 'test' from `stock` and write it to test.html.
profile=stock.profile_report(title='test')
profile.to_file(output_file='test.html')

# 获取天天基金数据

In [0]:
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

# Figure text configuration, applied in one call.
matplotlib.rcParams.update({
    # Default font: SimHei, selected through the sans-serif family.
    'font.sans-serif': ['SimHei'],
    'font.family': 'sans-serif',
    # Work around the minus sign '-' being rendered as a box.
    'axes.unicode_minus': False,
})

def get_url(url, params=None, proxies=None, timeout=10):
    """Fetch a page with an HTTP GET request and return its body as text.

    Args:
        url: Address to request.
        params: Optional query parameters forwarded to ``requests.get``.
        proxies: Optional proxy mapping forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can wait forever on a stalled connection,
            which would hang the notebook.

    Returns:
        The response body as text (``Response.text``).

    Raises:
        requests.HTTPError: If the response carries an error status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    rsp = requests.get(url, params=params, proxies=proxies, timeout=timeout)
    rsp.raise_for_status()
    return rsp.text

def get_fund_data(code,per=10,sdate='',edate='',proxies=None):
    """Scrape a fund's historical net-value table into a DataFrame.

    Requests the eastmoney ``F10DataApi.aspx`` endpoint (``type=lsjz``) one
    page at a time and collects the table rows of every page.

    Args:
        code: Fund code, sent as the ``code`` query parameter.
        per: Number of records requested per page (``per`` query parameter).
        sdate: Start date, sent as the ``sdate`` query parameter.
        edate: End date, sent as the ``edate`` query parameter.
        proxies: Optional proxy mapping forwarded to ``get_url``.

    Returns:
        pandas.DataFrame with one column per ``<th>`` cell of the first page
        and one row per table row. Cell values are strings; empty cells are
        ``numpy.nan``. The frame has zero rows when no page contains data.

    Raises:
        ValueError: If the page count cannot be found in the first response.
    """
    url = 'http://fund.eastmoney.com/f10/F10DataApi.aspx'

    def fetch(page):
        """Request one result page; return its raw text and its parsed soup."""
        params = {'type': 'lsjz', 'code': code, 'page': page, 'per': per,
                  'sdate': sdate, 'edate': edate}
        html = get_url(url, params, proxies)
        return html, BeautifulSoup(html, 'html.parser')

    first_html, soup = fetch(1)

    # Total number of pages, read from the "pages:<n>" marker in the response.
    found = re.search(r'pages:\s*(\d+)', first_html)
    if found is None:
        raise ValueError(
            'page count ("pages:<n>") not found in response for fund %r' % (code,))
    pages = int(found.group(1))

    # Column names come from the header cells of the first page.
    heads = [str(head.contents[0]) for head in soup.find_all("th")]

    # Collected table rows, one list of cell values per row.
    records = []

    for page in range(1, pages + 1):
        # Page 1 was already downloaded above; only later pages need a request.
        if page > 1:
            _, soup = fetch(page)

        body = soup.find("tbody")
        if body is None:
            # No table body on this page: nothing to collect.
            continue

        for row in body.find_all("tr"):
            # Empty cells become NaN; everything else is kept as plain text.
            cells = [str(cell.contents[0]) if cell.contents else np.nan
                     for cell in row.find_all('td')]
            # Rows whose cell count differs from the header count (for
            # instance a single-cell notice spanning the table) cannot be
            # placed into columns; skip them instead of failing.
            if not cells or len(cells) != len(heads):
                continue
            records.append(cells)

    # Build the frame directly from the row lists so that NaN stays a real
    # missing value (a NumPy string array would turn it into the text 'nan').
    return pd.DataFrame(records, columns=heads)


# Main program
if __name__ == "__main__":
    # NOTE(review): per=1 asks for one record per page, so every record costs
    # one HTTP request; a larger `per` would need fewer requests if the
    # service accepts it — TODO confirm the service's limit.
    data=get_fund_data('000011',per=1,sdate='2018-01-01',edate='2019-09-24')
    # Convert the scraped text columns to proper dtypes.
    # The date layout is inferred rather than pinned to '%Y/%m/%d', so both
    # slash- and dash-separated dates parse.
    data['净值日期']=pd.to_datetime(data['净值日期'])
    data['单位净值']= data['单位净值'].astype(float)
    data['累计净值']=data['累计净值'].astype(float)
    data['日增长率']=data['日增长率'].str.strip('%').astype(float)
    # Sort by date ascending and rebuild the index.
    data=data.sort_values(by='净值日期',axis=0,ascending=True).reset_index(drop=True)
    print(data)

    # Pull out the columns used by the charts and statistics below.
    net_value_date = data['净值日期']
    net_asset_value = data['单位净值']
    accumulative_net_value=data['累计净值']
    daily_growth_rate = data['日增长率']

    # Net-value chart.
    fig = plt.figure()
    # Axis 1 (left): unit and accumulated net value.
    # Each line carries a label; without labels the legend has nothing to show.
    ax1 = fig.add_subplot(111)
    ax1.plot(net_value_date,net_asset_value,label='单位净值')
    ax1.plot(net_value_date,accumulative_net_value,label='累计净值')
    ax1.set_ylabel('净值数据')
    ax1.set_xlabel('日期')
    ax1.legend(loc='upper left')
    # Axis 2 (right, sharing the x axis): daily growth rate in red.
    ax2 = ax1.twinx()
    ax2.plot(net_value_date,daily_growth_rate,'r',label='日增长率')
    ax2.set_ylabel('日增长率（%）')
    ax2.legend(loc='upper right')
    plt.title('基金净值数据')
    plt.show()

    # Chart of accumulated minus unit net value (titled as fund "dividend" information).
    bonus = accumulative_net_value-net_asset_value
    plt.figure()
    plt.plot(net_value_date,bonus)
    plt.xlabel('日期')
    plt.ylabel('累计净值-单位净值')
    plt.title('基金“分红”信息')
    plt.show()

    # Daily growth rate statistics: missing, positive, and non-positive days.
    print('日增长率缺失：',daily_growth_rate.isna().sum())
    print('日增长率为正的天数：',(daily_growth_rate>0).sum())
    print('日增长率为负（包含0）的天数：',(daily_growth_rate<=0).sum())

In [0]:
# Preview the first rows of `data` as the cell output.
data.head()

In [1]:
import os

# Working directory for the following cells. Defaults to the author's local
# folder; set the PYTHON_LEARNING_DIR environment variable to run the
# notebook from another location without editing this cell.
# NOTE(review): presumably this folder holds the local `data_feature` module
# imported in a later cell — confirm.
os.chdir(os.environ.get(
    'PYTHON_LEARNING_DIR',
    '/Users/foster/Documents/person content/foster_data/python_learning'))

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"  # let one cell display several results

In [3]:
import pandas as pd
from data_feature import data_feature

# Build the demo dataset from plotly's bundled gapminder table.
import plotly.express as px

# Widen the pandas display limits.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 1000)

# Arguments handed to data_feature: the value columns and the list of year spans.
cols = ['lifeExp', 'pop', 'gdpPercap']
years = [5, 10, 20, 30, 70]

# Add a fixed reference year and each row's distance from it.
data = px.data.gapminder()
data = data.assign(now=2008)
data = data.assign(year_diff=data["now"] - data["year"])
data.head()

# Run data_feature on the Afghanistan rows only.
is_afghanistan = data["country"] == "Afghanistan"
af = data_feature(data[is_afghanistan], cols, years, "country", merge_ori=0)

af

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num,now,year_diff
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4,2008,56
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4,2008,51
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4,2008,46
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4,2008,41
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4,2008,36


44444444
44444444
44444444
44444444
44444444
333333


Unnamed: 0,country,last_5_lifeExp_count,last_5_lifeExp_sum,last_5_lifeExp_max,last_5_lifeExp_min,last_5_lifeExp_mean,last_5_lifeExp_var,last_5_lifeExp_std,last_5_lifeExp_median,last_5_lifeExp_skew,last_5_pop_count,last_5_pop_sum,last_5_pop_max,last_5_pop_min,last_5_pop_mean,last_5_pop_var,last_5_pop_std,last_5_pop_median,last_5_pop_skew,last_5_gdpPercap_count,last_5_gdpPercap_sum,last_5_gdpPercap_max,last_5_gdpPercap_min,last_5_gdpPercap_mean,last_5_gdpPercap_var,last_5_gdpPercap_std,last_5_gdpPercap_median,last_5_gdpPercap_skew,last_10_lifeExp_count,last_10_lifeExp_sum,last_10_lifeExp_max,last_10_lifeExp_min,last_10_lifeExp_mean,last_10_lifeExp_var,last_10_lifeExp_std,last_10_lifeExp_median,last_10_lifeExp_skew,last_10_pop_count,last_10_pop_sum,last_10_pop_max,last_10_pop_min,last_10_pop_mean,last_10_pop_var,last_10_pop_std,last_10_pop_median,last_10_pop_skew,last_10_gdpPercap_count,last_10_gdpPercap_sum,last_10_gdpPercap_max,last_10_gdpPercap_min,last_10_gdpPercap_mean,last_10_gdpPercap_var,last_10_gdpPercap_std,last_10_gdpPercap_median,last_10_gdpPercap_skew,last_20_lifeExp_count,last_20_lifeExp_sum,last_20_lifeExp_max,last_20_lifeExp_min,last_20_lifeExp_mean,last_20_lifeExp_var,last_20_lifeExp_std,last_20_lifeExp_median,last_20_lifeExp_skew,last_20_pop_count,last_20_pop_sum,last_20_pop_max,last_20_pop_min,last_20_pop_mean,last_20_pop_var,last_20_pop_std,last_20_pop_median,last_20_pop_skew,last_20_gdpPercap_count,last_20_gdpPercap_sum,last_20_gdpPercap_max,last_20_gdpPercap_min,last_20_gdpPercap_mean,last_20_gdpPercap_var,last_20_gdpPercap_std,last_20_gdpPercap_median,last_20_gdpPercap_skew,last_30_lifeExp_count,last_30_lifeExp_sum,last_30_lifeExp_max,last_30_lifeExp_min,last_30_lifeExp_mean,last_30_lifeExp_var,last_30_lifeExp_std,last_30_lifeExp_median,last_30_lifeExp_skew,last_30_pop_count,last_30_pop_sum,last_30_pop_max,last_30_pop_min,last_30_pop_mean,last_30_pop_var,last_30_pop_std,last_30_pop_median,last_30_pop_skew,last_30_gdpPercap_count,last_30_gdpPercap
_sum,last_30_gdpPercap_max,last_30_gdpPercap_min,last_30_gdpPercap_mean,last_30_gdpPercap_var,last_30_gdpPercap_std,last_30_gdpPercap_median,last_30_gdpPercap_skew,last_70_lifeExp_count,last_70_lifeExp_sum,last_70_lifeExp_max,last_70_lifeExp_min,last_70_lifeExp_mean,last_70_lifeExp_var,last_70_lifeExp_std,last_70_lifeExp_median,last_70_lifeExp_skew,last_70_pop_count,last_70_pop_sum,last_70_pop_max,last_70_pop_min,last_70_pop_mean,last_70_pop_var,last_70_pop_std,last_70_pop_median,last_70_pop_skew,last_70_gdpPercap_count,last_70_gdpPercap_sum,last_70_gdpPercap_max,last_70_gdpPercap_min,last_70_gdpPercap_mean,last_70_gdpPercap_var,last_70_gdpPercap_std,last_70_gdpPercap_median,last_70_gdpPercap_skew
0,Afghanistan,1,43.828,43.828,43.828,43.828,0.0,0.0,43.828,0.0,1,31889923,31889923,31889923,31889923,0.0,0.0,31889923,0.0,1,974.580338,974.580338,974.580338,974.580338,0.0,0.0,974.580338,0.0,2,85.957,43.828,42.129,42.9785,1.443301,1.201374,42.9785,0.0,2,57158328,31889923,25268405,28579164,21922250312162,4682120.0,28579164,0.0,2,1701.314393,974.580338,726.734055,850.657197,30713.890147,175.253788,850.657197,0.0,4,169.394,43.828,41.674,42.3485,1.01162,1.005793,41.946,1.78241,4,95703664,31889923,16317921,23925916,41998060000000.0,6480591.0,23747910,0.152543,4,2985.997139,974.580338,635.341351,746.499285,24735.791029,157.276162,688.037725,1.639383,6,250.07,43.828,39.854,41.678333,1.778571,1.333631,41.7185,0.420691,6,122453437,31889923,12881816,20408910.0,54982540000000.0,7415021.0,19272668,0.632343,6,4816.404523,978.011439,635.341351,802.734087,24009.04703,154.94853,789.565,0.14867,12,449.746,43.828,28.801,37.478833,25.996192,5.098646,39.146,-0.548733,12,189884585,31889923,8425333,15823720.0,50617300000000.0,7114583.0,13473708.5,1.289663,12,9632.095181,978.011439,635.341351,802.674598,11707.873947,108.202929,803.483195,0.152716
