In [None]:
# https://github.com/weibycn/fund

In [94]:
import requests
import json
import re
from datetime import datetime
import numpy as np
from copy import deepcopy

In [None]:
def get_all_funds():
    """Download the fund index from eastmoney and return it as a list of dicts.

    The endpoint serves a JavaScript assignment rather than bare JSON, so the
    JSON array is cut out between the first "[" and the last "]" before
    parsing. Each row is expected to hold exactly five fields.

    Returns:
        list[dict]: one dict per fund with the keys "id", "code", "name",
        "kind" and "fullcode".

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if no JSON array is found or a row does not have five fields.
    """
    api = "http://fund.eastmoney.com/js/fundcode_search.js"
    res = requests.get(api, timeout=30)
    res.raise_for_status()
    text = res.content.decode("utf-8")
    # str.index / str.rindex raise ValueError when the brackets are missing.
    rows = json.loads(text[text.index("["):text.rindex("]") + 1])
    return [
        {"id": fund_id, "code": code, "name": name, "kind": kind, "fullcode": fullcode}
        for fund_id, code, name, kind, fullcode in rows
    ]

In [2]:
# Download the fund index script. A timeout keeps the kernel from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being handed to the JSON parser in the next cell.
api = "http://fund.eastmoney.com/js/fundcode_search.js"
res = requests.get(api, timeout=30)
res.raise_for_status()

In [3]:
# The response is not bare JSON: the first 9 characters and the last character
# are sliced off before parsing. Each parsed row must hold exactly five fields.
funds = json.loads(res.content.decode("utf-8")[9:-1])
funds = [
    dict(id=fund_id, code=code, name=name, kind=kind, fullcode=fullcode)
    for (fund_id, code, name, kind, fullcode) in funds
]

In [4]:
# Display the parsed fund records.
funds

[{'id': '000001',
  'code': 'HXCZHH',
  'name': '华夏成长混合',
  'kind': '混合型-偏股',
  'fullcode': 'HUAXIACHENGZHANGHUNHE'},
 {'id': '000002',
  'code': 'HXCZHH',
  'name': '华夏成长混合(后端)',
  'kind': '混合型-偏股',
  'fullcode': 'HUAXIACHENGZHANGHUNHE'},
 {'id': '000003',
  'code': 'ZHKZZZQA',
  'name': '中海可转债债券A',
  'kind': '债券型-可转债',
  'fullcode': 'ZHONGHAIKEZHUANZHAIZHAIQUANA'},
 {'id': '000004',
  'code': 'ZHKZZZQC',
  'name': '中海可转债债券C',
  'kind': '债券型-可转债',
  'fullcode': 'ZHONGHAIKEZHUANZHAIZHAIQUANC'},
 {'id': '000005',
  'code': 'JSZQXYDQZQ',
  'name': '嘉实增强信用定期债券',
  'kind': '债券型-长债',
  'fullcode': 'JIASHIZENGQIANGXINYONGDINGQIZHAIQUAN'},
 {'id': '000006',
  'code': 'XBLDLHCZHHA',
  'name': '西部利得量化成长混合A',
  'kind': '混合型-偏股',
  'fullcode': 'XIBULIDELIANGHUACHENGZHANGHUNHEA'},
 {'id': '000008',
  'code': 'JSZZ500ETFLJA',
  'name': '嘉实中证500ETF联接A',
  'kind': '指数型-股票',
  'fullcode': 'JIASHIZHONGZHENG500ETFLIANJIEA'},
 {'id': '000009',
  'code': 'YFDTTLCHBA',
  'name': '易方达天天理财货币A',
  'kind': '货币

In [17]:
def expand(match):
    """Return the captured groups of a regex match, or None when there is no match."""
    return match.groups() if match else None

In [29]:
def to_dict(keys):
    """Build a converter that pairs a fixed list of keys with a sequence of values.

    Args:
        keys: field names, in the order the values will arrive.

    Returns:
        A function ``wrap(items)`` that returns ``dict(zip(keys, items))``,
        or ``None`` when ``items`` is ``None``.

    Raises:
        ValueError: (from ``wrap``) when ``items`` does not have the same
        length as ``keys``.
    """
    def wrap(items):
        if items is None:
            return None
        if len(keys) != len(items):
            # Raising a bare string is itself a TypeError in Python 3; raise a
            # real exception so the intended message reaches the caller.
            raise ValueError(f"{keys} don't match {items}")
        return dict(zip(keys, items))
    return wrap

In [38]:
def id(one):
    """Identity function: return the argument unchanged.

    NOTE: this deliberately keeps the name ``id`` (shadowing the builtin)
    because other cells in this notebook refer to it by that name.
    """
    return one

In [32]:
# Field names assigned, in order, to the seven capture groups of item_re.
keys = ["date", "price", "acc_price", "daily_change_percent", "buy_status", "sell_status", "dividends"]
# Captures the contents of each <tr>...</tr> element (non-greedy; without
# re.DOTALL the "." does not match newlines, so a row must sit on one line).
tr_re = re.compile(r'<tr>(.*?)</tr>')
# Matches one data row: a YYYY-MM-DD date cell followed by six more <td> cells
# whose attributes are ignored and whose contents are captured. re.X is set,
# but the pattern contains no whitespace or "#", so it reads literally.
item_re = re.compile(r'''<td>(\d{4}-\d{2}-\d{2})</td><td.*?>(.*?)</td><td.*?>(.*?)</td><td.*?>(.*?)</td><td.*?>(.*?)</td><td.*?>(.*?)</td><td.*?>(.*?)</td>''', re.X)
# Converter from a tuple of captured strings to a dict keyed by `keys`.
to_dict_ = to_dict(keys)

In [137]:
# "每份派现金X元" -- literally "cash payout of X yuan per share".
dividends_re = re.compile(r"每份派现金([\d\.]+)元")
# "每份基金份额折算X份" -- literally "each fund share converted into X shares".
dividends_re_2 = re.compile(r"每份基金份额折算([\.\d]+)份")
def calc_dividends(val, price):
    """Convert the textual dividend column into a per-share amount.

    Args:
        val: raw text of the dividend cell (may be empty).
        price: the unit price on the same row, as a number.

    Returns:
        float: the captured cash amount for a cash payout; ``(X - 1) * price``
        for a share conversion into X shares; ``0.0`` when ``val`` or ``price``
        is empty/zero or when the text matches neither pattern.
    """
    if not val or not price:
        return 0.0
    match = dividends_re.match(val)
    if match:
        return float(match.group(1))
    match = dividends_re_2.match(val)
    if match:
        return (float(match.group(1)) - 1.0) * price
    # Unrecognised text: return a number rather than falling through to None,
    # so downstream comparisons and aggregations keep working.
    return 0.0
def numerize(item):
    """Convert the string fields of one scraped row into typed values, in place.

    ``item`` is a dict keyed by the module-level ``keys`` (or ``None``, which is
    passed straight through). After conversion the "dividends" text is replaced
    by the result of ``calc_dividends`` and a "dividends_percent" entry
    (dividends divided by price, or 0.0 if either is falsy) is added.

    Returns the same dict object that was passed in.
    """
    if item is None:
        return None

    def parse_amount(val):
        # An empty cell counts as 0.0.
        return float(val) if val else 0.0

    def parse_percent(val):
        # Drop the last character before converting; an empty remainder is 0.
        number = val[:-1]
        return float(number if number else 0) / 100

    converters = {
        "date": lambda val: datetime.strptime(val, '%Y-%m-%d'),
        "price": parse_amount,
        "acc_price": parse_amount,
        "daily_change_percent": parse_percent,
        "dividends": id,
        "buy_status": id,
        "sell_status": id,
    }
    for field in keys:
        convert = converters[field]
        item[field] = convert(item[field])

    payout = calc_dividends(item["dividends"], item["price"])
    item["dividends"] = payout
    if payout and item["price"]:
        item["dividends_percent"] = payout / item["price"]
    else:
        item["dividends_percent"] = 0.0
    return item

In [72]:
fund_id = "002001"
# Extracts the total page count from the "pages:N," fragment of the response.
page_re = re.compile(r"pages:(\d+),", re.X)
def get_fund_data(fund_id, page, sdate="2020-09-10", edate="2021-09-10", timeout=30):
    """Fetch one page (50 rows) of a fund's price history.

    Args:
        fund_id: fund code placed in the ``code`` query parameter.
        page: 1-based page number to request.
        sdate: start date as "YYYY-MM-DD"; defaults to the original fixed window.
        edate: end date as "YYYY-MM-DD"; defaults to the original fixed window.
        timeout: seconds to wait for the HTTP response.

    Returns:
        tuple: ``(table, total_page)`` where ``table`` is the list of rows
        produced by ``numerize`` (lines that do not match ``item_re`` are
        dropped) and ``total_page`` is the page count reported by the response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the response does not contain a "pages:N," fragment.
    """
    res = requests.get(
        f"https://fundf10.eastmoney.com/F10DataApi.aspx?type=lsjz&code={fund_id}"
        f"&page={page}&per=50&sdate={sdate}&edate={edate}",
        timeout=timeout,
    )
    res.raise_for_status()
    content = res.content.decode("utf-8")
    table = []
    for line in tr_re.findall(content):
        # Non-matching lines (e.g. a header row) flow through as None and are skipped.
        row = numerize(to_dict_(expand(item_re.match(line))))
        if row:
            table.append(row)
    pages = page_re.search(content)
    if pages is None:
        raise ValueError(f"no page count found in response for fund {fund_id}, page {page}")
    return table, int(pages.group(1))

In [78]:
def get_fund_year_data(fund_id):
    """Collect every page of history for ``fund_id`` into a single list.

    Page 1 is always requested; the page count returned with each response
    decides whether further pages are fetched.
    """
    rows = []
    page_no, last_page = 1, 1
    while page_no <= last_page:
        batch, last_page = get_fund_data(fund_id, page_no)
        rows.extend(batch)
        page_no += 1
    return rows

In [113]:
def deduplicate(funds):
    """Return the funds with repeated ids removed, keeping the first occurrence.

    Args:
        funds: iterable of dicts, each with an "id" key.

    Returns:
        list: a new list in the original order containing one entry per id.
    """
    fund_ids = set()
    funds_new = []
    for fund in funds:
        if fund["id"] not in fund_ids:
            fund_ids.add(fund["id"])
            # The append must sit inside the check; outside it every fund,
            # duplicate or not, is kept.
            funds_new.append(fund)
    return funds_new

In [80]:
# Fetch all pages of history for the sample fund defined by `fund_id`.
fund_1 = get_fund_year_data(fund_id)

In [92]:
def get_metrics(table):
    """Summarise the dividend and price columns of a fund's history rows.

    Returns a dict with the number of rows whose "dividends" is positive, the
    sum of "dividends_percent" over all rows, and the numpy standard deviation
    of "dividends", "dividends_percent" and "price".
    """
    def column(field):
        # Pull one field out of every row, preserving row order.
        return [row[field] for row in table]

    return {
        "dividends_count": len([amount for amount in column("dividends") if amount > 0]),
        "anual_dividends_percent": sum(column("dividends_percent")),
        "dividends_std": np.std(column("dividends")),
        "dividends_percent_std": np.std(column("dividends_percent")),
        "price_std": np.std(column("price")),
    }

In [93]:
# Show the dividend and price statistics for the sample fund.
get_metrics(fund_1)

{'dividends_count': 14,
 'anual_dividends_percent': 0.17762188421776784,
 'dividends_std': 0.005685642497752113,
 'dividends_percent_std': 0.0032725857559546294,
 'price_std': 0.1412083561623124}

In [None]:
# Accumulator for the funds selected by the scanning loop; re-running this
# cell discards anything collected so far.
dividend_funds = []

In [None]:
# Scan the funds at positions 2000-3999 of the list: download each fund's
# history (one or more HTTP requests per fund), compute its metrics, and keep
# those whose summed dividends_percent exceeds 0.04.
# NOTE(review): results are appended to dividend_funds, so running this cell
# again without resetting that list adds the same funds a second time.
for fund in funds[2000:4000]:
    table = get_fund_year_data(fund["id"])
    metric = get_metrics(table)
    if metric["anual_dividends_percent"] > 0.04:
        # Copy before annotating so the entries of `funds` stay unmodified.
        fund = deepcopy(fund)
        fund["metric"] = metric
        fund["table"] = table
        dividend_funds.append(fund)

In [155]:
# Number of funds currently held in dividend_funds.
len(dividend_funds)

236

In [150]:
# Pass the collected funds through deduplicate() and rebind the name to its result.
dividend_funds = deduplicate(dividend_funds)

In [156]:
# Rank the selected funds by their summed dividend percentage, highest first,
# showing only id, name and metrics.
sorted(
    ([fund["id"], fund["name"], fund["metric"]] for fund in dividend_funds),
    key=lambda row: row[2]["anual_dividends_percent"],
    reverse=True,
)

[['000939',
  '中银研究精选灵活配置混合A',
  {'dividends_count': 4,
   'anual_dividends_percent': 0.5059669912509068,
   'dividends_std': 0.020132134937528537,
   'dividends_percent_std': 0.017258287550931655,
   'price_std': 0.10966266202267391}],
 ['000743',
  '红塔红土盛世普益混合发起式',
  {'dividends_count': 3,
   'anual_dividends_percent': 0.4933833860293311,
   'dividends_std': 0.023130355475863687,
   'dividends_percent_std': 0.021176786950070553,
   'price_std': 0.19221420514292262}],
 ['002303',
  '金鹰智慧生活混合',
  {'dividends_count': 2,
   'anual_dividends_percent': 0.47904615357631697,
   'dividends_std': 0.0288992977898199,
   'dividends_percent_std': 0.022635065641531234,
   'price_std': 0.1915066215716555}],
 ['001007',
  '国联安鑫安灵活配置混合',
  {'dividends_count': 4,
   'anual_dividends_percent': 0.44077288285481997,
   'dividends_std': 0.017295682257132607,
   'dividends_percent_std': 0.014184793634911392,
   'price_std': 0.15585291349398883}],
 ['002407',
  '前海开源恒远灵活配置混合',
  {'dividends_count': 4,
   'a

# Funds of interest
宝盈核心优势混合C
https://fund.eastmoney.com/000241.html
'dividends_count': 12,
'anual_dividends_percent': 0.1569051502505668,
'dividends_std': 0.0037453288969647234,
'dividends_percent_std': 0.0029154833879929684,
'price_std': 0.11489876989310534

国联安鑫安灵活配置混合
https://fund.eastmoney.com/001007.html
'dividends_count': 4,
'anual_dividends_percent': 0.44077288285481997,
'dividends_std': 0.017295682257132607,
'dividends_percent_std': 0.014184793634911392,
'price_std': 0.15585291349398883

圆信永丰双利A
https://fund.eastmoney.com/000824.html
'dividends_count': 4,
'anual_dividends_percent': 0.4044969605364803,
'dividends_std': 0.01733474656291999,
'dividends_percent_std': 0.013409297966068084,
'price_std': 0.07567492502397623

华夏回报混合A
https://fund.eastmoney.com/002001.html
'dividends_count': 14,
'anual_dividends_percent': 0.17762188421776784,
'dividends_std': 0.005685642497752113,
'dividends_percent_std': 0.0032725857559546294,
'price_std': 0.1412083561623124
