设计思路：

假设从某个时间点开始，每周淘汰最后10%。


In [1]:
import pandas as pd
import math
import gc

import base
import fund_scanner.common_tools.database as db

engine = db.get_sqlalchemy_engine()

In [2]:
# 准备所有参赛者

sql = 'select * from funds a left join funds_update b on a.funds_id=b.funds_id'

df_all_candidates = pd.read_sql(sql, engine).iloc[:,[0,1,2,3,4,5,11]]

df_all_candidates = df_all_candidates.set_index('funds_id')

df_all_candidates['last_price'] = math.nan
df_all_candidates['current_price'] = math.nan
df_all_candidates['gain_ratio'] = math.nan
df_all_candidates['good'] = 0
good = {
    'not_in_yet' : 0,
    'healthy' : 1,
    'hurt_once' : 2,
    'hurt_twice' : 3,
    'out' : 10,
    'noway' : 11
    }
# good值:
#     0 : 未参赛
#     1 : 已参赛，正活跃
#     2 : 受伤 1次
#     3 : 受伤 2次
#     10 : 淘汰
#     11 : 没有参赛资格
# last_price: 上一轮净值
# current_price: 这一轮净值
# gain_ratio: 这一轮涨幅

df_all_candidates = df_all_candidates.sort_values('funds_start_date')

df_all_candidates.tail(3)

Unnamed: 0_level_0,funds_code,funds_name_full,funds_type,funds_start_date,update_time,funds_amount,last_price,current_price,gain_ratio,good
funds_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
5418,1817,易方达瑞兴灵活配置混合I,,NaT,2017-07-29 09:34:16,,,,,0
5426,1988,南方纯元A,,NaT,2017-07-29 09:35:12,,,,,0
5439,4911,中加纯债定开债券A,,NaT,2017-07-29 09:36:17,,,,,0


In [27]:
# game1 游戏规则
# 选择 2013年 前成立的老基金们参赛，从 2013年 开始，每 14天 比一次净值的涨幅，
# 把涨幅最靠后 8支 的淘汰，如果剩下基金比较少了，则淘汰排名最后的 1/10
# 一直跑到 today，
# 如果中途淘汰数量过多，最后剩下只有 5支 基金的时候，则直接中止比赛，
# 返回最后留下的基金

def game1(funds_start_date='2013-1-1', competition_start_date='2013-1-1', competition_end_date='today',
          competition_time_span=14, bottom_to_be_out_max_count=8, bottom_to_be_out_percentage=10, 
          min_result_count=5):
    
    gc.collect()
    # 重新读取数据
    df_competition = df_all_candidates.copy()

    # 2017年1月1日以后成立的基金或者没有写成立时间的基金没有参赛资格
    df_competition.loc[df_competition['funds_start_date']>pd.to_datetime(funds_start_date), 'good'] = good['out']
    df_competition.loc[df_competition['funds_start_date'].isnull(),'good'] = good['out']

    #资产规模小于1亿，或者没有数据的没有资格参赛
    df_competition.loc[df_competition['funds_amount']<1, 'good'] = good['out']
    df_competition.loc[df_competition['funds_amount'].isnull(),'good'] = good['out']


    #比赛从最早一直基金成立开始
    #start_point = pd.to_datetime(df_competition.iloc[0,3])
    #比赛从2013年1月1日开始
    start_point = pd.to_datetime(competition_start_date)
    round_count = 1
    current_date = start_point
    df_competition_results = []
    while current_date < pd.to_datetime(competition_end_date):

        # 得到当天价格（如果当天没几个价格就查次日的）
        df_competition['last_price'] = df_competition['current_price']
        while True:
            df_current_price = pd.read_sql('select * from funds_historical_price where funds_price_date=\'%s\''%current_date, engine)
            if len(df_current_price)>10:
                break
            else:
                current_date = current_date + pd.DateOffset(1)

        df_current_price = df_current_price.set_index('funds_id')
        df_competition['current_price'].update(df_current_price['funds_price_adjust'])

        # 如果价格从0到有，则说明参赛了，设置为 参赛1
        df_competition.loc[(df_competition['last_price']>0) & (df_competition['good']==good['not_in_yet']), 'good']=good['healthy']


        # 如果价格从非0到有，则说明价格变化了，计算变化率
        df_competition['gain_ratio'] = \
        ( df_competition['current_price'] - df_competition['last_price'] ) / df_competition['last_price']

        # 按变化率排序，将最后10%选手设置为 淘汰
        active_candidates = len(df_competition.loc[(df_competition['good']==good['healthy'])])
        print('Round %d: %s'% (round_count, current_date))
        if active_candidates>min_result_count:
            should_be_removed = active_candidates // bottom_to_be_out_percentage
            if should_be_removed > bottom_to_be_out_max_count:
                should_be_removed = bottom_to_be_out_max_count
            if should_be_removed>=1:
                current_competition = df_competition.loc[(df_competition['good']==good['healthy'])].sort_values('gain_ratio')
                min_gain_ratio = current_competition['gain_ratio'].min()
                max_gain_ratio = current_competition['gain_ratio'].max()
                threshold = current_competition.iloc[should_be_removed, :]['gain_ratio']
                print('最低涨幅 %f, 最高涨幅 %f ----> 淘汰 %d/%d 名选手，他们当期收益率低于 %f'\
                      %(min_gain_ratio, max_gain_ratio, should_be_removed, active_candidates, threshold))
                df_competition.loc[(df_competition['good']==good['healthy']) & (df_competition['gain_ratio']<=threshold), 'good'] = good['out']
            else:
                break

        round_count += 1
        current_date = current_date + pd.DateOffset(14)
        df_competition_results.append(df_competition.copy())


    # The winner is:
    return df_competition.loc[df_competition['good']==good['healthy']].sort_values('current_price', ascending=False)

In [26]:
game1(com)

Round 1: 2013-01-04 00:00:00
Round 2: 2013-01-18 00:00:00
最低涨幅 -0.017804, 最高涨幅 0.125874 ----> 淘汰 8/852 名选手，他们当期收益率低于 0.000532
Round 3: 2013-02-01 00:00:00
最低涨幅 -0.045450, 最高涨幅 0.133468 ----> 淘汰 8/844 名选手，他们当期收益率低于 -0.017442
Round 4: 2013-02-18 00:00:00
最低涨幅 -0.042082, 最高涨幅 0.061244 ----> 淘汰 8/836 名选手，他们当期收益率低于 -0.027273
Round 5: 2013-03-04 00:00:00
最低涨幅 -0.096186, 最高涨幅 0.033050 ----> 淘汰 8/827 名选手，他们当期收益率低于 -0.085086
Round 6: 2013-03-18 00:00:00
最低涨幅 -0.051387, 最高涨幅 0.065000 ----> 淘汰 8/818 名选手，他们当期收益率低于 -0.036011
Round 7: 2013-04-01 00:00:00
最低涨幅 -0.030047, 最高涨幅 0.066298 ----> 淘汰 8/809 名选手，他们当期收益率低于 -0.013351
Round 8: 2013-04-15 00:00:00
最低涨幅 -0.155729, 最高涨幅 0.023369 ----> 淘汰 8/800 名选手，他们当期收益率低于 -0.057745
Round 9: 2013-05-02 00:00:00
最低涨幅 -0.034115, 最高涨幅 0.117067 ----> 淘汰 8/791 名选手，他们当期收益率低于 -0.016334
Round 10: 2013-05-16 00:00:00
最低涨幅 -0.001129, 最高涨幅 0.121261 ----> 淘汰 8/782 名选手，他们当期收益率低于 0.000000
Round 11: 2013-05-30 00:00:00
最低涨幅 -0.050967, 最高涨幅 0.092074 ----> 淘汰 8/773 名选手，他们当期收益率低于 -

Unnamed: 0_level_0,funds_code,funds_name_full,funds_type,funds_start_date,update_time,funds_amount,last_price,current_price,gain_ratio,good
funds_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2933,100020,富国天益价值混合,混合型,2004-06-15,2017-07-30 07:20:04,30.89,4.3457,4.3559,0.002347,1
4606,150103,银河银泰理财,混合型,2004-03-30,2017-07-29 01:24:07,12.83,4.1242,4.1356,0.002764,1
4050,180010,银华优质增长混合,混合型,2006-06-09,2017-07-29 00:26:13,27.59,3.8459,3.8526,0.001742,1
2895,160603,鹏华普天收益,混合型,2003-07-12,2017-07-30 06:42:04,5.49,3.786,3.786,0.0,1
4014,2031,华夏策略精选,混合型,2008-10-23,2017-07-29 00:23:04,10.39,3.675,3.686,0.002993,1
3091,320003,诺安先锋混合,混合型,2005-12-19,2017-07-30 10:00:04,39.25,3.2038,3.199,-0.001498,1
3547,450001,国富中国收益,混合型,2005-06-01,2017-07-28 23:09:11,3.01,2.5738,2.5867,0.005012,1
3218,165511,信诚中证500分级,股票指数,2011-02-11,2017-07-28 05:35:05,2.51,1.779,1.783,0.002248,1
2942,202211,南方恒元保本,保本型,2008-11-12,2017-07-30 07:29:04,4.56,1.605,1.613,0.004984,1
