In [62]:
import pandas as pd 
import numpy as np 
import re 


class Daily2WeekData(object):
    """
    Downsample daily bars to weekly bars and write the result to a CSV file.

    Intended for ETF and stock daily data. Every input CSV must contain the
    columns listed in ``REQUIRED_COLUMNS``.
    """

    # Columns that are kept from the input files; anything else is dropped.
    REQUIRED_COLUMNS = ['date', 'code', 'open', 'high', 'low', 'close', 'pct_chg', 'vol']
    # Destination used by daily2week() when the caller gives no path.
    # Raw string: the backslashes are literal Windows path separators.
    DEFAULT_OUTPUT_PATH = r'D:\Quant_Code\data\ETF\weekly_ETF.csv'

    def __init__(self, file_list):
        """Store ``file_list`` and immediately load it via get_data_accu().

        Args:
            file_list: iterable of CSV paths holding the daily data.
        """
        self.file_list = file_list
        self.df = pd.DataFrame()
        self.codes = list()
        self.get_data_accu()

    def get_data_accu(self):
        """Concatenate the daily incremental files into one date-indexed frame.

        Returns:
            ``(codes, df)`` where ``codes`` is the list of unique values of
            the ``code`` column and ``df`` is indexed by the parsed ``date``.
            Both are also stored on ``self``.

        Raises:
            ValueError: if ``file_list`` yields no paths.
        """
        frames = [pd.read_csv(path) for path in self.file_list]
        if not frames:
            raise ValueError('file_list must contain at least one CSV path')

        df = pd.concat(frames, axis=0)[self.REQUIRED_COLUMNS].copy()

        # Dates may be read as integers (e.g. 20211227): go through str,
        # normalise to YYYY-MM-DD, then parse.
        date_text = df['date'].astype('str').apply(self.format_date_str)
        df['date'] = pd.to_datetime(date_text)

        # Sort on the parsed dates (not the raw text) so mixed input formats
        # still end up chronological; a stable sort keeps file order for
        # rows sharing a date.
        df = df.sort_values('date', kind='mergesort').reset_index(drop=True)

        self.codes = list(df['code'].unique())
        self.df = df.set_index('date')
        return self.codes, self.df

    def format_date_str(self, date_str):
        """Rewrite every ``YYYYMMDD`` digit run in ``date_str`` as ``YYYY-MM-DD``.

        The replacement is taken from the matched groups, so text surrounding
        the digits (for example a trailing time) is preserved. Strings with
        no 8-digit run are returned unchanged.
        """
        return re.sub(r"(\d{4})(\d{2})(\d{2})", r"\1-\2-\3", date_str)

    def daily2week(self, out_path=None):
        """Resample each code to weekly bars, save them as CSV and return them.

        Weeks use the pandas ``'W'`` rule with ``closed='right'`` and
        ``label='left'``. Weeks without rows are forward-filled from the
        previous week.

        Args:
            out_path: CSV destination; defaults to ``DEFAULT_OUTPUT_PATH``.

        Returns:
            ``(codes, df)`` where ``df`` has the columns ``close_max``,
            ``close_first``, ``close_min``, ``close_last``, ``vol_sum``,
            ``pct_chg_last`` and ``code``.

        Raises:
            ValueError: if there are no codes to resample.
        """
        if out_path is None:
            out_path = self.DEFAULT_OUTPUT_PATH

        # Only the aggregations that are written out are computed.
        agg_spec = {
            'close': ['max', 'first', 'min', 'last'],
            'vol': ['sum'],
            'pct_chg': ['last'],
        }
        cols = [('close', 'max'), ('close', 'first'), ('close', 'min'),
                ('close', 'last'), ('vol', 'sum'), ('pct_chg', 'last')]

        weekly_frames = []
        for code in self.codes:
            daily = self.df[self.df['code'] == code]
            weekly = daily.resample('W', label='left', closed='right').agg(agg_spec).ffill()
            weekly = weekly[cols].copy()
            # Flatten the (column, aggregation) pairs into names like 'close_max'.
            weekly.columns = ['_'.join(col) for col in weekly.columns.values]
            weekly['code'] = code
            weekly_frames.append(weekly)

        if not weekly_frames:
            raise ValueError('no codes to resample')

        df = pd.concat(weekly_frames, axis=0)
        codes = list(df['code'].unique())
        df.to_csv(out_path)

        return codes, df

if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; sequences such as
    # '\Q' or '\d' in a normal string are invalid escapes.
    # For stock data, pass e.g. [r'D:\E-BOOK\daily_stock.csv'] instead.
    d = Daily2WeekData([
        r'D:\Quant_Code\data\ETF\daily_etf.csv',
        r'D:\Quant_Code\data\ETF\daily_etf_21_22.csv',
    ])

    # Build the weekly bars (also written to CSV) and show the latest rows.
    codes, df = d.daily2week()
    print(df.tail())

            close_max  close_first  close_min  close_last    vol_sum  \
date                                                                   
2021-11-21      1.004        1.002      0.991       0.991  409037.88   
2021-11-28      1.011        0.984      0.984       0.991  464754.29   
2021-12-05      1.010        0.980      0.975       1.001  400189.94   
2021-12-12      1.018        1.010      1.004       1.004  430729.26   
2021-12-19      1.008        0.987      0.987       1.008  151274.87   

            pct_chg_last       code  
date                                 
2021-11-21       -0.8008  159739.SZ  
2021-11-28       -0.1008  159739.SZ  
2021-12-05       -0.8911  159739.SZ  
2021-12-12       -1.2783  159739.SZ  
2021-12-19        0.5988  159739.SZ  


In [1]:
import pandas as pd 
import numpy as np 
from get_code import get_code

class StockPricevol(object):
    """
    Screen assets that match a cup-with-handle style price/volume breakout
    and compute the RPS indicator from ranked returns.
    """

    def __init__(self, path=r'D:\Quant_Code\data\stock\weekly_stock.csv'):
        """Load the weekly bar CSV at ``path`` and index it by ``code``.

        The file must contain a ``code`` column; the other methods read
        ``date``, ``close_last``, ``close_first``, ``close_max``,
        ``close_min`` and ``vol_sum``.
        """
        df = pd.read_csv(path)
        self.codes = df['code'].unique().tolist()
        self.df = df.set_index('code').copy()

    def find_price_vol_instrument(self, n, r=1.02):
        """Return the codes whose latest bar breaks out on price and volume.

        A code is selected when all of the following hold:

        * the latest bar is not a one-price bar (open == high == low == close);
        * none of the last five bars has a long upper shadow, i.e. a high
          more than 3% above its close;
        * the latest close ``p`` satisfies ``p1 < p < p1 * r`` where ``p1``
          is the highest close of the ``n - 1`` bars before it;
        * the mean volume of the last five bars is more than twice the mean
          volume of the five bars before those.

        Codes with fewer than two bars, an empty look-back window
        (``close[-n:-1]``) or no earlier volume bars are skipped.

        Args:
            n: look-back length in bars, including the latest bar.
            r: upper bound of the breakout as a multiple of ``p1``.

        Returns:
            List of matching codes, in the order of ``self.codes``.
        """
        columns = ['close_last', 'close_first', 'close_max', 'close_min', 'vol_sum']
        # Split the frame once instead of doing a label lookup per code and column.
        rows_by_code = {
            code: rows
            for code, rows in self.df[columns].groupby(level=0, sort=False)
        }

        up_list = []
        for code in self.codes:
            rows = rows_by_code.get(code)
            if rows is None or len(rows) < 2:
                continue

            close = rows['close_last'].tolist()
            open_ = rows['close_first'].tolist()  # trailing underscore avoids shadowing the builtin open
            high = rows['close_max'].tolist()
            low = rows['close_min'].tolist()
            vol = rows['vol_sum'].tolist()

            # Exclude one-price (limit-up) bars.
            if close[-1] == open_[-1] == high[-1] == low[-1]:
                continue

            # Exclude long upper shadows within the last five bars.
            if any(c * 1.03 < h for c, h in zip(close[-5:], high[-5:])):
                continue

            window = close[-n:-1]
            prev_vol = vol[-10:-5]
            if not window or not prev_vol:
                continue

            p = close[-1]        # current price
            p1 = np.max(window)  # highest close of the preceding bars

            # Price breakout confirmed by a volume surge. Written as a
            # product so that a zero earlier volume needs no division.
            if p1 < p < p1 * r and np.mean(vol[-5:]) > 2 * np.mean(prev_vol):
                up_list.append(code)
        return up_list

    def get_rps_data(self, code_date='20211227', start='2020-11-08'):
        """Build the date x name table of weekly closes used for RPS.

        ``get_code(code_date)`` is assumed to return a mapping of display
        name -> code: its values select the code columns and its keys become
        the new column names (TODO confirm against get_code).

        Args:
            code_date: argument forwarded to ``get_code``.
            start: first ``date`` label to keep (label slice).

        Returns:
            DataFrame indexed by ``date`` with one forward-filled
            ``close_last`` column per name.
        """
        name_to_code = get_code(code_date)

        closes = self.df.reset_index(drop=False)
        closes = closes.set_index(['date', 'code'])['close_last'].unstack()
        closes = closes.loc[:, list(name_to_code.values())].ffill()
        closes = closes.loc[start:]

        code_to_name = {code: name for name, code in name_to_code.items()}
        return closes.rename(columns=code_to_name)

    def cal_ret(self, df, w):
        """Return the ``w``-row simple returns of ``df``, transposed.

        The first ``w`` rows (which have no reference value) are dropped and
        remaining missing values are replaced with 0. After transposing, the
        original columns become the index and the row labels the columns.
        """
        ret = (df / df.shift(w) - 1).iloc[w:].fillna(0)

        return ret.T

    def rps_all(self, w):
        """Rank the ``w``-row returns and compute RPS for every date column.

        Returns:
            Dict keyed by each column label of the transposed returns. Each
            value is a DataFrame sorted by return (descending) with the
            columns ``收益率`` (return), ``排名`` (rank, starting at 1) and
            ``RPS`` = ``(1 - rank / count) * 100``.
        """
        ret = self.cal_ret(self.get_rps_data(), w)

        res_dic = dict()
        for col in ret.columns:
            ranked = ret[col].sort_values(ascending=False).to_frame('收益率')
            total = len(ranked)
            ranked['排名'] = range(1, total + 1)
            ranked['RPS'] = (1 - ranked['排名'] / total) * 100
            res_dic[col] = ranked

        return res_dic

    


if __name__ == '__main__':
    # Raw string keeps the Windows backslashes literal; '\Q', '\d', '\s'
    # and '\w' in a normal string are invalid escape sequences.
    s = StockPricevol(r'D:\Quant_Code\data\stock\weekly_stock.csv')

    # Codes breaking out over a 5-bar look-back.
    print(s.find_price_vol_instrument(5))

    # Top 30 names by 6-bar return for the week labelled 2021-11-28.
    print(s.rps_all(6)['2021-11-28'].head(30))
        
    

['603367.SH', '002251.SZ', '002303.SZ', '000025.SZ', '002421.SZ', '603843.SH', '000534.SZ', '600831.SH', '600959.SH', '600572.SH', '601366.SH', '002921.SZ', '600190.SH']
           收益率  排名        RPS
code                         
九安医疗  2.159944   1  99.948400
欣锐科技  1.752475   2  99.896801
盾安环境  1.212251   3  99.845201
中文在线  1.192859   4  99.793602
钧达股份  1.061415   5  99.742002
宇晶股份  1.042580   6  99.690402
丰元股份  0.983924   7  99.638803
精功科技  0.975910   8  99.587203
鹏辉能源  0.947040   9  99.535604
汇得科技  0.945992  10  99.484004
永贵电器  0.927907  11  99.432405
京城股份  0.921832  12  99.380805
陕西金叶  0.911856  13  99.329205
永新光学  0.901087  14  99.277606
一汽富维  0.880614  15  99.226006
英洛华   0.845701  16  99.174407
中天科技  0.840857  17  99.122807
中锐股份  0.808984  18  99.071207
保隆科技  0.776299  19  99.019608
天壕环境  0.775344  20  98.968008
海兰信   0.754853  21  98.916409
大金重工  0.701773  22  98.864809
正海磁材  0.688412  23  98.813209
尚纬股份  0.687163  24  98.761610
东方日升  0.671134  25  98.710010
炬华科技  0.671034  26  