In [6]:
import datetime as dt
import time
import logging as log
from multiprocessing import Pool, TimeoutError

%run BreakSectionManager.ipynb
%run Util.ipynb
%run Constant.ipynb
%run Metric.ipynb
%run Operation.ipynb
%run Account.ipynb
%run Policy_Loader.ipynb
%run Evaluation.ipynb


class RocketRegression:
    """Run a policy back-test ("regression") stock by stock and collect results.

    For each symbol the break sections returned by ``BreakSectionManager`` are
    replayed one calendar day at a time; buy/sell decisions are delegated to
    ``Operation``; the account, deal and operation tables are written to CSV
    and an evaluation report is returned.

    The class relies on names that are not imported in this cell and are
    expected to come from the ``%run`` notebooks (``Policy_Loader``,
    ``Metric``, ``Account``, ``Operation``, ``Evaluation``, ``pd``,
    ``to_datetime``, ``get_symbols``, ``report_dir``, ``operation_version``,
    ``evaluation_columns``, ``home``, ``YMD_format``).
    """

    # (column, dtype) pairs applied to the merged evaluation table before it
    # is handed back to the caller.
    _EVALUATION_DTYPES = (
        ('loss_times', 'int'),
        ('deal_count', 'int'),
        ('r2', 'float'),
        ('rar', 'float'),
    )

    def __init__(self, policy_name):
        """Load the policy and cache the context values used in file names.

        Args:
            policy_name: Policy identifier handed to ``Policy_Loader`` (the
                notebook passes a ``.json`` file name).
        """
        self.policy_name = policy_name
        self.policy_loader = Policy_Loader(self.policy_name)
        self.debug = self.policy_loader.enabled_log_debug()
        self.context = self.policy_loader.get_context()

        self.level = self.context['level']
        self.short = self.context['short']
        self.mid = self.context['mid']

    def _build_report_path(self, prefix, symbol, regression_end_date):
        """Return ``report_dir`` + the underscore-joined report name + ``.csv``."""
        name_parts = [
            prefix,
            symbol,
            str(self.short),
            str(self.mid),
            self.level,
            regression_end_date,
            operation_version,
        ]
        return report_dir + '_'.join(name_parts) + '.csv'

    def build_operation_file_path(self, symbol, regression_end_date):
        """Return the CSV path used for the operations table of ``symbol``."""
        return self._build_report_path('operations', symbol, regression_end_date)

    def build_deal_file_path(self, symbol, regression_end_date):
        """Return the CSV path used for the deals table of ``symbol``."""
        return self._build_report_path('deals', symbol, regression_end_date)

    def build_asset_file_path(self, symbol, regression_end_date):
        """Return the CSV path used for the account-asset table of ``symbol``."""
        return self._build_report_path('account_asset', symbol, regression_end_date)

    def persist(self, account, operation_df, symbol, regression_end_date):
        """Write the asset, deal and operation tables to their CSV files.

        Args:
            account: Object exposing ``get_asset_df()`` and ``deal_df``.
            operation_df: DataFrame of operations to store.
            symbol: Stock symbol, used in the file names.
            regression_end_date: End date string, used in the file names.
        """
        account.get_asset_df().to_csv(
            self.build_asset_file_path(symbol, regression_end_date), index=False)
        account.deal_df.to_csv(
            self.build_deal_file_path(symbol, regression_end_date), index=False)
        operation_df.to_csv(
            self.build_operation_file_path(symbol, regression_end_date), index=False)

    @classmethod
    def _cast_evaluation_dtypes(cls, evaluation_df):
        """Cast the numeric evaluation columns in place and return the frame."""
        for column, dtype in cls._EVALUATION_DTYPES:
            evaluation_df[column] = evaluation_df[column].astype(dtype)
        return evaluation_df

    def start_regression_on_one_stock(self, symbol, regression_end_date):
        """Replay every break section of one stock and return its evaluation.

        Only sections whose ``d_s_datetime`` is earlier than
        ``regression_end_date`` are replayed. Each section is walked one
        calendar day at a time from ``d_s_date`` to ``d_e_date`` inclusive;
        days for which ``metric.is_today_open`` is false are skipped.

        Args:
            symbol: Stock symbol to back-test.
            regression_end_date: Date string; also used in the CSV file names.

        Returns:
            The object returned by ``Evaluation.get_evaluation_report``.
        """
        start = time.time()
        section_manager = BreakSectionManager(symbol, self.context)
        section_df = section_manager.get_all_break_sections()
        section_df = section_df[section_df['d_s_datetime'] < to_datetime(regression_end_date)]

        metric = Metric(symbol, self.context)

        stock_start_date = metric.get_stock_start_date()
        stock_end_date = metric.get_stock_end_date()
        # Lazy %-formatting: the message is only built if the record is emitted.
        log.info('stock_start_date %s stock_end_date:%s', stock_start_date, stock_end_date)

        account = Account(stock_start_date, stock_end_date, self.context, self.policy_name)
        operation = Operation(account, symbol, self.context, self.policy_name)

        one_day = dt.timedelta(days=1)
        for _, row in section_df.iterrows():
            start_date = row['d_s_date']
            end_date = row['d_e_date']

            cursor = to_datetime(start_date)
            last_day = to_datetime(end_date)

            # NOTE(review): the returned price is not used; the call is kept
            # because its side effects (if any) are not visible from here.
            metric.get_cur_price(start_date)

            # The explicit `== True` / `== False` comparisons in this method
            # are kept on purpose: the return types of the helpers are not
            # visible here, so switching to truthiness could change behaviour.
            if self.debug == True:
                print('\n'+row['symbol']+' section '+start_date+'===>'+end_date)

            while cursor <= last_day:
                cur_date = cursor.strftime(YMD_format)

                if metric.is_today_open(cur_date):
                    account.daily_audit(cur_date)

                    if account.can_open_new_stock() == True:
                        if operation.is_buy_point(cur_date) == True:
                            # Method name spelled as defined by Operation.
                            operation.open_opsition(cur_date)

                    if account.has_shares() == True:
                        if operation.is_sell_point(cur_date) == True:
                            operation.sell_stock(cur_date)

                cursor += one_day

        operation_df = operation.get_operations()
        self.persist(account, operation_df, symbol, regression_end_date)

        evaluation = Evaluation(account, self.context, self.policy_name)
        evaluation_df = evaluation.get_evaluation_report(
            symbol, stock_start_date, stock_end_date, operation_df)

        end = time.time()
        # The timing line is printed only when debug output is off.
        if self.debug == False:
            print(symbol+' regression cost time ' + str(round(end-start,1))+' seconds')
        return evaluation_df

    def start_sync_regression(self, scale, regression_end_date):
        """Back-test every symbol sequentially in the current process.

        Args:
            scale: Passed to ``get_symbols`` to select the symbols.
            regression_end_date: Date string forwarded to each stock run.

        Returns:
            DataFrame of all evaluation rows (most recent symbol first) with
            the numeric columns cast.
        """
        evaluation_df = pd.DataFrame(columns=evaluation_columns)

        for symbol in get_symbols(scale, home):
            one_evaluation = self.start_regression_on_one_stock(symbol, regression_end_date)
            evaluation_df = pd.concat([one_evaluation, evaluation_df],
                                      ignore_index=True, sort=False)

        return self._cast_evaluation_dtypes(evaluation_df)

    def start_regression(self, scale, regression_end_date, process_num):
        """Back-test every symbol, using a process pool unless ``process_num == 1``.

        A symbol whose task raises is reported on stdout (with the symbol and
        the exception type) and skipped; the remaining symbols still
        contribute to the result.

        Args:
            scale: Passed to ``get_symbols`` to select the symbols.
            regression_end_date: Date string forwarded to each stock run.
            process_num: Number of worker processes; ``1`` runs synchronously.

        Returns:
            DataFrame of all collected evaluation rows with the numeric
            columns cast.
        """
        print('context:' + str(self.context))

        if process_num == 1:
            return self.start_sync_regression(scale, regression_end_date)

        evaluation_df = pd.DataFrame(columns=evaluation_columns)
        symbols = get_symbols(scale, home)

        pool = Pool(processes=process_num)
        try:
            # Submit everything first so the workers run in parallel, and keep
            # the symbol next to its task so failures can be attributed.
            tasks = [
                (symbol, pool.apply_async(self.start_regression_on_one_stock,
                                          (symbol, regression_end_date)))
                for symbol in symbols
            ]

            for symbol, task in tasks:
                try:
                    one_evaluation = task.get()
                    evaluation_df = pd.concat([one_evaluation, evaluation_df],
                                              ignore_index=True, sort=False)
                except Exception as e:
                    # Best effort: report which symbol failed and carry on.
                    print(str(symbol) + ' regression failed: '
                          + type(e).__name__ + ': ' + str(e))
        except BaseException:
            # Do not leave worker processes behind (e.g. on KeyboardInterrupt).
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

        return self._cast_evaluation_dtypes(evaluation_df)

In [None]:
# Run parameters for this back-test.
scale = 300                      # forwarded to get_symbols() to pick the symbols
regression_end = '2019-05-30'    # only sections starting before this date are replayed
process_num = 24                 # worker processes; 1 would run synchronously

# Load the policy and run the back-test over all selected symbols.
regression = RocketRegression('rocket_break_v4_enable_reducing_position_based_ATR.json')
evaluation_df = regression.start_regression(
    scale=scale,
    regression_end_date=regression_end,
    process_num=process_num,
)

# Last expression of the cell, so the notebook displays the summary statistics.
evaluation_df.describe()

context:{'short': 20, 'mid': 240, 'scale_start': 0, 'scale_end': 3, 'level': 'Day'}


  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


SZ#002345 regression cost time 33.5 seconds
SZ#002343 regression cost time 33.5 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002505 regression cost time 35.6 seconds


  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


SZ#002181 regression cost time 39.1 seconds
SZ#002183 regression cost time 43.5 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002182 regression cost time 52.9 seconds
SZ#000582 regression cost time 66.3 seconds
SZ#000809 regression cost time 67.2 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002346 regression cost time 35.4 seconds
SZ#002184 regression cost time 67.2 seconds
SZ#000803 regression cost time 77.0 seconds
SZ#000004 regression cost time 86.0 seconds
SZ#002021 regression cost time 86.0 seconds


  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


SZ#000573 regression cost time 90.2 seconds
SZ#002020 regression cost time 90.4 seconds
list.remove(x): x not in list
SZ#000005 regression cost time 63.6 seconds
SZ#000806 regression cost time 99.1 seconds
SZ#000001 regression cost time 101.5 seconds
SZ#000576 regression cost time 104.3 seconds
list.remove(x): x not in list
SZ#002023 regression cost time 104.7 seconds
SZ#002507 regression cost time 71.4 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002186 regression cost time 27.8 seconds
SZ#000807 regression cost time 114.5 seconds
SZ#002348 regression cost time 29.0 seconds
SZ#000581 regression cost time 116.3 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#000002 regression cost time 119.3 seconds
SZ#002022 regression cost time 121.4 seconds
list.remove(x): x not in list
SZ#002185 regression cost time 78.1 seconds
SZ#000584 regression cost time 87.8 seconds
SZ#002349 regression cost time 22.1 seconds
SZ#002509 regression cost time 40.7 seconds
SZ#000585 regression cost time 63.3 seconds
SZ#000810 regression cost time 92.4 seconds
SZ#002025 regression cost time 59.3 seconds
SZ#002510 regression cost time 32.0 seconds
SZ#002187 regression cost time 35.9 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002350 regression cost time 20.5 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#000006 regression cost time 73.8 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002188 regression cost time 24.9 seconds
SZ#002508 regression cost time 75.4 seconds
SZ#002024 regression cost time 107.9 seconds
list.remove(x): x not in list
SZ#002511 regression cost time 28.9 seconds
SZ#000007 regression cost time 62.8 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002512 regression cost time 17.1 seconds
SZ#002026 regression cost time 54.6 seconds
SZ#000811 regression cost time 79.9 seconds
SZ#000586 regression cost time 65.5 seconds


  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


SZ#002351 regression cost time 27.6 seconds
SZ#000587 regression cost time 45.1 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002189 regression cost time 29.2 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002352 regression cost time 20.2 seconds
SZ#000812 regression cost time 66.2 seconds
SZ#002027 regression cost time 48.6 seconds
SZ#000813 regression cost time 49.4 seconds
SZ#000008 regression cost time 60.3 seconds
SZ#000010 regression cost time 37.2 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#000009 regression cost time 52.7 seconds
SZ#002190 regression cost time 33.8 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#000590 regression cost time 39.0 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002514 regression cost time 18.9 seconds
SZ#002515 regression cost time 14.8 seconds
SZ#002354 regression cost time 16.5 seconds
SZ#000815 regression cost time 54.6 seconds
SZ#000589 regression cost time 58.6 seconds
SZ#002353 regression cost time 26.4 seconds
SZ#002191 regression cost time 30.9 seconds
SZ#000011 regression cost time 38.8 seconds
SZ#000816 regression cost time 51.6 seconds
SZ#000592 regression cost time 34.4 seconds
SZ#002355 regression cost time 17.0 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#000012 regression cost time 35.9 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002029 regression cost time 55.7 seconds
SZ#002192 regression cost time 33.5 seconds
SZ#000818 regression cost time 45.0 seconds
SZ#002516 regression cost time 20.1 seconds
SZ#000819 regression cost time 39.9 seconds
SZ#002193 regression cost time 24.5 seconds
SZ#002356 regression cost time 12.2 seconds
SZ#002031 regression cost time 39.8 seconds
SZ#002030 regression cost time 49.5 seconds




SZ#000591 regression cost time 50.8 seconds
SZ#002028 regression cost time 75.6 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


list.remove(x): x not in list
SZ#002517 regression cost time 15.2 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002194 regression cost time 22.4 seconds
SZ#002357 regression cost time 14.2 seconds
SZ#000014 regression cost time 51.8 seconds
SZ#000017 regression cost time 24.1 seconds
SZ#002518 regression cost time 19.0 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002519 regression cost time 14.4 seconds
SZ#002195 regression cost time 21.6 seconds
SZ#000820 regression cost time 47.7 seconds


In [None]:
# Scratch cell: prints a fixed marker string (text kept exactly as written).
print("Helo")