In [4]:
import datetime as dt
import time
import logging as log
from multiprocessing import Pool, TimeoutError

%run BreakSectionManager.ipynb
%run Util.ipynb
%run Constant.ipynb
%run Metric.ipynb
%run Operation.ipynb
%run Account.ipynb
%run Policy_Loader.ipynb
%run Evaluation.ipynb


class RocketRegression:
    """Replay a trading policy over historical break sections of stocks.

    The policy file named by ``policy_name`` is loaded through
    ``Policy_Loader``; its context supplies the ``level``, ``short`` and
    ``mid`` settings that are also embedded in every report file name.
    """

    # Evaluation columns that are coerced to numeric dtypes once all
    # per-symbol reports have been concatenated.
    _EVALUATION_DTYPES = (
        ('loss_times', 'int'),
        ('deal_count', 'int'),
        ('r2', 'float'),
        ('rar', 'float'),
    )

    def __init__(self, policy_name):
        """Load the policy and cache the context values used by this class."""
        self.policy_name = policy_name
        self.policy_loader = Policy_Loader(self.policy_name)
        self.debug = self.policy_loader.enabled_log_debug()
        self.context = self.policy_loader.get_context()

        self.level = self.context['level']
        self.short = self.context['short']
        self.mid = self.context['mid']

    def _build_report_path(self, prefix, symbol, regression_end_date):
        """Return ``report_dir`` + ``<prefix>_<symbol>_<short>_<mid>_<level>_<end>_<version>.csv``."""
        parts = [
            prefix,
            symbol,
            str(self.short),
            str(self.mid),
            self.level,
            regression_end_date,
            operation_version,
        ]
        return report_dir + '_'.join(parts) + '.csv'

    def build_operation_file_path(self, symbol, regression_end_date):
        """Return the CSV path for the operations report of ``symbol``."""
        return self._build_report_path('operations', symbol, regression_end_date)

    def build_deal_file_path(self, symbol, regression_end_date):
        """Return the CSV path for the deals report of ``symbol``."""
        return self._build_report_path('deals', symbol, regression_end_date)

    def build_asset_file_path(self, symbol, regression_end_date):
        """Return the CSV path for the account-asset report of ``symbol``."""
        return self._build_report_path('account_asset', symbol, regression_end_date)

    def persist(self, account, operation_df, symbol, regression_end_date):
        """Write the asset, deal and operation frames of one run to CSV files."""
        account.get_asset_df().to_csv(
            self.build_asset_file_path(symbol, regression_end_date), index=False)
        account.deal_df.to_csv(
            self.build_deal_file_path(symbol, regression_end_date), index=False)
        operation_df.to_csv(
            self.build_operation_file_path(symbol, regression_end_date), index=False)

    def _merge_evaluations(self, frames):
        """Concatenate per-symbol evaluation frames into one typed frame.

        ``frames`` is in processing order.  The result lists the most recently
        processed symbol first, which is the row order the previous
        one-concat-per-symbol implementation produced.  An empty frame carrying
        ``evaluation_columns`` is appended so those columns exist even when
        ``frames`` is empty.
        """
        template = pd.DataFrame(columns=evaluation_columns)
        # A single concat avoids re-copying the accumulated frame per symbol.
        merged = pd.concat(list(reversed(frames)) + [template],
                           ignore_index=True, sort=False)
        for column, dtype in self._EVALUATION_DTYPES:
            merged[column] = merged[column].astype(dtype)
        return merged

    def start_regression_on_one_stock(self, symbol, regression_end_date):
        """Run the policy over every break section of ``symbol``.

        Only sections whose ``d_s_datetime`` is before ``regression_end_date``
        are replayed.  The resulting reports are written via ``persist`` and
        the evaluation report for the symbol is returned.
        """
        start = time.time()
        section_manager = BreakSectionManager(symbol, self.context)
        section_df = section_manager.get_all_break_sections()
        section_df = section_df[section_df['d_s_datetime'] < to_datetime(regression_end_date)]

        metric = Metric(symbol, self.context)

        stock_start_date = metric.get_stock_start_date()
        stock_end_date = metric.get_stock_end_date()
        log.info('stock_start_date ' + stock_start_date + ' stock_end_date:' + stock_end_date)

        account = Account(stock_start_date, stock_end_date, self.context, self.policy_name)
        operation = Operation(account, symbol, self.context, self.policy_name)

        for _, row in section_df.iterrows():
            start_date = row['d_s_date']
            end_date = row['d_e_date']

            cur_datetime = to_datetime(start_date)
            end_datetime = to_datetime(end_date)

            # NOTE(review): the returned price is never used.  The call is kept
            # because it may raise for a start date without data, which would
            # change which symbols fail - confirm, then remove.
            metric.get_cur_price(start_date)

            if(self.debug==True):
                print('\n'+row['symbol']+' section '+start_date+'===>'+end_date)

            # Walk every calendar day of the section, both ends included.
            while cur_datetime <= end_datetime:
                cur_date = cur_datetime.strftime(YMD_format)

                if metric.is_today_open(cur_date):
                    account.daily_audit(cur_date)

                    if(account.can_open_new_stock()==True):
                        if(operation.is_buy_point(cur_date)==True):
                            operation.open_opsition(cur_date)

                    # Checked after the buy step, so a position opened today
                    # is also evaluated for selling today.
                    if(account.has_shares()==True):
                        if(operation.is_sell_point(cur_date)==True):
                            operation.sell_stock(cur_date)

                cur_datetime += dt.timedelta(days=1)

        operation_df = operation.get_operations()
        self.persist(account, operation_df, symbol, regression_end_date)

        evaluation = Evaluation(account, self.context, self.policy_name)
        evaluation_df = evaluation.get_evaluation_report(symbol, stock_start_date, stock_end_date,
                                                         operation_df)

        end = time.time()
        # The timing line is only printed when debug output is off.
        if(self.debug==False):
            print(symbol+' regression cost time ' + str(round(end-start,1))+' seconds')
        return evaluation_df

    def start_sync_regression(self, scale, regression_end_date):
        """Run the regression for every symbol sequentially in this process.

        An exception raised for any symbol propagates to the caller.
        """
        symbols = get_symbols(scale, home)
        frames = []
        for symbol in symbols:
            frames.append(self.start_regression_on_one_stock(symbol, regression_end_date))
        return self._merge_evaluations(frames)

    def start_regression(self, scale, regression_end_date, process_num):
        """Run the regression for every symbol returned by ``get_symbols``.

        With ``process_num == 1`` the work is done sequentially; otherwise it
        is fanned out over a ``multiprocessing.Pool`` of ``process_num``
        workers.  In the pooled path a symbol whose worker raises is reported
        and skipped, so the returned frame only holds the symbols that
        succeeded.
        """
        print('context:' + str(self.context))

        if(process_num==1):
            return self.start_sync_regression(scale, regression_end_date)

        symbols = get_symbols(scale, home)
        frames = []

        pool = Pool(processes=process_num)
        try:
            tasks = [
                (symbol, pool.apply_async(self.start_regression_on_one_stock,
                                          (symbol, regression_end_date)))
                for symbol in symbols
            ]
            for symbol, task in tasks:
                try:
                    frames.append(task.get())
                except Exception as e:
                    # Best effort: report which symbol failed and keep going.
                    print(symbol + ' regression failed: ' + str(e))
                    log.exception('regression failed for %s', symbol)
            pool.close()
        except BaseException:
            # Includes KeyboardInterrupt: stop the workers instead of leaving
            # them running after the caller has given up.
            pool.terminate()
            raise
        finally:
            pool.join()

        return self._merge_evaluations(frames)

In [5]:
# Load the policy described by this JSON file.
regression = RocketRegression('rocket_break_v4_enable_reducing_position_based_ATR.json')

# Only break sections that start before this date are replayed.
regression_end='2019-05-30'
# Forwarded to get_symbols() to select the symbols to run.
scale=300
# Size of the worker pool; a value of 1 selects the sequential code path.
process_num=24

evaluation_df = regression.start_regression(scale,regression_end,process_num)
# Last expression of the cell: displays summary statistics of the evaluation rows.
evaluation_df.describe()

context:{'short': 20, 'mid': 240, 'scale_start': 0, 'scale_end': 3, 'level': 'Day'}


  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002505 regression cost time 15.7 seconds


  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


SZ#002343 regression cost time 18.1 seconds
SZ#002345 regression cost time 19.3 seconds
SZ#002181 regression cost time 21.3 seconds


  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


SZ#002183 regression cost time 22.4 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002182 regression cost time 25.3 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002346 regression cost time 17.3 seconds
SZ#000582 regression cost time 35.4 seconds
SZ#000809 regression cost time 36.2 seconds
SZ#000803 regression cost time 38.6 seconds
SZ#002184 regression cost time 36.3 seconds
SZ#002021 regression cost time 41.2 seconds


  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


SZ#000573 regression cost time 45.8 seconds
SZ#000004 regression cost time 46.2 seconds
SZ#002020 regression cost time 47.6 seconds
list.remove(x): x not in list
SZ#000806 regression cost time 48.8 seconds
SZ#002023 regression cost time 50.7 seconds
SZ#000576 regression cost time 52.4 seconds
SZ#000005 regression cost time 35.0 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002186 regression cost time 13.0 seconds
SZ#002507 regression cost time 39.5 seconds
SZ#002348 regression cost time 15.6 seconds
SZ#000807 regression cost time 58.4 seconds
SZ#000001 regression cost time 58.6 seconds
list.remove(x): x not in list
SZ#000581 regression cost time 63.5 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#000002 regression cost time 65.6 seconds
SZ#002022 regression cost time 66.1 seconds
list.remove(x): x not in list
SZ#000584 regression cost time 47.8 seconds
SZ#002185 regression cost time 45.9 seconds
SZ#002349 regression cost time 15.8 seconds
SZ#002509 regression cost time 25.4 seconds
SZ#000585 regression cost time 39.3 seconds
SZ#000810 regression cost time 54.9 seconds
SZ#002510 regression cost time 23.1 seconds
SZ#002025 regression cost time 36.5 seconds
SZ#002187 regression cost time 27.1 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#000006 regression cost time 44.4 seconds
SZ#002508 regression cost time 47.3 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002188 regression cost time 17.4 seconds


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SZ#002350 regression cost time 15.7 seconds


Process ForkPoolWorker-39:
Process ForkPoolWorker-31:
Process ForkPoolWorker-37:
Process ForkPoolWorker-44:
Process ForkPoolWorker-48:
Process ForkPoolWorker-38:
Process ForkPoolWorker-34:
Process ForkPoolWorker-25:
Process ForkPoolWorker-47:
Process ForkPoolWorker-27:
Process ForkPoolWorker-46:
Process ForkPoolWorker-29:
Process ForkPoolWorker-45:
Process ForkPoolWorker-30:
Process ForkPoolWorker-33:
Process ForkPoolWorker-26:
Process ForkPoolWorker-35:
Process ForkPoolWorker-42:
Process ForkPoolWorker-40:
Process ForkPoolWorker-36:
Process ForkPoolWorker-32:
Process ForkPoolWorker-43:
Process ForkPoolWorker-28:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", li

  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-4-6bcd192e9961>", line 80, in start_regression_on_one_stock
    if(operation.is_sell_point(cur_date)==True):
  File "<ipython-input-4-6bcd192e9961>", line 80, in start_regression_on_one_stock
    if(operation.is_sell_point(cur_date)==True):
  File "<ipython-input-4-c3ac7e5ce17c>", line 166, in is_sell_point
    return self.exec_chief.is_sell_point(cur_date)
  File "<ipython-input-4-c3ac7e5ce17c>", line 160, in is_buy_point
    return self.exec_chief.is_buy_point(cur_date)
  File "<ipython-input-4-bfadc0667496>", line 20, in get_market_status
    if(self.is_strong_market(cur_date)==True):
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<i

  File "<ipython-input-4-6bcd192e9961>", line 72, in start_regression_on_one_stock
    if(metric.is_today_open(cur_date)):
  File "<ipython-input-4-c3ac7e5ce17c>", line 160, in is_buy_point
    return self.exec_chief.is_buy_point(cur_date)
  File "<ipython-input-4-40f82d7e16a0>", line 107, in is_sell_point
    sell_type = self.which_sell_type(cur_date)
  File "<ipython-input-4-443fbd43c7bf>", line 225, in get_today_stock_asset
    latest_price = d_k_metric.get_latest_price(cur_date)
  File "<ipython-input-4-40f82d7e16a0>", line 116, in does_broken_ATR
    cur_price = self.metric.get_cur_price(cur_date)
  File "<ipython-input-4-40f82d7e16a0>", line 217, in which_sell_type
    if(self.does_broken_ATR(cur_date)==True):
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexing.py", line 189, in __setitem__
    self._setitem_with_indexer(indexer, value)
  File "<ipython-input-4-40f82d7e16a0>", line 118, in does_broken_ATR
    broken_atr_price = self.get_broken_atr_price(cur_date)


  File "/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py", line 5154, in _convert
    copy=copy)).__finalize__(self)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/ops.py", line 1143, in na_op
    result = _comp_method_OBJECT_ARRAY(op, x, y)
  File "<ipython-input-4-0c98c55bce2b>", line 69, in atr
    tr_value = self.tr(row['date'])
  File "<ipython-input-4-40f82d7e16a0>", line 118, in does_broken_ATR
    broken_atr_price = self.get_broken_atr_price(cur_date)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexing.py", line 451, in _setitem_with_indexer
    self.obj._data = self.obj.append(value)._data
  File "<ipython-input-4-fc443e25477c>", line 81, in <lambda>
    self.day_k['week'] = self.day_k['date'].apply(lambda x: to_datetime(x).isocalendar()[1])
  File "<ipython-input-4-40f82d7e16a0>", line 217, in which_sell_type
    if(self.does_broken_ATR(cur_date)==True):
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/ops.py", line 1331, in 

  File "<ipython-input-4-0c98c55bce2b>", line 80, in tr
    last_day = self.yesterday(cur_date)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py", line 1258, in take_nd
    allow_fill=True, fill_value=fill_value)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/ops.py", line 1143, in na_op
    result = _comp_method_OBJECT_ARRAY(op, x, y)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py", line 536, in _init_ndarray
    return create_block_manager_from_blocks([values], [columns, index])
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py", line 2705, in _getitem_slice
    return self._slice(key, axis=0)
  File "<ipython-input-4-0c98c55bce2b>", line 80, in tr
    last_day = self.yesterday(cur_date)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py", line 1258, in take_nd
    allow_fill=True, fill_value=fill_value)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/ops.py", line 1122, in _comp

KeyboardInterrupt: 

KeyboardInterrupt
KeyboardInterrupt
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/algorithms.py", line 1600, in take_nd
    arr = np.asarray(arr)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py", line 5440, in is_uniform_join_units
    all(not ju.is_na or ju.block.is_extension for ju in join_units) and
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/series.py", line 3804, in isnull
    return super(Series, self).isnull()
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py", line 4358, in __finalize__
    for name in self._metadata:
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/dtypes/common.py", line 152, in is_sparse
    return isinstance(arr, (ABCSparseArray, ABCSparseSeries))
KeyboardInterrupt
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py", line 2724, in _getitem_array
    return self._take(indexer, axis=0)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py", line 26

  File "<ipython-input-4-fc443e25477c>", line 81, in <lambda>
    self.day_k['week'] = self.day_k['date'].apply(lambda x: to_datetime(x).isocalendar()[1])
KeyboardInterrupt
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py", line 1258, in take_nd
    allow_fill=True, fill_value=fill_value)
KeyboardInterrupt
  File "<frozen importlib._bootstrap>", line 1007, in _handle_fromlist
  File "<ipython-input-4-4e2df8878709>", line 7, in to_datetime
    return datetime.strptime(date_str,YMD_format)
KeyboardInterrupt
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/algorithms.py", line 1613, in take_nd
    dtype, fill_value = maybe_promote(arr.dtype, fill_value)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/dtypes/cast.py", line 296, in maybe_promote
    elif is_extension_array_dtype(dtype) and isna(fill_value):
  File "/opt/conda/lib/python3.6/_strptime.py", line 565, in _strptime_datetime
    tt, fraction = _strptime(data_string, format)
  File "/opt

SZ#002511 regression cost time 23.3 seconds


  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


SZ#000011 regression cost time 39.1 seconds


In [None]:
# NOTE(review): looks like a leftover scratch cell that was never executed;
# 'Helo' is presumably a typo for 'Hello' - consider fixing or removing the cell.
print('Helo')