In [None]:
def init_ema_variations(data, symbol_var=None, n_events_per_stat=None,
                        moving_window=None):
    """ Init EMA vector for variation-based features.

    The vector starts at the first sample of the series and is then run
    through the recursion em = lbd*em + (1-lbd)*x over the first
    (n_events_per_stat - moving_window) samples.

    Args:
        - data (Data object): data info. data.lbd holds the EMA coefficients;
          data.nEventsPerStat and data.movingWindow are read only when the
          matching optional arguments are omitted.
        - symbol_var (indexable, optional): series to warm the EMA up on.
          When omitted, the module-level name SymbolVar is used, which keeps
          the legacy one-argument call working.
        - n_events_per_stat (int, optional): window length. Defaults to
          data.nEventsPerStat.
        - moving_window (int, optional): window stride. Defaults to
          data.movingWindow.
    Return:
        - em (np array): initialized EMA vector (shape of data.lbd when the
          samples are scalars). """
    if symbol_var is None:
        # Legacy behaviour: the original body read a global SymbolVar.
        # A NameError is raised here if no such global exists.
        symbol_var = SymbolVar
    if n_events_per_stat is None:
        n_events_per_stat = data.nEventsPerStat
    if moving_window is None:
        moving_window = data.movingWindow

    decay = data.lbd
    # init exponential means
    em = np.zeros((decay.shape))+symbol_var[0]
    for i in range(n_events_per_stat-moving_window):
        em = decay*em+(1-decay)*symbol_var[i]

    return em

def init_parsar(data, firstSymbol):
    """ Build the initial ParSar state container.

    A new ParSar class is created on every call and returned as-is (the
    class itself, not an instance); its class attributes hold the state, one
    array slot per entry of data.parsars.

    Args:
        - data (Data object): data-related parameters. Only data.parsars is
          read: its length sets the array size and its values scale maxAF.
        - firstSymbol (float): first symbol value (bid, variation, ...),
          used as the starting value of both SAR tracks.
    Return:
        - ParSar (ParSar object): initialized ParSar structure with
          attributes oldSARh, oldSARl, HP, LP, stepAF, AFH, AFL, maxAF. """
    n_sar = len(data.parsars)
    step = 0.02

    def constant(value):
        # Fresh float array with one slot per configured entry, all = value.
        return value+np.zeros((n_sar))

    class ParSar:
        # previous high/low SAR values, both seeded with the first sample
        oldSARh = constant(firstSymbol)
        oldSARl = constant(firstSymbol)
        # running high point starts at 0, running low point at 100000
        HP = np.zeros((n_sar))
        LP = constant(100000)
        # step size and its current value on the high and low side
        stepAF = step
        AFH = constant(step)
        AFL = constant(step)
        # per-entry cap: data.parsars scaled by the step
        maxAF = np.array(data.parsars)*step

    return ParSar

def update_parsar(parsar, min_value, max_value, thisPeriodVariations):
    """ Update ParSar values and structure.

    For every slot of the ParSar state the running high/low points are
    extended with the window extremes, the new high/low SAR values are
    computed from the previous ones, and the step factors are advanced
    (capped at parsar.maxAF) when the new SAR lies inside the high/low
    point. The state arrays of `parsar` are modified in place.

    Args:
        - parsar (ParSar object): structure with current values of Par Sar
          (attributes oldSARh, oldSARl, HP, LP, stepAF, AFH, AFL, maxAF).
        - min_value (float): min value of current window.
        - max_value (float): max value of current window.
        - thisPeriodVariations (np array): variation values of current period.
    Return:
        - parsar (ParSar object): the same structure, updated.
        - thisParSARhigh (np vector): current high par sar values.
        - thisParSARlow (np vector): current low par sar values."""
    # Size comes from the state itself; the original read a global `data`
    # that is not a parameter of this function.
    n_sar = len(parsar.HP)
    thisParSARhigh = np.zeros((n_sar))
    thisParSARlow = np.zeros((n_sar))
    for ps in range(n_sar):
        parsar.HP[ps] = np.max([max_value,parsar.HP[ps]])
        parsar.LP[ps] = np.min([min_value,parsar.LP[ps]])
        thisParSARhigh[ps] = parsar.oldSARh[ps]+parsar.AFH[ps]*(parsar.HP[ps]-parsar.oldSARh[ps])
        thisParSARlow[ps] = parsar.oldSARl[ps]-parsar.AFL[ps]*(parsar.oldSARl[ps]-parsar.LP[ps])
        # Compare the values just computed (the original referenced the
        # undefined names parSARhigh / parSARlow here).
        if thisParSARhigh[ps]<parsar.HP[ps]:
            parsar.AFH[ps] = np.min([parsar.AFH[ps]+parsar.stepAF,parsar.maxAF[ps]])
            parsar.LP[ps] = np.min(thisPeriodVariations)
        if thisParSARlow[ps]>parsar.LP[ps]:
            # Low-side factor steps from AFL, mirroring the high side above
            # (the original stepped it from AFH).
            parsar.AFL[ps] = np.min([parsar.AFL[ps]+parsar.stepAF,parsar.maxAF[ps]])
            parsar.HP[ps] = np.max(thisPeriodVariations)
        # The next call's recursion starts from the values computed now, so
        # the previous-SAR slots are refreshed on every call, not only when
        # the low-side branch fires.
        parsar.oldSARh[ps] = thisParSARhigh[ps]
        parsar.oldSARl[ps] = thisParSARlow[ps]

    return parsar, thisParSARhigh, thisParSARlow

def get_features_from_var_raw(data, features, DateTime, SymbolVar):
    """
    Function that calculates features from raw variation data in batches.

    A window of data.nEventsPerStat samples is slid over SymbolVar with a
    stride of data.movingWindow samples; one feature row is produced per
    window position and written into `features` in place.

    Column layout of a row (nL = data.lbd.shape[0], nP = len(data.parsars)):
        0                    last variation of the window
        1 .. nL              EMA of the variations, one per data.lbd entry
        next 1               10*log10(variance/data.std_var + 1e-10)
        next 1               10*log10(timeInterval/data.std_time + 0.01)
        next 2*nP            ParSar high, ParSar low for each data.parsars entry
        next 1               time of day of the window end, as fraction of a day
        next 1               window max - last variation
        next 1               last variation - window min
        next 1               window min / window max
        next nL              last variation / EMA, one per data.lbd entry

    Args:
        - data (Data object): parameters; reads nEventsPerStat, movingWindow,
          lbd, parsars, std_var and std_time.
        - features (2-D array-like): output buffer, filled row by row.
        - DateTime (array-like): per-sample time stamps; each entry is decoded
          as UTF-8 and parsed with the format '%Y.%m.%d %H:%M:%S'.
        - SymbolVar (array-like): per-sample variation values.
    Returns:
        - features: the same buffer that was passed in, now filled.

    NOTE(review): min/max and variation/EMA are plain divisions; a zero
    denominator yields inf/nan - confirm this is acceptable downstream.
    """
    tic = time.time()
    # init scalars
    nExS = data.nEventsPerStat
    mW = data.movingWindow
    nE = DateTime.shape[0]
    m = int(np.floor((nE/nExS-1)*nExS/mW)+1)
    secsInDay = 86400.0
    date_format = '%Y.%m.%d %H:%M:%S'
    n_emas = data.lbd.shape[0]
    n_parsars = len(data.parsars)

    # EMA warm-up, computed from the local series so that it does not depend
    # on module-level SymbolVar/nExS/mW. It covers samples [0, nExS-mW): the
    # main loop below then adds the last mW samples of every window, so each
    # sample enters the EMA exactly once.
    em = np.zeros((data.lbd.shape))+SymbolVar[0]
    for i in range(nExS-mW):
        em = data.lbd*em+(1-data.lbd)*SymbolVar[i]

    # One name for the ParSar state throughout (the original assigned
    # `parSar` here but passed `parsar` to update_parsar).
    parsar = init_parsar(data, SymbolVar[0])

    batch_size = 10000000
    par_batches = int(np.ceil(m/batch_size))
    l_index = 0
    # loop over batches
    for b in range(par_batches):
        # number of windows in this batch
        m_i = min(batch_size, m-b*batch_size)

        # init structures
        EMA = np.zeros((m_i, em.shape[0]))
        variations = np.zeros((m_i))
        variance = np.zeros((m_i))
        maxValue = np.zeros((m_i))
        minValue = np.zeros((m_i))
        timeInterval = np.zeros((m_i))
        timeSecs = np.zeros((m_i))
        parSARhigh = np.zeros((m_i, n_parsars))
        parSARlow = np.zeros((m_i, n_parsars))

        for mm in range(m_i):

            startIndex = l_index+mm*mW
            endIndex = startIndex+nExS
            thisPeriodVariations = SymbolVar[startIndex:endIndex]

            # only the last mW samples of the window are new to the EMA
            for i in range(endIndex-mW, endIndex):
                em = data.lbd*em+(1-data.lbd)*SymbolVar[i]

            t0 = dt.datetime.strptime(DateTime[startIndex].decode("utf-8"), date_format)
            te = dt.datetime.strptime(DateTime[endIndex-1].decode("utf-8"), date_format)

            variations[mm] = SymbolVar[endIndex-1]
            EMA[mm,:] = em
            variance[mm] = np.var(thisPeriodVariations)
            # NOTE(review): timedelta.seconds excludes whole days, so a window
            # spanning more than 24h is under-reported; total_seconds() would
            # include them. Kept as-is to leave existing feature values
            # unchanged - confirm which is intended.
            timeInterval[mm] = (te-t0).seconds/nExS
            maxValue[mm] = np.max(thisPeriodVariations)
            minValue[mm] = np.min(thisPeriodVariations)
            timeSecs[mm] = (te.hour*60*60+te.minute*60+te.second)/secsInDay

            parsar, parSARhigh[mm,:], parSARlow[mm,:] = update_parsar(
                parsar, minValue[mm], maxValue[mm], thisPeriodVariations)

        # end of for mm in range(m_i):
        # the next batch starts one stride after this batch's last window
        l_index += m_i*mW

        # update features vector
        init_idx = b*batch_size
        end_idx = init_idx+m_i
        rows = slice(init_idx, end_idx)

        toc = time.time()
        print("\t\tmm="+str(end_idx)+" of "+str(m)+". Total time: "+str(np.floor(toc-tic))+"s")

        nF = 0
        features[rows,nF] = variations
        nF += 1

        features[rows,nF:nF+n_emas] = EMA
        nF += n_emas

        features[rows,nF] = 10*np.log10(variance/data.std_var+1e-10)
        nF += 1

        features[rows,nF] = 10*np.log10(timeInterval/data.std_time+0.01)
        nF += 1

        for ps in range(n_parsars):
            features[rows,nF] = parSARhigh[:,ps]
            features[rows,nF+1] = parSARlow[:,ps]
            nF += 2

        features[rows,nF] = timeSecs
        nF += 1

        features[rows,nF] = maxValue-variations
        nF += 1

        features[rows,nF] = variations-minValue
        nF += 1

        features[rows,nF] = minValue/maxValue
        nF += 1

        for i in range(n_emas):
            # ratio of the last variation to each EMA (the original divided
            # the undefined name `bids`)
            features[rows,nF] = variations/EMA[:,i]
            nF += 1

    return features