In [None]:
# Reload edited modules automatically before each cell runs (mode 2 = all modules).
%load_ext autoreload

%autoreload 2

In [None]:
# Extensions providing the %lprun, %mprun and %%cython magics used further down.
%load_ext line_profiler
%load_ext memory_profiler
%load_ext Cython

In [None]:
# NOTE(review): no cell imports numpy explicitly, yet later cells use `np`;
# presumably the name comes from %pylab — confirm before removing this magic.
%pylab inline

In [None]:
import sys, argparse, logging
import pymssql
import _mssql
from datetime import datetime, time
from decimal import Decimal
import pymongo
from pymongo import MongoClient
from tqdm import tqdm, tnrange, tqdm_notebook
import pandas as pd
from tmqr.settings import *
from tmqrfeed.assetsession import AssetSession
import pickle
import pytz
try:
    # Optional machine-specific overrides of the settings star-imported above.
    from tmqr.settings_local import *
except ImportError:
    # Only a missing settings_local module is tolerated; any other failure
    # inside it (syntax error, bad value, ...) should surface instead of being hidden.
    pass

from tmqrfeed.datafeed import DataFeed



In [None]:
# Connect to MongoDB and select the database that the loader functions below
# query through its 'quotes_intraday' collection.
# NOTE(review): MONGO_CONNSTR / MONGO_DB are not defined in this notebook;
# presumably they come from the tmqr.settings star-imports — verify.
client = MongoClient(MONGO_CONNSTR)
mongo_db = client[MONGO_DB]


In [None]:
# Time zone handed to AssetSession together with the session table below.
tz = pytz.timezone('US/Pacific')

# One dict per session regime, ordered by ascending 'dt'.
# NOTE(review): presumably each regime applies from its 'dt' until the next
# entry's 'dt', with times given in `tz` — confirm against AssetSession.
session_settings = [
    {
        'decision': '10:40',
        'dt': datetime(1900, 1, 1),
        'execution': '10:45',
        'start': '00:32',
    },
    {
        'decision': '11:40',
        'dt': datetime(2011, 9, 30),
        'execution': '11:45',
        'start': '01:32',
    },
    {
        'decision': '12:40',
        'dt': datetime(2012, 1, 1),
        'execution': '12:45',
        'start': '02:32',
    },
]

sess = AssetSession(session_settings, tz)

In [None]:
# Instantiate the project DataFeed with no arguments.
# NOTE(review): `feed` is not referenced by any other visible cell.
feed = DataFeed()

In [None]:
def load_case1():
    """
    Load all intraday OHLC chunks of the benchmark ticker and join them in one pass.

    Every matching document in ``mongo_db['quotes_intraday']`` carries a pickled
    DataFrame under the ``'ohlc'`` key. The chunks are collected in a list and
    concatenated once at the end.

    :return: the concatenated DataFrame, or an empty DataFrame when no document matches
    """
    cursor = mongo_db['quotes_intraday'].find({'tckr': 'US.F.CL.Q12.120720'})
    # NOTE: pickle.loads can execute arbitrary code embedded in the payload;
    # only run this against a trusted database.
    dframes_list = [pickle.loads(data['ohlc']) for data in cursor]

    if not dframes_list:
        # pd.concat raises ValueError on an empty list; return an empty frame,
        # which is also what load_case2 yields when nothing matches.
        return pd.DataFrame()

    return pd.concat(dframes_list)

In [None]:
def load_case2():
    """
    Load all intraday OHLC chunks of the benchmark ticker, growing the result chunk by chunk.

    This is the incremental variant that the %timeit / %lprun cells compare with
    load_case1: the accumulated frame is re-concatenated for every document.

    :return: the concatenated DataFrame (empty when no document matches)
    """
    combined = pd.DataFrame()
    cursor = mongo_db['quotes_intraday'].find({'tckr': 'US.F.CL.Q12.120720'})
    for record in cursor:
        chunk = pickle.loads(record['ohlc'])
        combined = pd.concat([combined, chunk])
    return combined

In [None]:
# Wall-clock timing of the loader that concatenates once.
%timeit load_case1()

In [None]:
# Wall-clock timing of the loader that concatenates inside the loop.
%timeit load_case2()

In [None]:
# Line-by-line profile of load_case1 (needs the line_profiler extension).
%lprun -f load_case1 load_case1()

In [None]:
# Line-by-line profile of load_case2.
%lprun -f load_case2 load_case2()

In [None]:
# Load the benchmark frame and express its index in US/Pacific.
# tz_convert returns a new object (copy=False only avoids copying the underlying
# data), so the result must be assigned back or `df` keeps its original time zone.
df = load_case2()
df = df.tz_convert('US/Pacific', copy=False)
# Show a short preview instead of echoing the whole frame.
df.head()

In [None]:
# Memory profile of a copying time-zone conversion (needs the memory_profiler extension).
%mprun -f df.tz_convert df.tz_convert('US/Pacific', copy=True)

In [None]:
# Last timestamp of the frame, used as a single sample for the profile below.
dt1 = df.index[-1]


In [None]:
# Line profile of AssetSession.date_is_insession applied element-wise over the whole index.
%lprun -f sess.date_is_insession df.index.map(lambda x: sess.date_is_insession(x))

In [None]:
# Line profile of a single date_is_insession call.
%lprun -f sess.date_is_insession sess.date_is_insession(dt1)

In [None]:
# Timing of pandas' built-in time-of-day filter, for comparison.
%timeit df.between_time('9:30', '10:45')

In [None]:
# Timing of the element-wise session check.
%timeit df.index.map(lambda x: sess.date_is_insession(x))

In [None]:
# Memory profile of AssetSession.filter_dataframe on the full frame.
%mprun -f sess.filter_dataframe sess.filter_dataframe(df)

In [None]:
# Timing of AssetSession.filter_dataframe on the full frame.
%timeit sess.filter_dataframe(df)

In [None]:
# Bounds of the half-open date window [date_start, date_end) used by the slicing benchmarks.
date_start = datetime(2012, 1, 1)
date_end = datetime(2013, 1, 1)

# Timing of a boolean-mask selection of that window.
%timeit df[(df.index >= date_start) & (df.index < date_end)]

In [None]:
%timeit df.ix[date_start:date_end]

In [None]:
# Convert a value to pandas Timestamp and take its calendar date.
# NOTE(review): `np_date` is not defined in any visible cell, so this fails with
# NameError on a fresh kernel — presumably a numpy datetime64 such as
# df.index.values[0]; confirm and define it, or drop the cell.
pd.Timestamp(np_date).date()

In [None]:
def datetime64_to_time_of_day(datetime64_array):
    """
    Compute the time elapsed since midnight for numpy datetime64 input.

    The input is truncated to whole days, cast back to the input's own dtype and
    subtracted from the original values, so the result is a timedelta64 in the
    same unit as the input. A new object is returned; the input is not modified.

    >>> datetime64_to_time_of_day(np.array(['2012-01-02T01:01:01.001'], dtype='datetime64[ms]'))
    array([3661001], dtype='timedelta64[ms]')

    :param datetime64_array: numpy datetime64 array (a datetime64 scalar works as well)
    :return: timedelta64 array (or scalar) holding the time of day of every element
    """
    midnight = datetime64_array.astype('datetime64[D]')
    return datetime64_array - midnight.astype(datetime64_array.dtype)

In [None]:
# Timing of the vectorised time-of-day extraction on the raw datetime64 values of the index.
%timeit datetime64_to_time_of_day(df.index.values)

In [None]:
# 18:29 expressed as nanoseconds since midnight (seconds * 1e9).
(datetime(2010, 1, 1, 18, 29) - datetime(2010, 1, 1)).seconds * 1000000000

In [None]:
 # Inspect the time-of-day values computed for the whole index.
 datetime64_to_time_of_day(df.index.values)

In [None]:
# Check how a Python datetime converts to numpy datetime64.
np.datetime64(datetime(2011, 1,1))

In [None]:
# Copy the session definitions held by the AssetSession into a plain list.
sess_params = [x for x in sess.sessions]

In [None]:
# Display the session definitions.
sess_params

In [None]:
%%cython -a
import numpy as np
import pandas as pd
from datetime import datetime

def _session_time_to_timedelta64(value):
    """Convert a session time of day (``datetime.time``-like or ``'HH:MM'`` string) to a timedelta64 since midnight."""
    if isinstance(value, str):
        value = datetime.strptime(value, '%H:%M').time()
    whole_seconds = value.hour * 3600 + value.minute * 60 + value.second
    return np.timedelta64(whole_seconds, 's') + np.timedelta64(value.microsecond, 'us')


def _session_date_to_datetime64(value):
    """Convert a session ``'dt'`` value to a naive datetime64[ns], keeping its wall-clock reading."""
    if getattr(value, 'tzinfo', None) is not None:
        value = value.replace(tzinfo=None)
    return np.datetime64(value).astype('datetime64[ns]')


def filter_index(self, dataframe_index, sess_params):
    """
    Creates boolean filter array used to filter dataframe from out-of-session datapoints

    A timestamp is in-session when its time of day lies within the inclusive
    ``[start, decision]`` window of the session regime that applies to it. A regime
    applies from its own ``'dt'`` up to (excluding) the ``'dt'`` of the next entry;
    the last regime is open-ended.

    Assumptions to confirm against the caller:
    - ``sess_params`` is ordered by ascending ``'dt'``
    - ``'start'`` / ``'decision'`` are ``datetime.time``-like objects or ``'HH:MM'`` strings
    - session dates and times are wall-clock values in the same time zone as the index

    :param self: unused; kept so the signature matches the method this prototype mirrors
    :param dataframe_index: DatetimeIndex (tz-naive or tz-aware) to classify
    :param sess_params: list of dicts with ``'dt'``, ``'start'`` and ``'decision'`` keys
    :return: numpy bool array, one element per index entry, True for in-session timestamps
    """
    # `.values` of a tz-aware index is expressed in UTC; drop the zone first so
    # the comparison uses the local wall-clock time the session windows refer to.
    if getattr(dataframe_index, 'tz', None) is not None:
        dataframe_index = dataframe_index.tz_localize(None)

    date_array = np.asarray(dataframe_index.values, dtype='datetime64[ns]')
    flt = np.zeros(len(date_array), dtype=bool)
    if len(date_array) == 0 or not sess_params:
        return flt

    # Time elapsed since midnight for every timestamp (timedelta64[ns]).
    time_array = date_array - date_array.astype('datetime64[D]')

    for sess_idx, sess in enumerate(sess_params):
        # Rows governed by this regime: on/after its start date and before the next one's.
        in_regime = date_array >= _session_date_to_datetime64(sess['dt'])
        if sess_idx + 1 < len(sess_params):
            in_regime &= date_array < _session_date_to_datetime64(sess_params[sess_idx + 1]['dt'])

        start_time = _session_time_to_timedelta64(sess['start'])
        end_time = _session_time_to_timedelta64(sess['decision'])
        flt |= in_regime & (time_array >= start_time) & (time_array <= end_time)

    return flt

In [None]:
# Inspect the start value of the first session definition.
sess_params[0]['start']

In [None]:
# Timing of a unit conversion of the index values to day resolution.
%timeit df.index.values.astype('datetime64[D]')

In [None]:
# NOTE(review): view() reinterprets the stored 64-bit values under the new dtype
# without converting units, so its result is not equivalent to the astype above.
%timeit df.index.values.view('datetime64[D]')

In [None]:
# Raw numpy datetime64 values of the index.
dt = df.index.values

In [None]:
# Day-resolution timestamps reinterpreted as unsigned integers (day counts).
date_arr = dt.astype('datetime64[D]').view('uint64')

In [None]:
date_arr

In [None]:
# Raw 64-bit timestamps reinterpreted as unsigned integers
# (presumably nanoseconds since the epoch if dt is datetime64[ns] — confirm).
date_arr2 = dt.view('uint64')

In [None]:
date_arr2

In [None]:
# The original call had no timestamp argument and was a SyntaxError.
# NOTE(review): date_arr2 is assumed to hold nanoseconds since the Unix epoch
# (integer view of datetime64[ns] values) — confirm; fromtimestamp expects seconds.
datetime.fromtimestamp(int(date_arr2[0]) / 1e9, tz=pytz.timezone("UTC"))

In [None]:
# Display the session definitions again for reference.
sess_params

In [None]:
def sess_filt():
    """
    Filter the global ``df`` down to in-session rows using the global ``sess_params``.

    Each entry of ``sess_params`` applies from its own ``'dt'`` up to (excluding) the
    ``'dt'`` of the next entry; the last entry is open-ended. Within that date range
    only rows whose time of day lies between ``'start'`` and ``'decision'`` are kept
    (``between_time`` semantics).

    Assumes ``sess_params`` is ordered by ascending ``'dt'`` — confirm against the caller.

    :return: DataFrame with the in-session rows of every session regime, concatenated
             in session order (empty when ``sess_params`` is empty)
    """
    df_list = []
    for i, params in enumerate(sess_params):
        date_start = params['dt']
        # The last regime has no successor, so it gets no upper date bound.
        date_end = sess_params[i + 1]['dt'] if i + 1 < len(sess_params) else None
        time_start = params['start']
        time_end = params['decision']

        print('DateStart: {0} DateEnd: {1}'.format(date_start, date_end))
        print('TimeStart: {0} TimeEnd: {1}'.format(time_start, time_end))

        date_mask = df.index >= date_start
        if date_end is not None:
            date_mask = date_mask & (df.index < date_end)
        df_list.append(df[date_mask].between_time(time_start, time_end))

    if not df_list:
        # pd.concat raises on an empty list; return an empty frame with df's layout.
        return df.iloc[0:0]

    return pd.concat(df_list)

In [None]:
# Timing of the between_time based session filter.
# Note: sess_filt prints on every call, so %timeit repeats that output for each run.
%timeit sess_filt()

In [None]:
# UTC offset of every index entry, in hours.
# total_seconds() is required: for negative offsets timedelta normalises to
# days=-1 plus a positive `.seconds`, so `.seconds` would report -8h as 16.
[x.utcoffset().total_seconds() / 3600 for x in df.index]

In [None]:
# Print every index entry with a plain loop; a list comprehension used only for
# its side effect also builds and echoes a throwaway list of None values.
for x in df.index:
    print(x)