# Testing numbers

In [1]:
0.1 + 0.2 == 0.3

False

In [2]:
0.1 + 0.1 == 0.2

True

In [3]:
0.1

0.1

In [4]:
print('{0:.50f}'.format(0.1))

0.10000000000000000555111512312578270211815834045410


# Decimal

In [5]:
(0.1 + 0.2 - 0.3) * int(10e20)

55511.15123125783

In [6]:
import decimal

In [7]:
D = decimal.Decimal

In [8]:
(D('0.1') + D('0.2') - D('0.3')) * int(10e20)

Decimal('0.0')

# Timestamps

In [9]:
import time
time.time()

1604598630.5271592

# Nanoseconds

In [10]:
import pandas as pd

PATH = 'data/hpt_xetra_20180816_sample.csv'

df_precision_loss = pd.read_csv(PATH, delimiter=';')

In [11]:
df_precision_loss.head()

Unnamed: 0,MarketSegmentID,SecurityID,ExecID,ApplSeqNum,PartitionID,CompletionIndicator,TradeCondition,AggressorSide,LastQty,LastPx,RestingHiddenQty,RestingCxlQty,RequestTime,AggressorTime,TransactTime,EOBICaptTime,ETICaptTime
0,52408,2504501,1534402803098524274,30464,54,1,255,1,111,4264000000,,0,1.534403e+18,1.534403e+18,1534402803098574361,1534402803098576041,
1,52408,2504501,1534402803130589112,30499,54,1,255,1,99,4264000000,,0,1.534403e+18,1.534403e+18,1534402803130631999,1534402803130633441,1.534403e+18
2,52408,2504501,1534402803135381294,30505,54,0,255,2,137,4263000000,,0,1.534403e+18,1.534403e+18,1534402803135432113,1534402803135433606,1.534403e+18
3,52408,2504501,1534402803156810177,30524,54,1,255,2,92,4263000000,,0,1.534403e+18,1.534403e+18,1534402803156846800,1534402803156848412,1.534403e+18
4,52408,2504501,1534402803156878665,30525,54,1,255,2,20,4263000000,,0,1.534403e+18,1.534403e+18,1534402803156914432,1534402803156916054,1.534403e+18


In [12]:
df_precision_loss.dtypes

MarketSegmentID          int64
SecurityID               int64
ExecID                   int64
ApplSeqNum               int64
PartitionID              int64
CompletionIndicator      int64
TradeCondition           int64
AggressorSide            int64
LastQty                  int64
LastPx                   int64
RestingHiddenQty       float64
RestingCxlQty            int64
RequestTime            float64
AggressorTime          float64
TransactTime             int64
EOBICaptTime             int64
ETICaptTime            float64
dtype: object

## Str to int vs. int to float to int

In [13]:
df_correct = pd.read_csv(PATH, dtype=str, delimiter=';') #Note that this requires mapping numeric types later. In HPT data it is easy as all values are integers.

# Replace NaNs with zeros

df_correct['ETICaptTime_int'] = df_correct['ETICaptTime'].fillna(0).astype(int)

df_precision_loss['ETICaptTime_int'] = df_precision_loss['ETICaptTime'].fillna(0).astype(int)

# Simple check for precision loss, look at the last bits

diff = df_correct['ETICaptTime_int'] - df_precision_loss['ETICaptTime_int']
diff.head()

OverflowError: Python int too large to convert to C long

## Using explicit converter

In [None]:
import numpy as np

# convert missing values to zero
def time_converter(num):
    try:
        return np.int64(num)
    except:
        return np.int64(0)
    
df_converted = pd.read_csv(PATH,sep=";", 
                         converters = {'ETICaptTime'  : time_converter, 
                                       'EOBICaptTime' : time_converter, 
                                       'TransactTime' : time_converter,
                                       'AggressorTime': time_converter,
                                       'RequestTime'  : time_converter},
                      usecols=['MarketSegmentID', 'ExecID',
                               'PartitionID', 'AggressorSide', 'LastQty',
                               'RequestTime',   # t3n
                               'AggressorTime', # t5
                               'TransactTime',  # t9
                               'EOBICaptTime',  # t9d
                               'ETICaptTime'    # t3a
                              ],
                    dtype = {'MarketSegmentID':np.int32,  
                             'ExecID':np.int64,
                             'PartitionID':np.int8,
                             'AggressorSide':np.int8,
                             'LastQty':np.int64},
                     )

In [None]:
df_correct['ETICaptTime_int'] - df_converted['ETICaptTime']