In [1]:
import csv

In [2]:
# Logs
# Module-level accumulators: the reading cell appends one dict per CSV row to each list.
# They are only reset here, so re-running the reading cell alone appends the rows again.

order_log = []  # rows of the order log
trade_log = []  # rows of the trade log

class Action:
    # Integer codes found in the ACTION column of the order log rows.
    REVOKE, POST, MATCH = 0, 1, 2

# Instrument codes handled by this notebook; log rows are filtered by SECCODE against this list.
CODES = ['USD000000TOD', 'USD000UTSTOM', 'EUR_RUB__TOD', 'EUR_RUB__TOM', 'EURUSD000TOD', 'EURUSD000TOM']

# Per-instrument settings keyed by SECCODE.
# SCHEDULE is an integer in the same layout as the TIME column of the logs — presumably
# HHMMSS followed by six sub-second digits (174500000000 ~ 17:45:00); TODO confirm.
# NOTE(review): INDEX does not match the position in CODES for the EURUSD pair
# (CODES has EURUSD000TOD at 4 and EURUSD000TOM at 5, here TOM is 4 and TOD is 5) —
# confirm which of the two is intended before relying on INDEX.
instruments_info = {'USD000000TOD': {'SCHEDULE': 174500000000, 'PRICE_STEP': 0.0025, 'INDEX':0},
                    'USD000UTSTOM': {'SCHEDULE': 235000000000, 'PRICE_STEP': 0.0025, 'INDEX':1},
                    'EUR_RUB__TOD': {'SCHEDULE': 150000000000, 'PRICE_STEP': 0.0025, 'INDEX':2},
                    'EUR_RUB__TOM': {'SCHEDULE': 235000000000, 'PRICE_STEP': 0.0025, 'INDEX':3},
                    'EURUSD000TOM': {'SCHEDULE': 235000000000, 'PRICE_STEP': 0.00001, 'INDEX':4},
                    'EURUSD000TOD': {'SCHEDULE': 150000000000, 'PRICE_STEP': 0.00001, 'INDEX':5}}

In [4]:
# Reading
# NOTE: both logs are currently read from the same file; point the trade-log
# reader at the actual trade log file once it is available.

filename = '/Users/levsvalov/code_workspace/Spring2020/DM/OrderLog20180507.txt'

# Order log
# `with` closes the file handle as soon as all rows are read (the bare open() leaked it);
# newline='' is the mode the csv module asks for so quoted line breaks are parsed correctly.
with open(filename, newline='') as order_file:
    reader = csv.DictReader(order_file)
    for row in reader:
        order_log.append(row)

# Trade log
with open(filename, newline='') as trade_file:
    reader = csv.DictReader(trade_file)
    for row in reader:
        trade_log.append(row)

In [5]:
import numpy as np

def reformat(log):
    """
    Convert every value of every row from CSV text to its typed form (in place).

    Parameters
    ----------
    log : list of dict
        Rows keyed by column name. Every column present in a row must be one
        of the known columns below, otherwise a KeyError is raised.

    Returns
    -------
    The same `log` object, with the values of each row replaced.
    """
    def float_or_nan(text):
        # Empty CSV cells become NaN; anything else is parsed as a float.
        if text == '':
            return np.nan
        return float(text)

    converters = {
        'NO': int,
        'SECCODE': str,
        'BUYSELL': str,
        'TIME': int,
        'ORDERNO': int,
        'ACTION': int,
        'PRICE': float,
        'VOLUME': int,
        'TRADENO': float_or_nan,
        'TRADEPRICE': float_or_nan,
    }

    for record in log:
        # Only values are replaced, so iterating over the keys while assigning is safe.
        for column in record:
            convert = converters[column]
            record[column] = convert(record[column])

    return log


# Reformat order log
# NOTE(review): only order_log is converted here; trade_log keeps the raw string values
# produced by csv.DictReader — confirm this is intended before comparing the two logs.
reformat(order_log);

In [6]:
# Show the first three rows to check the converted column types.
order_log[:3]

[OrderedDict([('NO', 1),
              ('SECCODE', 'USD000000TOD'),
              ('BUYSELL', 'B'),
              ('TIME', 100000161028),
              ('ORDERNO', 116),
              ('ACTION', 1),
              ('PRICE', 61.5875),
              ('VOLUME', 101000),
              ('TRADENO', nan),
              ('TRADEPRICE', nan)]),
 OrderedDict([('NO', 2),
              ('SECCODE', 'USD000000TOD'),
              ('BUYSELL', 'S'),
              ('TIME', 100000174997),
              ('ORDERNO', 130),
              ('ACTION', 1),
              ('PRICE', 64.3975),
              ('VOLUME', 202000),
              ('TRADENO', nan),
              ('TRADEPRICE', nan)]),
 OrderedDict([('NO', 3),
              ('SECCODE', 'USD000000TOD'),
              ('BUYSELL', 'B'),
              ('TIME', 100000179686),
              ('ORDERNO', 135),
              ('ACTION', 1),
              ('PRICE', 62.445),
              ('VOLUME', 203000),
              ('TRADENO', nan),
              ('TRADEPRICE', n

In [7]:
def filter(df, predicate):
    """
    Keep the rows for which the predicate holds.

    Returns a new list containing, in their original order, the rows of `df`
    for which `predicate(row)` is truthy; `df` itself is not modified.

    NOTE: this definition shadows the built-in `filter` for the rest of the notebook.
    """
    kept = []
    for row in df:
        if predicate(row):
            kept.append(row)
    return kept


# First order-log row for USD000000TOD ([0] raises IndexError when nothing matches).
filter(order_log, lambda x: x['SECCODE'] == 'USD000000TOD')[0]

OrderedDict([('NO', 1),
             ('SECCODE', 'USD000000TOD'),
             ('BUYSELL', 'B'),
             ('TIME', 100000161028),
             ('ORDERNO', 116),
             ('ACTION', 1),
             ('PRICE', 61.5875),
             ('VOLUME', 101000),
             ('TRADENO', nan),
             ('TRADEPRICE', nan)])

In [8]:
# Filtering orders
# Keep rows whose instrument is in CODES and whose TIME is below 2350 * 1E8
# (= 235000000000, the same value as the SCHEDULE of the TOM instruments),
# then keep only the first 1000 matching rows.
# NOTE(review): the [:1000] cut looks like a development shortcut — confirm before a full run.
df_order = filter(order_log, lambda row:
                      row['SECCODE'] in CODES and
                      row['TIME'] < 2350 * 1E8)[:1000]

# Trades are filtered by instrument only (no time cutoff, no row limit).
df_trade = filter(trade_log, lambda row:
                     row['SECCODE'] in CODES);

In [16]:
# Temporary ACTION codes applied before sorting by (TIME, ACTION): with these ranks,
# for equal TIME a POST sorts before a MATCH, which sorts before a REVOKE.
prep_dic = {Action.POST: 0, Action.MATCH: 1, Action.REVOKE: 2}
unprep_dic = { v: k for k, v in prep_dic.items() } # Inverse of `prep_dic`: rank -> original ACTION code


def apply(df, f):
    """
    Apply function to every row of df, for its side effects.

    Parameters
    ----------
    df : iterable
        Rows to visit, in order.
    f : callable
        Called once per row; its return value is ignored.

    Returns
    -------
    None
    """
    # A plain loop avoids building a throwaway list just to run side effects.
    for row in df:
        f(row)


def sort(df, cols):
    """
    Sort df by columns (in place).

    The original body was a bare `pass`, so callers that relied on the rows
    being reordered silently kept the input order.

    Parameters
    ----------
    df : list of dict
        Rows to reorder; the list is modified in place.
    cols : sequence of str, or a single str
        Column names in priority order; rows are compared by the tuple of
        their values in these columns.

    Returns
    -------
    None (like `list.sort`).

    Notes
    -----
    The sort is stable: rows that compare equal keep their relative order.
    NaN values in a sort column do not order consistently, so sort only on
    columns without missing values.
    """
    if isinstance(cols, str):
        # Accept a single column name as a convenience.
        cols = [cols]
    df.sort(key=lambda row: tuple(row[col] for col in cols))


def harvard(row):
    """
    Replace the row's ACTION code with its sort rank from `prep_dic` (in place).

    Rows without an 'ACTION' key are returned untouched. Returns the same row object.
    """
    if 'ACTION' in row:
        row['ACTION'] = prep_dic[row['ACTION']]
    return row


def harvard_inverse(row):
    """
    Restore the row's original ACTION code from its sort rank via `unprep_dic` (in place).

    Rows without an 'ACTION' key are returned untouched. Returns the same row object.
    """
    if 'ACTION' in row:
        row['ACTION'] = unprep_dic[row['ACTION']]
    return row


# Remap ACTION to its sort rank, sort by (TIME, ACTION), then restore the original codes.
# sort() is expected to reorder df_order in place — its return value is not used here.
apply(df_order, harvard)
sort(df_order, ['TIME', 'ACTION'])
apply(df_order, harvard_inverse);

OrderedDict([('NO', 1), ('SECCODE', 'USD000000TOD'), ('BUYSELL', 'B'), ('TIME', 100000161028), ('ORDERNO', 116), ('ACTION', 1), ('PRICE', 61.5875), ('VOLUME', 101000), ('TRADENO', nan), ('TRADEPRICE', nan)])
OrderedDict([('NO', 1), ('SECCODE', 'USD000000TOD'), ('BUYSELL', 'B'), ('TIME', 100000161028), ('ORDERNO', 116), ('ACTION', 0), ('PRICE', 61.5875), ('VOLUME', 101000), ('TRADENO', nan), ('TRADEPRICE', nan)])


In [26]:
'''
# Helper functions
def drop_by_entry(df_cur, entry, inplace=True):
    df_cur.drop(df_cur[df_cur['ORDERNO'] == entry['ORDERNO']].index, inplace=inplace)

def reduce_by_entry(df_cur, entry, FROM, TO):
    df_cur.loc[df_cur[df_cur['ORDERNO'] == entry['ORDERNO']].index, ['VOLUME']] = FROM['VOLUME'] - TO['VOLUME']
'''
def drop_by_entry(df_cur, entry):
    """
    Remove from `df_cur` every row whose ORDERNO equals `entry['ORDERNO']` (in place).

    The previous version called `list.remove` while iterating over the same
    list, which skips the element following each removal (so consecutive
    matches were left behind) and removes the first *equal* row rather than
    the row being visited.

    Nothing is returned: callers keep using the same list object, which is
    why the list is rebuilt through slice assignment instead of rebinding.
    """
    order_no = entry['ORDERNO']
    df_cur[:] = [row for row in df_cur if row['ORDERNO'] != order_no]
def reduce_by_entry(df_cur, entry, FROM, TO):
    """
    Set VOLUME to `FROM['VOLUME'] - TO['VOLUME']` on every row of `df_cur`
    whose ORDERNO equals `entry['ORDERNO']` (in place, nothing is returned).

    The difference is evaluated separately for each matching row, because
    `FROM` may be one of the rows being updated.
    """
    matching = (record for record in df_cur if record['ORDERNO'] == entry['ORDERNO'])
    for record in matching:
        record['VOLUME'] = FROM['VOLUME'] - TO['VOLUME']

In [None]:
total_mistakes = 0  # running count of errors reported through print_error


def print_error(error, row=None):
    """
    Print a framed error message and count it in `total_mistakes`.

    Parameters
    ----------
    error : str
        Message to print.
    row : dict, optional
        The log row being processed; its 'NO' value is printed with the error.
        The original version silently read a module-level variable named `row`.
        When this argument is omitted that global is still used if it exists,
        so callers that pass only the message keep working; if no such global
        exists the row number is reported as unknown instead of raising NameError.
    """
    global total_mistakes
    if row is None:
        # Backward-compatible fallback to the notebook-level `row` variable.
        row = globals().get('row')
    print('-' * 40)
    if row is not None:
        print(f'in row: {row["NO"]}')
    else:
        print('in row: <unknown>')
    print(error)
    print('-' * 40)
    print()
    total_mistakes += 1

'''
def check_exists(df_cur, row, NO='ORDERNO'):
    # check if the given ORDERNO exists
    # sub_df = df_cur[df_cur[NO] == row[NO]]
    df_cur_searchable = df_cur.set_index(NO)
    sub_df = df_cur[df_cur_searchable==row[NO]]
    if len(sub_df) > 0:
        return True, sub_df
    # printing the error
    else:
        print_error(f"ERROR: Record with {NO} {row[NO]} doesn't exist")
        return False, None
'''
def find_sub_df(df_cur, row, NO):
    """
    Return the rows of `df_cur` whose value in column `NO` equals `row[NO]`.

    The result is a new list (possibly empty) holding the matching row
    objects in their original order.
    """
    return [entry for entry in df_cur if entry[NO] == row[NO]]
def check_exists(df_cur, row, NO='ORDERNO'):
    """
    Look up the rows of `df_cur` that share `row`'s value in column `NO`.

    Returns
    -------
    (True, matches) when at least one row matches, where `matches` is the
    list returned by `find_sub_df`; otherwise reports the problem through
    `print_error` and returns (False, None).
    """
    matches = find_sub_df(df_cur, row, NO)
    if not matches:
        # printing the error
        print_error(f"ERROR: Record with {NO} {row[NO]} doesn't exist")
        return False, None
    return True, matches

In [None]:
def handle_revoke(df_cur, row):
    """
    Apply a revoke record `row` to the current order list `df_cur` (in place).

    The first row of `df_cur` with the same ORDERNO is compared with the
    revoked volume:
      * equal volumes   -> the order is dropped;
      * larger resting  -> the order's volume is reduced by the revoked volume;
      * smaller resting -> the order is dropped and an error is reported.
    If no row has that ORDERNO, `check_exists` reports the error and nothing changes.
    """
    # check if the given ORDERNO exists
    check, sub_df = check_exists(df_cur, row)
    if not check:
        return

    series = sub_df[0]
    resting = int(series['VOLUME'])
    revoked = int(row['VOLUME'])

    if resting == revoked:
        # removing the record
        drop_by_entry(df_cur, row)
    elif resting > revoked:
        # reducing the amount
        reduce_by_entry(df_cur, row, series, row)
    else:
        # Revoking more than is available: drop the order rather than keep a negative volume
        drop_by_entry(df_cur, row)
        print_error("ERROR: Cannot revoke more that there is")

In [None]:
def find(df_cur, value):
    """
    Return the rows of `df_cur` whose ORDERNO equals `value`.

    The result is a new list (possibly empty) holding the matching row
    objects in their original order.
    """
    return [row for row in df_cur if row['ORDERNO'] == value]

In [None]:
def _settle_side(df_cur, order, row):
    """Apply the fill in `row` to `order`: drop it when no volume remains, else reduce it."""
    if int(order['VOLUME']) <= int(row['VOLUME']):
        # Fully consumed (or over-consumed): never leave a zero or negative volume behind.
        drop_by_entry(df_cur, order)
    else:
        # reducing the amount
        reduce_by_entry(df_cur, order, order, row)


def handle_match(df_cur, row):
    """
    Apply a match record `row` to the current order list `df_cur` (in place).

    Steps:
      1. `row`'s ORDERNO must exist in `df_cur`.
      2. `row`'s TRADENO must exist in the module-level `df_trade`; the first
         matching trade supplies SELLORDERNO and BUYORDERNO.
      3. Both of those orders must exist in `df_cur`, otherwise an error is reported.
      4. Each side is settled against `row['VOLUME']`: an order with no volume
         left is dropped, otherwise its volume is reduced. If either side had
         less volume than the trade, an error is reported after settling.

    Changes from the previous version:
      * the buyer's volume is compared through `int()` like the seller's;
      * in the "not enough volume" case an order whose volume equals the traded
        volume is dropped instead of being left in the list with volume 0;
      * the unnecessary `global df_trade` declaration is gone (it is only read).

    NOTE(review): all lookups compare with `==`, so the TRADENO / SELLORDERNO /
    BUYORDERNO values in `df_trade` must have the same types as the values in
    the order rows — confirm the trade log is type-converted before use.
    """
    # check if the given ORDERNO exists
    check, _ = check_exists(df_cur, row)
    if not check:
        return

    # check if the given tradeno exists in trade logs
    check, sub_trade = check_exists(df_trade, row, 'TRADENO')
    if not check:
        return
    series_trade = sub_trade[0]

    sellers = find(df_cur, series_trade['SELLORDERNO'])
    buyers = find(df_cur, series_trade['BUYORDERNO'])

    # check if seller and buyer exist
    if not sellers or not buyers:
        print_error("ERROR: There's no such buyer and/or seller")
        return
    seller = sellers[0]
    buyer = buyers[0]

    # Evaluate the volume check before settling, because settling mutates the volumes.
    traded = int(row['VOLUME'])
    enough = int(seller['VOLUME']) >= traded and int(buyer['VOLUME']) >= traded

    _settle_side(df_cur, seller, row)
    _settle_side(df_cur, buyer, row)

    if not enough:
        print_error("ERROR: Not enough amount for buying and/or selling")


