In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
from pandas.io.parsers.readers import read_csv
import numpy as np
import plotly.graph_objects as go
from datetime import datetime
from datetime import timedelta
from datetime import time
import math
import operator
import zipfile
import pprint
import sys
from pdb import set_trace
def split_csv(file_name, m):
  """Split a CSV file into numbered chunk files of at most ``m`` rows each.

  Chunks are written next to the input as ``<stem>_000.csv``,
  ``<stem>_001.csv``, ... where ``<stem>`` is ``file_name`` without its
  extension.

  Args:
    file_name: Path of the CSV file to split.
    m: Maximum number of data rows per chunk; must be at least 1.

  Raises:
    ValueError: If ``m`` is smaller than 1.
  """
  if m < 1:
    raise ValueError('m must be a positive number of rows per chunk')

  df = read_csv(file_name)

  # splitext handles any extension length (or none) instead of blindly
  # chopping the last four characters off the name.
  stem, _ = os.path.splitext(file_name)

  n_chunks = math.ceil(len(df) / m)
  for i in range(n_chunks):
    chunk = df.iloc[i * m:(i + 1) * m]
    out_name = stem + '_' + str(i).zfill(3) + '.csv'
    # The row index is written as the first (unnamed) column, so every chunk
    # keeps the row numbers of the original file.
    chunk.to_csv(out_name)



def load_prep_data(file_name):
    """Load a trades CSV and normalise its ``stdSymbol`` column.

    Every symbol is cut at the first ``_`` and then at the first ``.``
    (dropping suffixes), and known aliases are replaced by one canonical
    instrument name.

    Args:
        file_name: Path of the CSV file; it must contain a ``stdSymbol`` column.

    Returns:
        The loaded DataFrame with ``stdSymbol`` rewritten.
    """
    mapping = {
        'DOW': 'US30',
        'NSDQ': 'NDX100',
        'NAS100': 'NDX100',
        'SP': 'SPX500',
        'US500': 'SPX500',
        'DAX': 'GER30',
        'ASX': 'ASX200',
        'AUS200': 'ASX200',
        'NIKKEI': 'JPN225',
        'FTSE': 'UK100',
        'STOXX50': 'EUSTX50',
        'CAC': 'FRA40',
        'BRENT': 'UKOUSD',
        'UKOil': 'UKOUSD',
        'CL': 'USOUSD',
        'USOil': 'USOUSD',
    }

    def canonical(symbol):
        # Missing / non-text cells are passed through untouched.
        if not isinstance(symbol, str):
            return symbol
        base = symbol.split('_', 1)[0].split('.', 1)[0]
        return mapping.get(base, base)

    df = read_csv(file_name)
    # Assign the whole column at once: the former per-cell chained assignment
    # (df['stdSymbol'][i] = ...) raised SettingWithCopyWarning and is not
    # guaranteed to write through to the frame.
    df['stdSymbol'] = df['stdSymbol'].map(canonical)
    return df


def anyname(a):
  """Sort key: return the 'Open' value of a trade dict."""
  open_value = a['Open']
  return open_value

def construct_tree(df):
  """Index trades as ``tree[symbol][side] -> list of trade dicts``.

  Each row of ``df`` becomes a dict with the keys 'Open' (open time as a
  timestamp in seconds), 'Login', 'Lot', 'Duration' (minutes between open and
  close), 'Profit', 'Side' and 'Symbol'. Every list is sorted by 'Open' in
  ascending order.

  Args:
    df: DataFrame with the columns 'stdSymbol', 'side', 'openTime',
      'closeTime', 'login', 'lots' and 'profit'. Only the first 19 characters
      of the two time columns are parsed, as 'YYYY-MM-DD HH:MM:SS'.

  Returns:
    The nested dict described above.
  """
  time_format = '%Y-%m-%d %H:%M:%S'
  tree = {}

  for i in range(len(df)):
    row = df.iloc[i]
    symbol = row['stdSymbol']
    side = row['side']

    # Anything after the seconds (e.g. fractional seconds) is dropped.
    opened_at = datetime.strptime(row['openTime'][:19], time_format)
    closed_at = datetime.strptime(row['closeTime'][:19], time_format)

    trade = {
        # Naive datetime: timestamp() interprets it in the local time zone.
        'Open': opened_at.timestamp(),
        'Login': row['login'],
        'Lot': row['lots'],
        'Duration': (closed_at - opened_at).total_seconds() / 60,
        'Profit': row['profit'],
        'Side': side,
        'Symbol': symbol}

    tree.setdefault(symbol, {}).setdefault(side, []).append(trade)

  # binary_search() relies on every list being ordered by open time.
  for sides in tree.values():
    for trades in sides.values():
      trades.sort(key=lambda t: t['Open'])

  return tree

def get_lot(trade):
  """Sort key: return the 'Lot' value of a trade dict."""
  lot_value = trade['Lot']
  return lot_value



def search_matching(trade_row, tree, window_seconds=15 * 60, lot_tolerance=0.05):
  """Look for opposite-side trades in ``tree`` that offset one trade.

  Candidates are the trades of the same symbol and the opposite side whose
  open time lies within ``window_seconds`` of this trade's open time (found
  with binary_search()). They are ordered by lot size and scanned with a
  sliding window; the first window whose lots sum to the trade's lot within
  a relative ``lot_tolerance`` is reported.

  Args:
    trade_row: One trade (row) providing 'stdSymbol', 'side', 'openTime',
      'closeTime', 'login', 'lots' and 'profit'.
    tree: Nested dict ``tree[symbol][side] -> list of trade dicts`` sorted by
      'Open', as built by construct_tree().
    window_seconds: Half-width of the open-time window, in seconds.
    lot_tolerance: Allowed relative deviation of the summed lots.

  Returns:
    ``[]`` when nothing matches, otherwise a one-element list
    ``[{'t1': <this trade as a dict>, 't2': [<matched trade dicts>]}]``.
  """
  symbol = trade_row['stdSymbol']
  own_side = trade_row['side']
  lot = trade_row['lots']

  # Any side other than 'Buy' is matched against the 'Buy' branch.
  opposite_side = 'Sell' if own_side == 'Buy' else 'Buy'

  time_format = '%Y-%m-%d %H:%M:%S'
  opened_at = datetime.strptime(trade_row['openTime'][:19], time_format)
  closed_at = datetime.strptime(trade_row['closeTime'][:19], time_format)
  open_ts = opened_at.timestamp()

  trade1 = {
      'Open': open_ts,
      'Login': trade_row['login'],
      'Lot': lot,
      'Duration': (closed_at - opened_at).total_seconds() / 60,
      'Symbol': symbol,
      'Side': own_side,
      'Profit': trade_row['profit']}

  candidates = tree.get(symbol, {}).get(opposite_side)
  if not candidates:
    return []

  # The ratio test below divides by lot; a zero, negative or NaN lot can
  # never be matched, so stop before dividing.
  if not lot > 0:
    return []

  i0, i1 = binary_search(candidates, open_ts, window_seconds)
  if i0 is None or i1 is None:
    return []

  by_lot = sorted(candidates[i0:i1 + 1], key=lambda t: t['Lot'])
  upper_bound = lot * (1 + lot_tolerance)

  # NOTE: only runs that are contiguous in lot order are tried; combinations
  # of non-adjacent trades inside the time window are never considered.
  running_lots = 0
  start = 0
  for end, candidate in enumerate(by_lot):
    running_lots += candidate['Lot']

    if abs(running_lots / lot - 1) <= lot_tolerance:
      return [{'t1': trade1, 't2': by_lot[start:end + 1]}]

    if running_lots > upper_bound:
      # Too much volume: drop the smallest lots from the left edge until the
      # window fits again (or only the current trade is left).
      while running_lots > upper_bound and start < end:
        running_lots -= by_lot[start]['Lot']
        start += 1

      if abs(running_lots / lot - 1) <= lot_tolerance:
        return [{'t1': trade1, 't2': by_lot[start:end + 1]}]

  return []



        # pp = pprint.PrettyPrinter(indent=4)
        # pp.pprint(trades2)
        # l = len(trades2)
        # for i in range(0,l):

        #   lot2 = trades2[i]['Lot']
        #   duration2 = trades2[i]['Duration']
        #   if (abs(duration2 / duration - 1) < 0.1):
        #     small_trades.append(trades2[i])
        #     sum_lots = sum_lots + lot2

        #   if  ((abs(sum_lots / lot - 1) < 0.05)):
        #     pairs.append({'t1':trade1,'t2':small_trades})
        #     break;

        #   else:
        #     if sum_lots > lot:
        #       sum2 = sum_lots
        #       k = i - 1
        #       while sum2 >= lot and k >= 0:
        #         sum2 = sum2 - trades2[k]['Lot']
        #         k = k - 1
        #         #small_trades.remove(trades2[k])
        #       sum2 = sum2
        #       if  ((abs(sum2 / lot - 1) < 0.05)):
        #         print(i,k,sum2)
                #pairs.append({'t1':trade1,'t2':small_trades})

        #counter = 0

        #print(trades[i0],trades[i1])
        #print(trades)

  #return pairs







# Iterate over every row of df_tft and run search_matching() on each one.
def collecting_pairs(df_tft, tree):
  """Run search_matching() on every row of ``df_tft`` and stack the hits.

  Args:
    df_tft: DataFrame of trades; each row is passed to search_matching().
    tree: Trade index passed through to search_matching().

  Returns:
    A DataFrame holding the json-normalised matches of all rows (columns in
    sorted order, fresh 0..n-1 index), or an empty DataFrame when no row
    produced a match.
  """
  frames = []
  for i in range(len(df_tft)):
    pairs = search_matching(df_tft.iloc[i], tree)
    if pairs:
      frames.append(pd.json_normalize(pairs))

  if not frames:
    return pd.DataFrame()

  # DataFrame.append was deprecated and later removed from pandas, and it
  # copied the accumulated frame on every call; concatenate once instead.
  return pd.concat(frames, ignore_index=True, sort=True)

def user_pairs(captured_pairs):
  """Aggregate profit and lots for every (t1.Login, t2.Login) combination.

  Sums 't1.Profit', 't2.Profit', 't1.Lot' and 't2.Lot' per login pair and adds
  the columns 'Aggregate_Profit' and 'Aggregate_Lot' (t1 + t2 of each sum).
  """
  sums = {
      "t1.Profit": "sum",
      "t2.Profit": "sum",
      "t1.Lot": "sum",
      "t2.Lot": "sum",
  }
  per_pair = captured_pairs.groupby(["t1.Login", "t2.Login"]).agg(sums).reset_index()

  return per_pair.assign(
      Aggregate_Profit=per_pair['t1.Profit'] + per_pair['t2.Profit'],
      Aggregate_Lot=per_pair['t1.Lot'] + per_pair['t2.Lot'],
  )









def binary_search(data, value, delta):
  """Find the index range of trades opened within ``delta`` of ``value``.

  Args:
    data: List of trade dicts sorted in ascending order by their 'Open' value.
    value: The open time to search around.
    delta: Half-width of the accepted window; an entry qualifies when
      ``value - delta <= entry['Open'] <= value + delta``.

  Returns:
    ``(index0, index1)``, the inclusive first and last index of the
    qualifying entries, or ``(None, None)`` when there are none.
  """
  # Nothing to search; without this guard the scans below would index into
  # an empty list.
  if not data:
    return None, None

  last = len(data) - 1
  start_index = 0
  end_index = last
  found_index = None

  # Classic bisection for an exact hit on 'Open'.
  while start_index <= end_index:
    mid_index = (start_index + end_index) // 2
    test_value = data[mid_index]['Open']

    if value == test_value:
      found_index = mid_index
      break
    if value < test_value:
      end_index = mid_index - 1
    else:
      start_index = mid_index + 1

  # No exact hit: start_index is where value would be inserted; clamp it so
  # it is a valid index to scan from.
  if found_index is None:
    found_index = min(start_index, last)

  # Walk left while entries are still inside the lower bound.
  i = found_index
  while i >= 0 and data[i]['Open'] >= value - delta:
    i -= 1
  index0 = i + 1

  # Walk right while entries are still inside the upper bound.
  i = found_index
  while i < len(data) and data[i]['Open'] <= value + delta:
    i += 1
  index1 = i - 1

  # An inverted range means no entry lies inside the window (this also
  # covers the case where neither scan moved).
  if index0 > index1:
    return None, None
  return index0, index1

# New Section

In [None]:
# Stand-alone experiment with the sliding-window logic that drives
# search_matching(); separate from the actual pipeline code.
data = [1.0, 1.0, 1.0, 3.0, 3.2, 7.0, 8.0]  # sample values fed to check_sum()
value = 10  # target that a run of `data` should sum to
delta = 0.05  # allowed relative deviation from `value` (0.05 = 5 %)

# Check whether a contiguous run of data sums to value within a relative tolerance of delta.
def check_sum(data, value, delta):
  """Slide a window over ``data`` looking for a run that sums to ``value``.

  Items are added one at a time on the right; when the running total exceeds
  ``value * (1 + delta)`` items are dropped from the left. Only contiguous
  runs are examined.

  Returns:
    ``None`` when no run qualifies, otherwise a dict with 'sum' (the run's
    total), 'diff_prcnt' (relative deviation from ``value`` in percent) and
    the inclusive 'start' / 'end' indices of the run.
  """
  def report(total, deviation, first, last):
    return { 'sum': total, 'diff_prcnt': 100*deviation, 'start': first, 'end': last }

  ceiling = value * (1 + delta)
  total = 0
  first = 0

  for last, item in enumerate(data):
    total += item

    # Accept as soon as the whole window is close enough to value.
    deviation = abs( total / value - 1 )
    if deviation <= delta:
      return report(total, deviation, first, last)

    # Still below the ceiling: keep growing the window.
    if not total > ceiling:
      continue

    # Overshoot: shed items from the left, then test the window again.
    while total > ceiling and first < last:
      total -= data[first]
      first += 1

    deviation = abs( total / value - 1 )
    if deviation <= delta:
      return report(total, deviation, first, last)

  return None
# Disabled debugger hook, kept for reference:
# gettrace = getattr(sys, 'gettrace', None)
#
# if gettrace is None:
#     print('No sys.gettrace')
# elif gettrace():
#     print('Hmm, Big Debugger is watching me')
# else:
#     print("Let's do something interesting")
#
#     set_trace()

# Run the check once on the sample inputs.
result = check_sum(data, value, delta)
# If a run was found, show the result and the items it covers (start..end inclusive).
if result is not None:
  items = data[result['start']:result['end'] + 1]
  print(result)
  print(items)
#print(result)

{'sum': 10.2, 'diff_prcnt': 2.0000000000000018, 'start': 4, 'end': 5}
[3.2, 7.0]


In [None]:
# One-off preparation step (disabled): split the raw exports into chunks of 5000 rows.
# split_csv('ttt2trades.csv', 5000)
# split_csv('tfthubtrades.csv', 5000)

# Load one chunk of each trade export with normalised symbols.
df_ttt2 = load_prep_data('ttt2trades_001.csv')

df_tft = load_prep_data('tfthubtrades_001.csv')
#print(df_tft)

# Index the ttt2 trades by symbol and side so they can be searched by open time.
tree = construct_tree(df_ttt2)

# Smoke test: match only the first tft trade and print the result twice (raw and pretty-printed).
pairs = search_matching(df_tft.iloc[0], tree)
print(pairs)
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(pairs)
#print(tree)
#data0 = tree['EURUSD']['Sell']
#print(data0)
#a = [1,2,5,64,3,23,4534,23423,12,321,41,412,412]
#i0, i1 = binary_search(a, 23, 30)
# Disabled debugger hook, kept for reference:
# gettrace = getattr(sys, 'gettrace', None)

# if gettrace is None:
#     print('No sys.gettrace')
# elif gettrace():
#     print('Hmm, Big Debugger is watching me')
# else:
#     print("Let's do something interesting")

#     set_trace()
#     pairs = search_matching(df_tft.iloc[0], tree)
#     print(pairs)
#     pp = pprint.PrettyPrinter(indent=4)
#     pp.pprint(pairs)

# print(len(pairs))

# Full run: match every tft trade against the ttt2 tree.
captured_pairs = collecting_pairs(df_tft, tree)
#print(captured_pairs)

# Per-login aggregation (disabled).
# NOTE(review): user_pairs() groups by 't2.Login', but search_matching() stores
# 't2' as a list, which pd.json_normalize presumably keeps as a single 't2'
# column - confirm before re-enabling.
#table = user_pairs(captured_pairs)
# print(table)
# table = table.sort_values(by=['Aggregate_Profit', 'Aggregate_Lot'])
# print(table)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['stdSymbol'][i] = symbol
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['stdSymbol'][i] = symbol


[{'t1': {'Open': 1634113424.0, 'Login': 1864504, 'Lot': 0.8, 'Duration': 428.5, 'Symbol': 'AUDUSD', 'Side': 'Buy', 'Profit': 56.8}, 't2': [{'Open': 1634113424.0, 'Login': 1864504, 'Lot': 0.8, 'Duration': 428.5, 'Profit': 56.8, 'Side': 'Sell', 'Symbol': 'AUDUSD'}]}]
[   {   't1': {   'Duration': 428.5,
                  'Login': 1864504,
                  'Lot': 0.8,
                  'Open': 1634113424.0,
                  'Profit': 56.8,
                  'Side': 'Buy',
                  'Symbol': 'AUDUSD'},
        't2': [   {   'Duration': 428.5,
                      'Login': 1864504,
                      'Lot': 0.8,
                      'Open': 1634113424.0,
                      'Profit': 56.8,
                      'Side': 'Sell',
                      'Symbol': 'AUDUSD'}]}]


  captured_pairs = captured_pairs.append(pd.json_normalize(pairs), ignore_index=True, sort=True)
