In [6]:
import pandas as pd
import os
import glob
from datetime import datetime
from collections import deque
import pytz

def process_csv(file_path):
    # Read the CSV data
    df = pd.read_csv(file_path)

    # Convert 'Date/Time' to datetime
    df['Date/Time'] = pd.to_datetime(df['Date/Time'], format='%Y%m%d;%H%M%S')

    # Localize to Eastern Time (America/New_York)
    eastern_tz = pytz.timezone('America/New_York')
    df['Date/Time'] = df['Date/Time'].apply(lambda x: x.tz_localize(eastern_tz))

    # Sort by 'Date/Time' for FIFO
    df = df.sort_values('Date/Time').reset_index(drop=True)

    # Calculate per-unit fees
    def calculate_trade_fee(row):
        return (
            abs(row['BrokerExecutionCommission']) +
            abs(row['ThirdPartyExecutionCommission']) +
            abs(row['ThirdPartyRegulatoryCommission'])
        )

    df['TotalFee'] = df.apply(calculate_trade_fee, axis=1)
    df['FeePerUnit'] = df['TotalFee'] / df['Quantity'].abs()

    # Initialize round trips list
    round_trips = []

    # Group trades by symbol
    for symbol in df['Symbol'].unique():
        symbol_df = df[df['Symbol'] == symbol].copy()

        # Initialize queues for this symbol
        buy_trades = deque()
        sell_trades = deque()

        # Split trades into single-unit trades
        for _, row in symbol_df.iterrows():
            qty = int(row['Quantity'])
            fee_per_unit = row['TotalFee'] / abs(qty)
            trade = {
                'Time': row['Date/Time'],
                'Price': row['Price'],
                'FeePerUnit': fee_per_unit,
                'TradeDate': row['TradeDate'],
                'Symbol': row['Symbol'],
                'OriginalQty': qty
            }
            if qty > 0:
                for _ in range(qty):
                    buy_trades.append(trade.copy())
            else:
                for _ in range(-qty):
                    sell_trades.append(trade.copy())

        # Generate round trips for this symbol
        while buy_trades and sell_trades:
            buy = buy_trades.popleft()
            sell = sell_trades.popleft()
            
            # Determine trade type
            if buy['Time'] <= sell['Time']:
                trade_type = 'Long'
                entered_at = buy['Time']
                exited_at = sell['Time']
                entry_price = buy['Price']
                exit_price = sell['Price']
                fees = buy['FeePerUnit'] + sell['FeePerUnit']
                pnl = (exit_price - entry_price) * 5 - fees
            else:
                trade_type = 'Short'
                entered_at = sell['Time']
                exited_at = buy['Time']
                entry_price = sell['Price']
                exit_price = buy['Price']
                fees = buy['FeePerUnit'] + sell['FeePerUnit']
                pnl = (entry_price - exit_price) * 5 - fees

            # Calculate duration
            trade_duration_seconds = (exited_at - entered_at).total_seconds()
            trade_duration_days = trade_duration_seconds // (24 * 3600)
            trade_duration_seconds %= (24 * 3600)
            trade_duration_hours = trade_duration_seconds // 3600
            trade_duration_seconds %= 3600
            trade_duration_minutes = trade_duration_seconds // 60
            trade_duration_seconds %= 60
            trade_duration_seconds = int(trade_duration_seconds)
            trade_duration_str = f"{int(trade_duration_days)} days {int(trade_duration_hours):02}:{int(trade_duration_minutes):02}:{trade_duration_seconds:02}"

            # Convert to +03:00 timezone
            target_tz = pytz.timezone('US/Central')
            entered_at_tz = entered_at.astimezone(target_tz)
            exited_at_tz = exited_at.astimezone(target_tz)
            entered_at_str = entered_at_tz.strftime('%m/%d/%Y %H:%M:%S %z').replace('+0300', '+03:00')
            exited_at_str = exited_at_tz.strftime('%m/%d/%Y %H:%M:%S %z').replace('+0300', '+03:00')

            # Create round trip
            round_trip = {
                'ContractName': buy['Symbol'],
                'EnteredAt': entered_at_str,
                'ExitedAt': exited_at_str,
                'EntryPrice': entry_price,
                'ExitPrice': exit_price,
                'Fees': round(fees, 2),
                'PnL': round(pnl, 2),
                'Size': 1,
                'Type': trade_type,
                'TradeDay': entered_at_str,
                'TradeDuration': trade_duration_str
            }
            round_trips.append(round_trip)

        # Print unmatched buy trades
        if buy_trades:
            print(f"\nUnmatched BUY trades for symbol {symbol}:")
            for t in buy_trades:
                print(f"  Time: {t['Time']}, Price: {t['Price']}, Qty: 1, Symbol: {t['Symbol']}")

        # Print unmatched sell trades
        if sell_trades:
            print(f"\nUnmatched SELL trades for symbol {symbol}:")
            for t in sell_trades:
                print(f"  Time: {t['Time']}, Price: {t['Price']}, Qty: 1, Symbol: {t['Symbol']}")

    # Convert to DataFrame
    round_trips_df = pd.DataFrame(round_trips)

    # Combine round trips with identical EnteredAt and ExitedAt
    grouped = round_trips_df.groupby([
        'EnteredAt', 'ExitedAt', 'ContractName', 'Type', 'EntryPrice', 'ExitPrice'
    ]).agg({
        'Size': 'sum',
        'PnL': 'sum',
        'Fees': 'sum',
        'TradeDay': 'first',
        'TradeDuration': 'first'
    }).reset_index()

    # Assign Ids based on trade day
    grouped['TradeDate'] = pd.to_datetime(grouped['TradeDay'], format='%m/%d/%Y %H:%M:%S %z').dt.date
    grouped = grouped.sort_values(['TradeDate', 'EnteredAt']).reset_index(drop=True)
    
    # Reset Id for each unique trade date
    grouped['Id'] = 1
    for date in grouped['TradeDate'].unique():
        mask = grouped['TradeDate'] == date
        grouped.loc[mask, 'Id'] = range(1, mask.sum() + 1)

    # Drop temporary TradeDate column
    grouped = grouped.drop(columns=['TradeDate'])

    # Reorder columns
    output_columns = [
        'Id', 'ContractName', 'EnteredAt', 'ExitedAt', 'EntryPrice', 'ExitPrice',
        'Fees', 'PnL', 'Size', 'Type', 'TradeDay', 'TradeDuration'
    ]
    round_trips_df = grouped[output_columns]

    return round_trips_df

# Define the performance directory
root_dir = os.path.join('..', '..', '..', 'data', 'temp_performance')

# File-name prefix that marks a raw export; outputs are written without it
raw_prefix = 'Raw_'

# Find all CSV files starting with the raw prefix in the root directory.
# glob returns matches in arbitrary order, so sort for reproducible runs.
csv_files = sorted(glob.glob(os.path.join(root_dir, raw_prefix + '*.csv')))

# Process each CSV file
for file_path in csv_files:
    print(f"Processing {file_path}...")

    # Generate round trips
    round_trips_df = process_csv(file_path)

    # Generate output filename in the same directory. Every match starts with
    # the prefix, so slice it off: str.replace would also remove any later
    # 'Raw_' inside the name, and would remove nothing if the match differs
    # in case, making the output path equal to the input path.
    output_filename = os.path.join(
        root_dir,
        os.path.basename(file_path)[len(raw_prefix):]
    )

    # Save the output CSV
    round_trips_df.to_csv(output_filename, index=False)
    print(f"Saved output to {output_filename}\n")

Processing ../../../data/temp_performance/Raw_Performance_2025-05-19-23.csv...
Saved output to ../../../data/temp_performance/Performance_2025-05-19-23.csv

