In [37]:
import pandas as pd
import os
from pathlib import Path
import numpy as np

In [38]:
def parse_log_file(log_file_path):
    """Parse an API cost log file into a DataFrame with one row per log entry.

    The file is read line by line and blank lines are skipped. A line that
    starts with ``[`` begins a new entry; that line, with leading/trailing
    square brackets removed, is stored as the entry's ``timestamp``. Every
    other line is matched against a fixed set of prefixes and, on a match,
    its value is stored on the current entry. Lines that match no known
    prefix are ignored.

    Args:
        log_file_path: Path of the log file (anything ``open`` accepts,
            e.g. ``str`` or ``pathlib.Path``).

    Returns:
        pandas.DataFrame: one row per entry, containing whichever of these
        columns appeared in the file: ``timestamp``, ``game``, ``model``,
        ``total_input_tokens``, ``input_text_tokens``, ``input_image_tokens``,
        ``output_tokens``, ``input_cost``, ``output_cost``, ``total_cost``.
        The frame is empty when no recognised line was found.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a token count or a cost is not a valid number.
        IndexError: if a cost line contains no ``$``.
    """

    def text_value(text):
        # maxsplit=1: keep values that themselves contain ':' intact
        # (e.g. a model id such as "ft:gpt-4o:acme").
        return text.split(':', 1)[1].strip()

    def int_value(text):
        return int(text.split(':')[1].strip())

    def cost_value(text):
        # Costs are written after a dollar sign, e.g. "Total Cost: $0.05".
        return float(text.split('$')[1].strip())

    # (line prefix, output column, parser). No prefix is a prefix of another,
    # so the first match is the only possible match.
    field_parsers = (
        ('Game:', 'game', text_value),
        ('Model:', 'model', text_value),
        ('Total Input Tokens:', 'total_input_tokens', int_value),
        ('Input Text Tokens:', 'input_text_tokens', int_value),
        ('Input Image Tokens:', 'input_image_tokens', int_value),
        ('Output Tokens:', 'output_tokens', int_value),
        ('Total Input Cost:', 'input_cost', cost_value),
        ('Total Output Cost:', 'output_cost', cost_value),
        ('Total Cost:', 'total_cost', cost_value),
    )

    data = []
    current_entry = {}

    with open(log_file_path, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            if line.startswith('['):
                # A timestamp line closes the previous entry (if any)
                # and opens a new one.
                if current_entry:
                    data.append(current_entry)
                current_entry = {'timestamp': line.strip('[]')}
                continue

            for prefix, column, parse in field_parsers:
                if line.startswith(prefix):
                    current_entry[column] = parse(line)
                    break

    # Flush the last entry, which has no following timestamp line.
    if current_entry:
        data.append(current_entry)

    return pd.DataFrame(data)


In [None]:
import pandas as pd
import os
from pathlib import Path

def parse_log_file(log_file_path):
    """Parse an API cost log file into a DataFrame with one row per log entry.

    The file is read line by line and blank lines are skipped. A line that
    starts with ``[`` begins a new entry; that line, with leading/trailing
    square brackets removed, is stored as the entry's ``timestamp``. Every
    other line is matched against a fixed set of prefixes and, on a match,
    its value is stored on the current entry. Lines that match no known
    prefix are ignored.

    Args:
        log_file_path: Path of the log file (anything ``open`` accepts,
            e.g. ``str`` or ``pathlib.Path``).

    Returns:
        pandas.DataFrame: one row per entry, containing whichever of these
        columns appeared in the file: ``timestamp``, ``game``, ``model``,
        ``total_input_tokens``, ``input_text_tokens``, ``input_image_tokens``,
        ``output_tokens``, ``input_cost``, ``output_cost``, ``total_cost``.
        The frame is empty when no recognised line was found.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a token count or a cost is not a valid number.
        IndexError: if a cost line contains no ``$``.
    """

    def text_value(text):
        # maxsplit=1: keep values that themselves contain ':' intact
        # (e.g. a model id such as "ft:gpt-4o:acme").
        return text.split(':', 1)[1].strip()

    def int_value(text):
        return int(text.split(':')[1].strip())

    def cost_value(text):
        # Costs are written after a dollar sign, e.g. "Total Cost: $0.05".
        return float(text.split('$')[1].strip())

    # (line prefix, output column, parser). No prefix is a prefix of another,
    # so the first match is the only possible match.
    field_parsers = (
        ('Game:', 'game', text_value),
        ('Model:', 'model', text_value),
        ('Total Input Tokens:', 'total_input_tokens', int_value),
        ('Input Text Tokens:', 'input_text_tokens', int_value),
        ('Input Image Tokens:', 'input_image_tokens', int_value),
        ('Output Tokens:', 'output_tokens', int_value),
        ('Total Input Cost:', 'input_cost', cost_value),
        ('Total Output Cost:', 'output_cost', cost_value),
        ('Total Cost:', 'total_cost', cost_value),
    )

    data = []
    current_entry = {}

    with open(log_file_path, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            if line.startswith('['):
                # A timestamp line closes the previous entry (if any)
                # and opens a new one.
                if current_entry:
                    data.append(current_entry)
                current_entry = {'timestamp': line.strip('[]')}
                continue

            for prefix, column, parse in field_parsers:
                if line.startswith(prefix):
                    current_entry[column] = parse(line)
                    break

    # Flush the last entry, which has no following timestamp line.
    if current_entry:
        data.append(current_entry)

    return pd.DataFrame(data)

# Cost logs to summarise - point these at your own files
log_files = [
    "cache/ace_attorney/cost_logs/o1-costs_l1.log",
    "cache/ace_attorney/cost_logs/gemini-2.5-pro-exp-03-25_api_costs.log",
    "cache/ace_attorney/cost_logs/3.7—think-cost.log"
]

# Summary-table columns that hold dollar amounts
cost_columns = ['Total Input Cost', 'Total Output Cost', 'Total Cost']

# Parse every log; a file that fails to parse is reported and skipped
all_data = []
for log_file in log_files:
    try:
        df = parse_log_file(Path(log_file))
        df['log_file'] = log_file  # remember which file each row came from
        all_data.append(df)
        print(f"Processed: {log_file}")
    except Exception as exc:
        print(f"Error processing {log_file}: {str(exc)}")

if all_data:
    # One frame with every entry, ordered chronologically
    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df['timestamp'] = pd.to_datetime(combined_df['timestamp'])
    combined_df = combined_df.sort_values('timestamp')

    # Keep only the columns the summary needs, under their display names
    summary_df = combined_df[
        ['log_file', 'model', 'input_cost', 'output_cost', 'total_cost']
    ].rename(columns={
        'log_file': 'Log File',
        'model': 'Model',
        'input_cost': 'Total Input Cost',
        'output_cost': 'Total Output Cost',
        'total_cost': 'Total Cost',
    })

    # Sum the costs per (log file, model) pair
    summary_df = (
        summary_df
        .groupby(['Log File', 'Model'])
        .agg({column: 'sum' for column in cost_columns})
        .reset_index()
    )

    # Attach the number of entries (API calls) behind each pair
    api_calls = (
        combined_df
        .groupby(['log_file', 'model'])
        .size()
        .reset_index(name='Number of API Calls')
    )
    summary_df = summary_df.merge(
        api_calls,
        left_on=['Log File', 'Model'],
        right_on=['log_file', 'model'],
    )
    summary_df = summary_df.drop(columns=['log_file', 'model'])

    # Per-model totals across all log files, labelled 'TOTAL'
    totals_spec = {column: 'sum' for column in cost_columns}
    totals_spec['Number of API Calls'] = 'sum'
    model_totals = summary_df.groupby('Model').agg(totals_spec).reset_index()
    model_totals['Log File'] = 'TOTAL'

    # Per-file rows first, then the totals
    summary_df = pd.concat([summary_df, model_totals], ignore_index=True)

    # Render the dollar amounts as strings with two decimals
    for cost_column in cost_columns:
        summary_df[cost_column] = summary_df[cost_column].map('${:.2f}'.format)

    # Final column order
    summary_df = summary_df[['Log File', 'Model', *cost_columns, 'Number of API Calls']]

    print("\nCost Summary Table:")
    display(summary_df)

    # Persist the (already formatted) summary
    summary_df.to_csv('api_costs_summary.csv', index=False)
    print("\nSummary saved to: api_costs_summary.csv")
else:
    print("No valid data found in any log files")

Processed: cache/ace_attorney/cost_logs/o1-costs_l1.log
Processed: cache/ace_attorney/cost_logs/ace_attorney_api_costs.log
Processed: cache/ace_attorney/cost_logs/3.7—think-cost.log

Cost Summary Table:


Unnamed: 0,Log File,Model,Total Input Cost,Total Output Cost,Total Cost,Number of API Calls
0,cache/ace_attorney/cost_logs/3.7—think-cost.log,claude-3-7-sonnet-20250219,$4.62,$1.19,$5.80,343
1,cache/ace_attorney/cost_logs/ace_attorney_api_...,gemini-2.5-pro-exp-03-25,$0.42,$0.07,$0.50,245
2,cache/ace_attorney/cost_logs/o1-costs_l1.log,o1-2024-12-17,$8.63,$1.09,$9.73,186
3,TOTAL,claude-3-7-sonnet-20250219,$4.62,$1.19,$5.80,343
4,TOTAL,gemini-2.5-pro-exp-03-25,$0.42,$0.07,$0.50,245
5,TOTAL,o1-2024-12-17,$8.63,$1.09,$9.73,186



Summary saved to: api_costs_summary.csv


In [40]:
# Spot check: total cost recorded in the first configured log file
temp_table = parse_log_file(log_files[0])
temp_table['total_cost'].sum()

9.7263

In [41]:
# Spot check: total cost recorded in the second configured log file
temp_table = parse_log_file(log_files[1])
temp_table['total_cost'].sum()

0.496782

In [42]:
# Spot check: total cost recorded in the third configured log file
temp_table = parse_log_file(log_files[2])
temp_table['total_cost'].sum()

5.802789