# Model Metrics Report

## Pull Logs

### Setup S3 Client

In [None]:
# Automate log pull
import boto3
import os
import re
import json
import random
import numpy as np
import tarfile
import pandas as pd

### Download Files

In [None]:
def download_s3_folder(bucket_name: str, prefix: str, local_dir: str, s3_client=None) -> None:
    """
    Download all objects under `prefix` in `bucket_name` to `local_dir`,
    preserving the S3 “folder” structure.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        prefix: Key prefix to list; the part of each key below this prefix
            becomes the path below `local_dir`.
        local_dir: Local directory that receives the files (created if missing).
        s3_client: Optional client object providing `get_paginator` and
            `download_file`. When omitted, the notebook-level `s3` client is used.

    Keys ending in '/' (folder placeholders) are skipped, and so are keys whose
    relative path would resolve outside `local_dir`.
    """
    # Fall back to the notebook-level client only when none was passed in
    client = s3 if s3_client is None else s3_client
    paginator = client.get_paginator('list_objects_v2')

    # Ensure local_dir exists
    os.makedirs(local_dir, exist_ok=True)

    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        for obj in page.get('Contents', []):
            key = obj['Key']

            # If the key ends with '/', it’s a “folder” placeholder – skip it
            if key.endswith('/'):
                continue

            # Strip the prefix from the key, so we recreate only the sub-dirs
            relative_path = os.path.relpath(key, prefix)

            # The prefix named this exact object: keep its file name instead of '.'
            if relative_path == os.curdir:
                relative_path = os.path.basename(key)

            # Never write outside local_dir (e.g. keys containing '..' segments,
            # or sibling keys that merely share the prefix string)
            if relative_path == os.pardir or relative_path.startswith(os.pardir + os.sep):
                print(f"Skipped s3://{bucket_name}/{key} (resolves outside {local_dir})")
                continue

            # Build the full local path
            local_path = os.path.join(local_dir, relative_path)

            # Make sure local subdirectory exists
            os.makedirs(os.path.dirname(local_path), exist_ok=True)

            # Download the file
            client.download_file(bucket_name, key, local_path)
            print(f"Downloaded s3://{bucket_name}/{key} → {local_path}")

    print(f"All files for {bucket_name} downloaded")

In [None]:
# AWS credentials
# Do not paste access keys into the notebook: they end up in saved outputs and
# version control. With no explicit keys, boto3 resolves credentials through its
# default chain (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment
# variables, the shared credentials file, or an attached IAM role).
s3 = boto3.client('s3')

# Names
BUCKET = "BUCKET_NAME"
LLM_NAME = "LLM_NAME"
PREFIX = f"{LLM_NAME}/"    # trailing slash ensures we only get that “folder”
LOCAL_DIR = f"./benchmarks/{LLM_NAME}"

In [None]:
# Mirror everything under PREFIX in BUCKET into LOCAL_DIR
download_s3_folder(
    bucket_name=BUCKET,
    prefix=PREFIX,
    local_dir=LOCAL_DIR,
)

### Unpack Files

In [None]:
# Make directories for the extracted archive contents
prometheus_logs_dir = f"{LOCAL_DIR}/extracted/prometheus_logs"
proxy_logs_dir = f"{LOCAL_DIR}/extracted/proxy_logs"

os.makedirs(prometheus_logs_dir, exist_ok=True)
os.makedirs(proxy_logs_dir, exist_ok=True)

# The "data" extraction filter rejects members that would land outside the
# target directory (absolute paths, '..' segments, escaping links). It only
# exists on Python versions that ship tarfile extraction filters, so pass it
# only when the running interpreter supports it.
_extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

# Unarchive into directories: (archive path, open mode, destination)
_archives = (
    (f"{LOCAL_DIR}/tests/prometheus_logs.tar", "r:", prometheus_logs_dir),
    (f"{LOCAL_DIR}/tests/proxy_logs.tar.gz", "r:gz", proxy_logs_dir),
)
for _archive_path, _mode, _target_dir in _archives:
    with tarfile.open(_archive_path, _mode) as tar:
        tar.extractall(_target_dir, **_extract_kwargs)

In [None]:
# Convert tsdb --> parquet
chunks_dir_list = os.listdir(prometheus_logs_dir)
chunks_dir_list = [dir for dir in chunks_dir_list if len(dir)>=26]

# Create Output Dir
output_dir = f"{LOCAL_DIR}/parquet"
os.makedirs(output_dir, exist_ok=True)

# Run prometheus tsdb dump cli tool
index = 0
for dir in chunks_dir_list:
    path = f"{prometheus_logs_dir}/{dir}/"
    output_path = f"{output_dir}/{index}_"
    !prometheus-tsdb-dump --block $path --output $output_path
    index += 1

## Compute Resource

In [None]:
# Full path of every entry in the parquet output directory, in sorted order
parquet_path_list = sorted(
    f"{output_dir}/{entry}" for entry in os.listdir(output_dir)
)

In [None]:
# Load the selected columns of every parquet file and stack them into a single
# DataFrame with a fresh 0..n-1 index
columns_list = ["Labels", "MinTimestamp", "MaxTimestamp", "MinValue", "MaxValue"]
frames = [
    pd.read_parquet(parquet_file, columns=columns_list)
    for parquet_file in parquet_path_list
]
full_df = pd.concat(frames, ignore_index=True)

# Labels are matched with string operations later on, so store them as text
full_df["Labels"] = full_df["Labels"].astype(str)

In [None]:
# Peak Memory Footprint
# Peak memory is read from the go_memstats_alloc_bytes series.
# A plain substring match would also select go_memstats_alloc_bytes_total,
# which is a cumulative allocation counter and would inflate the peak, so the
# negative lookahead excludes the "_total" variant.
memory_mask = full_df["Labels"].str.contains(r"go_memstats_alloc_bytes(?!_total)")
memory_df = full_df.loc[memory_mask]

# Convert bytes to gigabytes (decimal, 10^9 bytes)
PEAK_RAM_GB = memory_df["MaxValue"].max() / 1_000_000_000

In [None]:
# Hardware used for benchmarking
# Read the instance description from ticket.json. If the file is missing,
# unreadable, not valid JSON, or lacks an expected field, every value falls
# back to "Not Available" so the rest of the report can still be produced.
try:
    with open(f"{LOCAL_DIR}/final/ticket.json", 'r', encoding='utf-8') as f:
        ticket_json = json.load(f)

    # Pull information about instance
    instance_info = ticket_json['instance']
    INSTANCE_TYPE = instance_info['Instance Type']
    RAM = instance_info['RAM']
    NUM_GPU = instance_info['GPU Count']
    GPU_TYPE = instance_info['GPU Type']
    VRAM_TOTAL = instance_info['GPU RAM SUM']
    COST_PER_HR_USD = instance_info['Price']
except (OSError, ValueError, KeyError, TypeError) as err:
    # OSError: file missing/unreadable; ValueError: invalid JSON or encoding;
    # KeyError/TypeError: the JSON does not have the expected structure.
    print(f"Instance details unavailable: {err!r}")
    instance_info = INSTANCE_TYPE = RAM = NUM_GPU = GPU_TYPE = VRAM_TOTAL = COST_PER_HR_USD = "Not Available"

## Time

In [None]:
# Proxy Logs Request Response List
proxy_logs_dir = f"{LOCAL_DIR}/extracted/proxy_logs"
list_of_proxy_logs = os.listdir(proxy_logs_dir)

# A log's base name is everything up to and including the last ":<digits>"
# in the file name; files sharing a base name collapse to one entry.
pattern = re.compile(r".*\:\d+")
name_matches = (pattern.search(log) for log in list_of_proxy_logs)

# Ignore entries that do not follow the naming scheme instead of crashing on
# them, and sort so the order is the same on every run
list_of_names = sorted({match.group() for match in name_matches if match is not None})

In [None]:
# Calculate mean tokens/sec
# Header patterns are compiled once instead of on every loop iteration
CONTENT_LENGTH_PATTERN = re.compile("content-length.*")
PROCESSING_TIME_PATTERN = re.compile("X-LAS-Proxy-ProcessingTimeMS.*")


def _read_log_lines(path):
    """Return the lines of a proxy log file; undecodable bytes are replaced."""
    with open(path, "r", errors="replace") as file:
        return file.readlines()


def _header_number(lines, header_pattern):
    """Return the integer after the first space of the first line matching
    `header_pattern`, or None when no line matches or the value is not an int."""
    for line in lines:
        if header_pattern.match(line):
            fields = line.strip().split(" ")
            try:
                return int(fields[1])
            except (IndexError, ValueError):
                return None
    return None


# NOTE(review): `sample` is not defined anywhere in the visible notebook.
# If an earlier cell defines it, that selection is used; otherwise fall back
# to every log name so this cell does not fail with a NameError.
try:
    sample
except NameError:
    sample = list_of_names

tokens_per_second_values = []
processing_time_values = []

for log in sample:
    request_lines = _read_log_lines(f"{proxy_logs_dir}/{log}.request")
    response_lines = _read_log_lines(f"{proxy_logs_dir}/{log}.response")

    request_length = _header_number(request_lines, CONTENT_LENGTH_PATTERN)
    response_length = _header_number(response_lines, CONTENT_LENGTH_PATTERN)
    processing_time_ms = _header_number(response_lines, PROCESSING_TIME_PATTERN)

    # A log without the required headers, or with a zero processing time,
    # cannot yield a rate; skip it rather than abort the whole report
    if None in (request_length, response_length, processing_time_ms) or processing_time_ms <= 0:
        print(f"Skipping {log}: missing headers or non-positive processing time")
        continue

    # Token counts are estimated as content-length / 4
    # (presumably a ~4 bytes-per-token heuristic — confirm)
    request_tokens = request_length / 4
    response_tokens = response_length / 4
    processing_time_seconds = processing_time_ms / 1000

    processing_time_values.append(processing_time_seconds)
    tokens_per_second_values.append(
        (request_tokens + response_tokens) / processing_time_seconds
    )

# Build the arrays once at the end instead of re-allocating on every append
tokens_per_second_array = np.array(tokens_per_second_values, dtype=float)
processing_times = np.array(processing_time_values, dtype=float)

# Calculate mean tokens/sec
mean_tokens_per_second = tokens_per_second_array.mean()

# Calculate mean processing time sec/query
mean_processing_time = processing_times.mean()

## Cloud Compute Cost

In [None]:
# Cost per million tokens
# Hours needed to process one million tokens at the measured mean throughput
hours_to_1M_tokens = 1000000 / (mean_tokens_per_second * 3600)

# The hourly price may be the "Not Available" placeholder (or a numeric string)
# when the instance details could not be read; report the cost as unavailable
# instead of failing on float * str.
try:
    cost_per_million_tokens = hours_to_1M_tokens * float(COST_PER_HR_USD)
except (TypeError, ValueError):
    cost_per_million_tokens = "Not Available"
print(cost_per_million_tokens)