# Estimate impact from tokens
I have access to historical token and model data; can I extract an estimated environmental impact from it?

_Note: it looks like we'll have to ignore per-request duration for now (a fixed average latency is used instead); we will need to consider the consequences of this._

In [1]:
from ecologits.tracers.utils import llm_impacts

In [2]:
# Fixed latency passed as `request_latency` to every impact estimate, used in place of a
# measured per-request duration. Presumably expressed in seconds — TODO confirm against the ecologits docs.
REQUEST_LATENCY = 1.67 # Based on average usage data from cloud provider

def get_llm_impacts(
    model_name: str,
    output_token_count: int,
    electricity_mix_zone: str = "SWE",
    provider: str = "openai",
    request_latency: "float | None" = None,
):
    """Estimate the impacts of generating `output_token_count` tokens with a model.

    Thin wrapper around ecologits' `llm_impacts` that fills in the values this
    notebook does not have per request.

    Args:
        model_name: Name of the model as known to the provider (e.g. "gpt-4o-mini").
        output_token_count: Number of generated tokens to estimate for.
        electricity_mix_zone: Three-letter zone code for the electricity mix;
            for UK, will be "GBR", for US, will be "USA". Defaults to "SWE".
        provider: Provider name forwarded to `llm_impacts`. Defaults to "openai",
            which is the value this notebook previously hard-coded.
        request_latency: Latency forwarded to `llm_impacts`. When left as None,
            the module-level `REQUEST_LATENCY` average is used.

    Returns:
        Whatever `llm_impacts` returns; in this notebook the result exposes
        `energy`, `gwp`, `adpe`, `pe`, `usage` and `embodied` attributes.
    """
    # Resolve the fallback at call time so that editing REQUEST_LATENCY and
    # re-running only the config cell still takes effect.
    if request_latency is None:
        request_latency = REQUEST_LATENCY

    return llm_impacts(
        provider=provider,
        model_name=model_name,
        output_token_count=output_token_count,
        request_latency=request_latency,
        electricity_mix_zone=electricity_mix_zone,
    )

In [3]:
# Show every field of the impact estimate for a 100-token response:
vars(get_llm_impacts("gpt-4o-mini", 100))



{'energy': Energy(type='energy', name='Energy', value=RangeValue(min=0.0003267193333333333, max=0.0005405593333333333), unit='kWh'),
 'gwp': GWP(type='GWP', name='Global Warming Potential', value=RangeValue(min=2.0667645761095076e-05, max=3.060402073709507e-05), unit='kgCO2eq'),
 'adpe': ADPe(type='ADPe', name='Abiotic Depletion Potential (elements)', value=RangeValue(min=3.971346932023324e-10, max=4.137513870103324e-10), unit='kgSbeq'),
 'pe': PE(type='PE', name='Primary Energy', value=RangeValue(min=0.003672075431230847, max=0.006029875271230847), unit='MJ'),
 'usage': Usage(type='usage', name='Usage', energy=Energy(type='energy', name='Energy', value=RangeValue(min=0.0003267193333333333, max=0.0005405593333333333), unit='kWh'), gwp=GWP(type='GWP', name='Global Warming Potential', value=RangeValue(min=1.5181471230399998e-05, max=2.5117846206399996e-05), unit='kgCO2eq'), adpe=ADPe(type='ADPe', name='Abiotic Depletion Potential (elements)', value=RangeValue(min=2.5388117859866668e-11, 

In [4]:
# Build a {token_count: impacts} lookup across several orders of magnitude

collected_llm_impacts = {
    token_count: get_llm_impacts("gpt-4o-mini", token_count)
    for token_count in [100, 1000, 10000, 100000]
}


In [5]:
# Total GWP range per token count (reported values are multiplied by 1000 for display)

for token_count, impacts in collected_llm_impacts.items():
    gwp_range = impacts.gwp.value
    print(f"{token_count} tokens: {gwp_range.min * 1000} - {gwp_range.max * 1000} gCO2eq")

100 tokens: 0.020667645761095078 - 0.03060402073709507 gCO2eq
1000 tokens: 0.12820130383469508 - 0.22756505359469506 gCO2eq
10000 tokens: 1.203537884570695 - 2.1971753821706943 gCO2eq
100000 tokens: 11.956903691930693 - 21.893278667930694 gCO2eq


In [6]:
# Usage-phase GWP range per token count (reported values are multiplied by 1000 for display)

for token_count, impacts in collected_llm_impacts.items():
    usage_gwp_range = impacts.usage.gwp.value
    print(f"{token_count} tokens: {usage_gwp_range.min * 1000} - {usage_gwp_range.max * 1000} gCO2eq")

100 tokens: 0.015181471230399998 - 0.025117846206399997 gCO2eq
1000 tokens: 0.12271512930400001 - 0.22207887906399998 gCO2eq
10000 tokens: 1.1980517100399999 - 2.1916892076399996 gCO2eq
100000 tokens: 11.951417517399998 - 21.887792493400003 gCO2eq


In [7]:
# Embodied GWP range per token count (reported values are multiplied by 1000 for display)

for token_count, impacts in collected_llm_impacts.items():
    embodied_gwp_range = impacts.embodied.gwp.value
    print(f"{token_count} tokens: {embodied_gwp_range.min * 1000} - {embodied_gwp_range.max * 1000} gCO2eq")

100 tokens: 0.005486174530695078 - 0.005486174530695078 gCO2eq
1000 tokens: 0.005486174530695078 - 0.005486174530695078 gCO2eq
10000 tokens: 0.005486174530695078 - 0.005486174530695078 gCO2eq
100000 tokens: 0.005486174530695078 - 0.005486174530695078 gCO2eq


In [8]:
def get_llm_emission_summary(model_name: str, output_token_count: int, electricity_mix_zone: str="SWE"):
    """Summarise the total GWP estimate for a request as a min/max pair.

    Calls `get_llm_impacts` with the given arguments and scales the lower and
    upper bounds of the overall `gwp` value by 1000 (the notebook reports the
    raw value in kgCO2eq and the scaled value as gCO2eq).

    Returns:
        A dict with keys "min" and "max" holding the scaled bounds.
    """
    grams_per_kilogram = 1000
    gwp_range = get_llm_impacts(model_name, output_token_count, electricity_mix_zone).gwp.value
    return {
        "min": gwp_range.min * grams_per_kilogram,
        "max": gwp_range.max * grams_per_kilogram,
    }