Import brightway2 and load the Ecoinvent database

In [17]:
import os
from getpass import getpass

import brightway2 as bw

# Select the Brightway project and make sure the biosphere database and LCIA
# methods are installed (bw2setup is a no-op when they already exist).
bw.projects.set_current("AI of LCA")
bw.bw2setup()

# Single source of truth for the release being used, so the presence check,
# the import call and the Database handle cannot drift apart.
ECOINVENT_VERSION = '3.10.1'
ECOINVENT_SYSTEM_MODEL = 'cutoff'  # can be cutoff / apos / consequential / EN15804
ECOINVENT_DB_NAME = f'ecoinvent-{ECOINVENT_VERSION}-{ECOINVENT_SYSTEM_MODEL}'

if ECOINVENT_DB_NAME in bw.databases:
    print('ecoinvent 3.10.1 is already present in the project')
else:
    # SECURITY: ecoinvent credentials must never be stored in the notebook.
    # They are read from the environment (EI_USERNAME / EI_PASSWORD) and only
    # prompted for when the import actually has to run, so a plain
    # "Restart & Run All" with the database already installed stays
    # non-interactive. Any password previously committed in this cell should
    # be considered compromised and changed.
    ecoinvent_username = os.environ.get("EI_USERNAME") or input("ecoinvent username: ")
    ecoinvent_password = os.environ.get("EI_PASSWORD") or getpass("ecoinvent password: ")
    bw.import_ecoinvent_release(
        version=ECOINVENT_VERSION,
        system_model=ECOINVENT_SYSTEM_MODEL,
        username=ecoinvent_username,
        password=ecoinvent_password
    )

# Handle to the ecoinvent database used by the search cells further down.
db = bw.Database(ECOINVENT_DB_NAME)

Biosphere database already present!!! No setup is needed
ecoinvent 3.10.1 is already present in the project


Import inventory data per functional unit

In [18]:
import os
import subprocess
import sys

import pandas as pd

# Location of the inventory workbook. The ARIA_DATA_INPUTS environment variable
# overrides the default so the notebook is not tied to one user's machine.
file_path = os.environ.get(
    "ARIA_DATA_INPUTS",
    "/Users/ek2517/Documents/GitHub/ARIA/Data_inputs.xlsx",
)

# Optionally open the Excel file in Microsoft Excel using AppleScript.
# osascript only exists on macOS; calling it elsewhere raises FileNotFoundError,
# so the step is skipped on other platforms.
if sys.platform == "darwin":
    # Escape backslashes and double quotes so the path cannot terminate the
    # AppleScript string literal early.
    escaped_path = file_path.replace("\\", "\\\\").replace('"', '\\"')
    applescript = f'''
tell application "Microsoft Excel"
    open POSIX file "{escaped_path}"
    activate
end tell
'''
    exit_code = subprocess.call(["osascript", "-e", applescript])
    if exit_code != 0:
        print(f"Could not open Excel automatically (osascript exit code {exit_code}); "
              f"open {file_path} manually.")
else:
    print(f"Automatic opening in Excel is only supported on macOS; open {file_path} manually if it needs editing.")

input("After editing and saving the Excel file, press Enter to continue...")

# Read the first two columns (flow name, amount) of the first 12 data rows,
# drop rows with any empty cell and renumber the index. Built as one chain so
# re-running the cell always starts from the file rather than a mutated frame.
data_frame = (
    pd.read_excel(file_path, usecols="A:B", nrows=12)
    .dropna()
    .reset_index(drop=True)
)

# Display the DataFrame to verify contents
print("Imported data:")
print(data_frame)


After editing and saving the Excel file, press Enter to continue... 


Imported data:
         Input/output    In/out
0    Sodium hydroxide  1.722910
1              Copper  0.330025
2      Cobalt sulfate  0.953811
3  Manganese sulphate  0.929307
4      Nickel sulfate  0.952290
5            Graphite  0.902995


Set up the OpenAI API

In [19]:
import os
from getpass import getpass

from openai import OpenAI
import openai

# SECURITY: never paste an API key into the notebook. The key is taken from the
# OPENAI_API_KEY environment variable, falling back to a hidden prompt so it
# never ends up in the saved file or its outputs. Any key that was previously
# committed in this cell must be treated as leaked and revoked in the OpenAI
# dashboard.
api_key = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")

# Shared client used by the helper functions and the main loop below.
client = openai.OpenAI(api_key=api_key)

Define supporting functions to help us find matches in the Ecoinvent database

In [26]:
def build_search_query(term):
    """
    Turn a free-text term into a wildcard query string.

    The term is split on whitespace and every word is wrapped in asterisks,
    with the wrapped words separated by single spaces
    (e.g. "waste graphite" -> "*waste* *graphite*").
    """
    wrapped_words = [f"*{word}*" for word in term.split()]
    # A term without any words still produces the bare pair of wildcards.
    return " ".join(wrapped_words) if wrapped_words else "**"

def get_alternative_search_terms(search_term):
    """
    Uses the ChatGPT API to suggest alternative search terms.

    Given an original search term (e.g., "waste graphite"), this function asks ChatGPT
    to provide a comma-separated list of alternative search terms that could capture similar datasets.

    Args:
        search_term: The activity name that produced no search results.

    Returns:
        A list of non-empty, de-duplicated suggestion strings in the order the
        model gave them. An empty list is returned when the API call fails or
        the reply contains no usable term.
    """
    import re  # local import: only needed for cleaning the model's reply

    prompt = (
        f"Suggest a list of 3 alternative search terms that could be used to find similar datasets "
        f"for the activity '{search_term}' in the ecoinvent database. Each suggestion should modify "
        f"the material name to represent similar materials. Return the suggestions as a comma-separated list."
    )

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",  # or use another model if desired
            messages=[
                {"role": "system", "content": "You are a helpful assistant that provides alternative search queries."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=50
        )
        # message.content may be None; treat that as an empty reply.
        suggestions_text = (response.choices[0].message.content or "").strip()
    except Exception as e:
        # Best-effort helper: report the failure and let the caller carry on.
        print("Error calling ChatGPT API:", e)
        return []

    # The model is asked for a comma-separated list but sometimes answers with
    # one item per line, numbering/bullets, quotes or a trailing comma. Accept
    # all of those and drop empty pieces, which would otherwise become a
    # match-everything wildcard query downstream.
    suggestions = []
    for line in suggestions_text.splitlines():
        for piece in line.split(','):
            cleaned = re.sub(r"^\s*(?:\d+[.)]|[-*•])\s+", "", piece)
            cleaned = cleaned.strip().strip("\"'").strip()
            if cleaned and cleaned not in suggestions:
                suggestions.append(cleaned)
    return suggestions



Try to find matches for each flow included in the imported data

In [27]:
# --- Main Processing Loop over the DataFrame ---
# For every inventory flow: search ecoinvent for candidate datasets (falling
# back to ChatGPT-suggested alternative terms), then ask ChatGPT to pick one
# candidate and store its answer in the "Ecoinvent process" column.

# Locations to filter on; constant for all rows, so defined once.
locations = ["GLO", "RoW", "GB"]


def _search_locations(query):
    """Run `query` against the ecoinvent database once per location and pool the hits."""
    hits = []
    for location in locations:
        hits.extend(db.search(query, limit=50, filter={"location": location}))
    return hits


def _format_results(results):
    """Render search hits as one '- name, location' line per hit."""
    return "".join(f"- {result['name']}, {result['location']}\n" for result in results)


# Create the output column up front so rows that end up without a
# recommendation are explicitly empty and re-running the cell starts clean.
data_frame["Ecoinvent process"] = None

for index, row in data_frame.iterrows():
    # Get the full sentence (activity name) from the current row
    activity_name = row["Input/output"].strip().lower()  # Ensure it's lowercase and stripped of whitespace
    print(f"\nProcessing row {index + 1}: {activity_name}")

    # Reset per row: without this a row with no hits would either raise
    # NameError (first row) or send the previous row's candidates to ChatGPT.
    results_string = ""

    # --- Step 1: Initial Search ---
    search_results = _search_locations(build_search_query(activity_name))

    # Check if any results were found
    if search_results:
        print(f"Found {len(search_results)} matching activities for '{activity_name}':")
        results_string = _format_results(search_results)
        print(results_string)
    else:
        print(f"No matching activities found for '{activity_name}'.")

        # --- Step 2: Use ChatGPT to Suggest Alternative Search Terms ---
        alternative_terms = get_alternative_search_terms(activity_name)
        if alternative_terms:
            print("ChatGPT suggested the following alternative search terms:", alternative_terms)
            # Try each alternative suggestion until matching datasets are found.
            for alt_term in alternative_terms:
                alternative_results = _search_locations(build_search_query(alt_term))
                if alternative_results:
                    print(f"\nFound {len(alternative_results)} matching activities for alternative search term '{alt_term}':")
                    results_string = _format_results(alternative_results)
                    print(results_string)
                    break  # Stop after the first successful alternative is found.
            else:
                # Loop finished without a break: none of the alternatives matched.
                print("No matching datasets found even after trying ChatGPT suggested alternative search terms.")
        else:
            print("No alternative search term suggestions were received from ChatGPT.")

    # --- Step 3: Ask ChatGPT to choose one of the candidates ---
    if not results_string:
        # Nothing to choose from; leave the output cell empty instead of asking
        # the model to pick from an empty (or stale) list.
        print(f"Skipping dataset selection for '{activity_name}': no candidate datasets were found.")
        continue

    try:
        chat_completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": (
                        #
                        f"I am performing a life cycle assessment of a battery recycling process to recover valuable materials from electric vehicle batteries\n"
                        f"The product system utilises lithium carbonate to relithiate the recovered cathode material, resulting in regenerated cathode material and waste flows, resulting from non-cathode materials included in lithium-ion batteries\n"
                        f". The geographical scope of the study is in the UK, therefore datasets that include GB should be given prference, if you can't find matches use RoW or GLO, else choose a country closest to the UK. In this you should account for the different country codes included in the ecoinvent database .\n"
                    )
                },

                # Rules are newline-terminated and numbered 1-5 so they are not
                # fused together or ambiguously numbered in the prompt.
                {"role": "user", "content": (
                        f"The Ecoinvent database found the following results'{results_string}' related to'{activity_name}' \n"
                        f"Choose one dataset to be used for '{activity_name}' for life cycle assessment under the following rules:\n"
                        f"1. If they exists, you should give the highest preference to datasets that include the exact term '{activity_name}'.\n"
                        f"2.You must only print the exact name of the recommended dataset in the original format as shown in' {results_string}' without any extra text, special characters, explanation, or punctuation.\n"
                        f"3.You should always give preference to datasets that include the exact terms as appearing in term '{activity_name}'.\n"
                        f"4. If '{activity_name}' includes the term production, never choose a dataset that includes the term waste\n"
                        f"5. If '{activity_name}' includes the term waste, never choose a dataset that includes the term production, you can give preference to the ones including the word treatment"
                    )
                }
            ]
        )
        # Extract the response from ChatGPT (content may be None).
        response_content = (chat_completion.choices[0].message.content or "").strip()
    except Exception as e:
        # Same best-effort policy as get_alternative_search_terms: report the
        # failure and move on so one bad call does not lose the whole run.
        print("Error calling ChatGPT API:", e)
        continue

    print("ChatGPT Response:")
    print(response_content)

    # Save the ChatGPT response in the new column for the current row
    data_frame.at[index, "Ecoinvent process"] = response_content

print(data_frame.to_string(index=False))


Processing row 1: sodium hydroxide
Excluding 146 filtered results
Excluding 135 filtered results
Excluding 190 filtered results
Found 94 matching activities for 'sodium hydroxide':
- disodium disulphite production, GLO
- trisodium phosphate production, GLO
- sodium fluoride production, GLO
- sodium formate production, GLO
- sodium persulfate production, GLO
- sodium methoxide production, GLO
- sodium tetrafluoroborate production, GLO
- sodium aluminate production, powder, GLO
- sodium hydrogen sulfate production, GLO
- chlor-alkali electrolysis, diaphragm cell, GLO
- chlor-alkali electrolysis, mercury cell, GLO
- soda ash, dense, to generic market for neutralising agent, GLO
- chlor-alkali electrolysis, diaphragm cell, GLO
- chlor-alkali electrolysis, diaphragm cell, GLO
- chlor-alkali electrolysis, mercury cell, GLO
- chlor-alkali electrolysis, mercury cell, GLO
- aluminium collector foil production, for Li-ion battery, GLO
- diethyl ether production, GLO
- diethyl ether production, 