# Chaining LLMs

<center><img src="tweet_langchain.jpg" width=400 display="block"></center>


# So what is LangChain?

#### LangChain is a framework for developing applications powered by language models.


It aims to:

- Be data-aware: connect a language model to other sources of data

- Be agentic: allow a language model to interact with its environment

In [1]:
import os
from utils import *
from langchain.chat_models import ChatOpenAI
from langchain import (
    PromptTemplate,
    chains
)

# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# stored in the notebook itself; the redacted placeholder is kept only as a
# backward-compatible fallback when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "***REMOVED***")

# Prompt Templates

In [2]:
# Prompt template with a single placeholder, {smiles}, that is filled in with
# the molecule to ask about. The trailing sentence is left open so that the
# model continues it with the list of functional groups.
prompt_template = PromptTemplate(
    template=(
        "What functional groups are present in this molecule? {smiles}. "
        "This molecule contains the following functional groups:"
    ),
    input_variables=["smiles"],
)

In [3]:
# Example molecule, given as a SMILES string; later cells reuse `smiles`.
smiles = "c1(Br)ccc(CC(=O)Cl)cc1"

# `cdk` is not defined in this notebook; presumably it is provided by
# `from utils import *` and renders the molecule -- TODO confirm.
cdk(smiles)
# Fill the {smiles} placeholder. As the last expression of the cell, the
# formatted prompt string is shown as the cell output.
prompt_template.format(smiles=smiles)

'What functional groups are present in this molecule? c1(Br)ccc(CC(=O)Cl)cc1. This molecule contains the following functional groups:'

# Language Models

In [4]:
# Let's now define a language model.
from langchain.schema import HumanMessage

# A temperature close to zero keeps the sampled answers nearly deterministic.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.03,
    openai_api_key=OPENAI_API_KEY,
)

# Chat models expect a batch of message lists (`messages=[[...]]`), not the
# `prompts=[...]` list of plain strings used by completion-style LLMs, so the
# request is wrapped in a HumanMessage. The reply text is read from the first
# generation of the first (and only) conversation in the batch.
print(llm.generate(messages=[[
    HumanMessage(
        content="LIAC is a research group and we're doing a tutorial. Your task is to say hi to LIAC!"
    )
]]).generations[0][0].text)

ValidationError: 1 validation error for ChatOpenAI
__root__
  `openai` has no `ChatCompletion` attribute, this is likely due to an old version of the openai package. Try upgrading it with `pip install --upgrade openai`. (type=value_error)

# Putting it together: Chains

In [None]:
# A chain couples the prompt template with the language model: running it
# formats the prompt with the given input and sends the result to the model.
func_groups = chains.LLMChain(llm=llm, prompt=prompt_template)

# Display the molecule first, then print the model's answer for it.
cdk(smiles)
print(func_groups.run(smiles))

In [None]:
# Second example molecule (SMILES string), run through the same chain.
smiles_2 = "c1ccc(-c2cncc(C3CCCCC3)c2)cc1"
# `cdk` presumably renders the molecule (helper from `utils`) -- TODO confirm.
cdk(smiles_2)
# Print the chain's answer for this molecule.
print(func_groups.run(smiles_2))

# Improving prompting: Formatting, in-context learning, etc.

In [None]:
# Few-shot prompt: a role/instruction preamble, two worked Input/Output
# examples, and finally the query molecule with an open "Output:" for the
# model to complete.
#
# Adjacent string literals are concatenated verbatim, so every separator
# (the space after the first sentence and the newlines between the examples)
# has to be written explicitly; otherwise the examples run together as
# "...Begin!Input: ...c1Output: ..." and the few-shot structure is lost.
prompt_template_2 = PromptTemplate(
    input_variables=["smiles"],
    template=(
        "You are an expert chemist and your task is to identify the functional groups of the given molecules. "
        "You should give the name and the SMILES of each functional group. Begin!\n\n"
        "Input: Brc1cncc(C2CCCCC2)c1\n"
        "Output: 1. Halogen (Br)\n 2. Pyridine (c1cnccc1)\n 3. Cyclohexane (C2CCCCC2)\n\n"
        "Input: c1ccccc1CC(=O)Cl\n"
        "Output: 1. Phenyl (c1ccccc1)\n 2. Carbonyl (C=O)\n 3. Acyl halide (C(=O)Cl)\n 4. Halogen (Cl)\n\n"
        "Input: {smiles}\n"
        "Output:"
    ),
)

# Rebind `func_groups` to a chain that uses the few-shot prompt with the
# same language model as before.
func_groups = chains.LLMChain(llm=llm, prompt=prompt_template_2)
def fcs(smiles):
    """Show a molecule and print the functional groups the chain reports.

    Passes `smiles` to `cdk` and then prints the result of running the
    module-level `func_groups` chain on it. Nothing is returned.

    Args:
        smiles: The molecule to analyse, forwarded unchanged to both `cdk`
            and `func_groups.run`.
    """
    cdk(smiles)
    chain_answer = func_groups.run(smiles)
    print(chain_answer)

In [None]:
# Molecules to analyse: the two examples defined in earlier cells plus two
# additional SMILES strings.
smiles_list = [
    smiles,
    smiles_2,
    "C/C=C/C(=O)I",
    "C/C=C/C(=O)S"
]

# Run the display-and-print helper on each molecule in turn.
for s in smiles_list:
    fcs(s)

# Agents:

We would like to integrate other things:
- User input
- External knowledge sources
- External tools
- Memory storage
- ...

In [None]:
# Let's define a tool!
from langchain.utilities import WikipediaAPIWrapper

# Wrapper object whose `run` method is later registered as an agent tool.
wikipedia = WikipediaAPIWrapper()
# Quick manual check of the tool with a sample query; as the last expression
# of the cell, the returned value is shown as the cell output.
wikipedia.run("Ozzy Osbourne")

In [None]:
from langchain.agents import initialize_agent, Tool

# The toolset starts with a single entry: the Wikipedia wrapper's `run`
# method, exposed under a name and a description of when to use it.
toolset = [
    Tool(
        func=wikipedia.run,
        name="wikipedia search",
        description="Useful to get accurate information from wikipedia.",
    ),
]

# Build the agent from the toolset and the language model; verbose=True asks
# for the intermediate steps to be printed while it runs.
jamesbond = initialize_agent(
    toolset,
    llm,
    verbose=True,
    agent="zero-shot-react-description",
)

In [None]:
# Ask the agent about the example molecule; the f-string inserts the SMILES
# string stored in `smiles` into the question.
jamesbond.run(f"What are the functional groups of {smiles}")

# Probably not the most useful tool for this task...

In [None]:
from linuslingo.mol_utils.generals import list_functional_groups

# Register a second tool in the existing list (mutated in place), backed by
# the imported `list_functional_groups` helper.
toolset.append(
    Tool(
        func=list_functional_groups,
        name="Get functional groups",
        description="Use this tool to find the functional groups of some molecule. Input the smiles string of a single molecule.",
    )
)


# Rebuild the agent so that it is created with the extended toolset.
jamesbond = initialize_agent(
    toolset,
    llm,
    verbose=True,
    agent="zero-shot-react-description",
)

In [None]:
# Repeat the earlier question now that the agent was rebuilt with the
# functional-group tool in its toolset.
jamesbond.run(f"What are the functional groups of {smiles}")

# What's happening behind the curtain?

In [None]:
# Build a prompt from the current toolset via the agent's `create_prompt`.
# NOTE(review): `prompt` is not used by any of the visible cells below --
# confirm whether it is still needed.
prompt = jamesbond.agent.create_prompt(toolset)
# The prompt object held by the agent's LLM chain.
james_prompt = jamesbond.agent.llm_chain.prompt

# Show the names of the variables the agent prompt expects.
print(james_prompt.input_variables)

In [None]:
# Print the raw template text of the agent's prompt.
print(james_prompt.template)

# A more useful agent

In [None]:
from linuslingo.mol_utils.generals import (
    query2smiles,
)

# Register a third tool (in place) backed by the imported `query2smiles`
# helper, so the agent can be asked about molecules by name.
toolset.append(
    Tool(
        name="Get smiles of molecule",
        func=query2smiles,
        description="Useful when you need the smiles of a molecule. Input the name of the molecule"
    )
)


# Switch to GPT-4 for this multi-step task. `ChatOpenAI` is the name imported
# at the top of the notebook; the `chat_models` module itself is never
# imported there, so `chat_models.ChatOpenAI` would not resolve.
llm = ChatOpenAI(
    model_name="gpt-4",
    temperature=0.03,
    openai_api_key=OPENAI_API_KEY,
)

# Define an agent with the full toolset and the new model.
jamesbond = initialize_agent(
    toolset,
    llm,
    agent="zero-shot-react-description",
    verbose=True
)

# The agent's final response is kept in `final_answer` for later use.
final_answer = jamesbond.run(
    "Find the functional groups of cafeine, p-bromobenzaldehyde, and cyclosarin, "
    "then find what they have in common, and find some information on about it in wikipedia."
)

In [None]:
# Print the value returned by the agent run in the previous cell.
print(final_answer)