<a href="https://colab.research.google.com/github/stravo1/Colab-Notebooks/blob/main/Langchain_OpenRouter_Integration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install python-dotenv openai langchain_core

In [None]:
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

In [None]:
from typing import Any, List, Mapping, Optional, Union, Sequence, Tuple, Dict, Iterator

from openai import OpenAI
from os import getenv

from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.prompt_values import PromptValue
from langchain_core.messages.base import BaseMessage
from langchain_core.runnables.config import RunnableConfig

In [None]:
# more models at https://openrouter.ai/models
model = "google/gemma-7b-it:free"

# system message that the request helpers in this file place first in
# every conversation
system_prompt = dict(
    role="system",
    content="You are an AI assistant name Gemma.",
)

# OpenAI API client pointed at the OpenRouter endpoint; the key is read from
# the API_KEY environment variable
client = OpenAI(
    api_key=getenv("API_KEY"),
    base_url="https://openrouter.ai/api/v1",
)

# response is delivered all at once instead of streaming
def get_completed_response(prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    The request goes through the module-level ``client`` using ``model``,
    with ``system_prompt`` placed ahead of the user message.

    Args:
        prompt: Text used as the ``content`` of the user message.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    user_message = {"role": "user", "content": prompt}
    conversation = [system_prompt, user_message]
    completion = client.chat.completions.create(
        model=model,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# response is streamed
def get_stream(prompt):
    """Start a streaming chat completion for ``prompt`` and return the stream.

    The request goes through the module-level ``client`` using ``model``,
    with ``stream=True`` and ``temperature=0.1``; ``system_prompt`` is placed
    ahead of the user message.

    Args:
        prompt: Text used as the ``content`` of the user message.

    Returns:
        Whatever ``client.chat.completions.create`` returns for a streaming
        request; the caller iterates over it to receive chunks.
    """
    request = dict(
        model=model,
        stream=True,
        temperature=0.1,
        messages=[system_prompt, {"role": "user", "content": prompt}],
    )
    return client.chat.completions.create(**request)

# inherit the LLM class from langchain
class OpenRouterLLM(LLM):
    """LangChain ``LLM`` backed by the OpenRouter helper functions of this file.

    One-shot completions come from ``get_completed_response`` and streamed
    completions from ``get_stream``.

    NOTE: ``OpenRouterLLM`` is defined a second time later in this file; that
    later definition rebinds the name and is the one instances are built from.
    """

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM type."""
        return "open-router"

    # implementing the _call function is mandatory
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Run a one-shot completion for ``prompt`` and return its text.

        Args:
            prompt: Text sent as the user message.
            stop: Optional stop sequences. The reply is cut off just before
                the earliest occurrence of any of them.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The reply text, or an empty string when the helper returns no
            content.
        """
        # The helper may hand back None for the message content; the declared
        # return type here is str, so normalise to an empty string.
        text = get_completed_response(prompt) or ""
        if stop:
            # Positions of every stop sequence that actually occurs in the text.
            hits = [
                pos
                for pos in (text.find(token) for token in stop if token)
                if pos != -1
            ]
            if hits:
                text = text[: min(hits)]
        return text

    def invoke(
        self,
        input: str,
        config: Optional[RunnableConfig] = None, *,
        stop: Optional[List[str]] = None,
        **kwargs: Any
    ) -> str:
        """Return the completed reply for ``input``.

        This override calls ``get_completed_response`` directly; ``config``,
        ``stop`` and ``kwargs`` are accepted for signature compatibility and
        are not used.

        Args:
            input: Prompt text. A ``PromptValue`` is rendered with
                ``to_string()`` first so prompt templates can feed this LLM.

        Returns:
            The value returned by ``get_completed_response``.
        """
        text = input.to_string() if isinstance(input, PromptValue) else input
        return get_completed_response(text)

    def stream(
        self,
        input: str,
        config: Optional[RunnableConfig] = None, *,
        stop: Optional[List[str]] = None,
        **kwargs: Any
    ) -> Iterator[Any]:
        """Return the streaming response for ``input``.

        This override returns the object produced by ``get_stream`` unchanged,
        so callers iterate over the client's chunk objects rather than plain
        strings. ``config``, ``stop`` and ``kwargs`` are accepted for
        signature compatibility and are not used.

        Args:
            input: Prompt text. A ``PromptValue`` is rendered with
                ``to_string()`` first.

        Returns:
            The stream object returned by ``get_stream``.
        """
        text = input.to_string() if isinstance(input, PromptValue) else input
        return get_stream(text)


class OpenRouterLLM(LLM):
    """LangChain ``LLM`` backed by the OpenRouter helper functions of this file.

    One-shot completions come from ``get_completed_response`` and streamed
    completions from ``get_stream``.

    NOTE: this is the second definition of ``OpenRouterLLM`` in this file; it
    rebinds the name set by the earlier definition, so this is the class that
    instances are built from.
    """

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM type."""
        return "open-router"

    # implementing the _call function is mandatory
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Run a one-shot completion for ``prompt`` and return its text.

        Args:
            prompt: Text sent as the user message.
            stop: Optional stop sequences. The reply is cut off just before
                the earliest occurrence of any of them.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The reply text, or an empty string when the helper returns no
            content.
        """
        # The helper may hand back None for the message content; the declared
        # return type here is str, so normalise to an empty string.
        text = get_completed_response(prompt) or ""
        if stop:
            # Positions of every stop sequence that actually occurs in the text.
            hits = [
                pos
                for pos in (text.find(token) for token in stop if token)
                if pos != -1
            ]
            if hits:
                text = text[: min(hits)]
        return text

    def invoke(
        self,
        input: str,
        config: Optional[RunnableConfig] = None, *,
        stop: Optional[List[str]] = None,
        **kwargs: Any
    ) -> str:
        """Return the completed reply for ``input``.

        This override calls ``get_completed_response`` directly; ``config``,
        ``stop`` and ``kwargs`` are accepted for signature compatibility and
        are not used.

        Args:
            input: Prompt text. A ``PromptValue`` is rendered with
                ``to_string()`` first so prompt templates can feed this LLM.

        Returns:
            The value returned by ``get_completed_response``.
        """
        text = input.to_string() if isinstance(input, PromptValue) else input
        return get_completed_response(text)

    def stream(
        self,
        input: str,
        config: Optional[RunnableConfig] = None, *,
        stop: Optional[List[str]] = None,
        **kwargs: Any
    ) -> Iterator[Any]:
        """Return the streaming response for ``input``.

        This override returns the object produced by ``get_stream`` unchanged,
        so callers iterate over the client's chunk objects rather than plain
        strings. ``config``, ``stop`` and ``kwargs`` are accepted for
        signature compatibility and are not used.

        Args:
            input: Prompt text. A ``PromptValue`` is rendered with
                ``to_string()`` first.

        Returns:
            The stream object returned by ``get_stream``.
        """
        text = input.to_string() if isinstance(input, PromptValue) else input
        return get_stream(text)

In [None]:
llm = OpenRouterLLM()

# one-shot completion: the whole reply is printed at once
print(llm.invoke("hi, what is your name?"), "\n")

# streamed completion: print each text delta as it arrives
for chunk in llm.stream("Write me a song about sparkling water."):
    # A streamed chunk may carry an empty ``choices`` list (the OpenAI
    # streaming reference documents this case); skip it instead of raising
    # IndexError on ``choices[0]``.
    if not chunk.choices:
        continue
    # ``delta.content`` can be None, hence the ``or ""`` fallback.
    print(chunk.choices[0].delta.content or "", end="")