<a href="https://colab.research.google.com/github/olonok69/LLM_Notebooks/blob/main/langchain/custom/LangChain_Custom_LLMChat_Class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LangChain Custom Models

- https://python.langchain.com/api_reference/core/language_models/langchain_core.language_models.chat_models.BaseChatModel.html


- https://python.langchain.com/docs/how_to/custom_chat_model/

# Models
- https://huggingface.co/microsoft/Phi-3.5-vision-instruct
- https://github.com/microsoft/Phi-3CookBook

- https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct/tree/main
- https://github.com/QwenLM/Qwen2.5

In [None]:
# Mount Google Drive (Colab only) so the /content/drive/MyDrive/... model and
# image paths used later in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

# Install Libraries

In [None]:
# Install the notebook's dependencies with pip (IPython shell magics).
# transformers is pinned to 4.46.3; accelerate is upgraded to the latest release.
# NOTE(review): `evaluate` is installed twice (second and fifth pip lines) — redundant but harmless.
# NOTE(review): mlflow, optimum, evaluate, textstat, tiktoken, psutil, pynvml,
# datasets and rouge_score are not imported anywhere in the visible notebook —
# confirm they are still needed.
!pip install mlflow   optimum --quiet
! pip install  evaluate  textstat tiktoken -q
! pip install psutil pynvml -q
! pip install -q   bitsandbytes sentencepiece
! pip install datasets evaluate rouge_score -q
! pip install transformers==4.46.3 -q
! pip install accelerate -U -q
! pip install langchain  langchain-community -q
! pip install flash-attn -q

In [None]:
from typing import Any, Dict, List, Optional
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.outputs import ChatResult, ChatGeneration
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun
from langchain_core.runnables import run_in_executor
from transformers import AutoProcessor, AutoModelForCausalLM
import torch
from PIL import Image

class CustomLLMChat(BaseChatModel):
    """LangChain chat model backed by a locally loaded vision-language model.

    The processor and the causal-LM weights are loaded in ``__init__`` from
    ``model_name`` (a Hugging Face repo id or a local directory) and placed on
    ``cuda:0``. Each call takes an already formatted prompt string and an
    optional image path; only the first message of a conversation is used.
    """

    # Repo id or local directory handed to ``from_pretrained``.
    model_name: str = "microsoft/Phi-3.5-vision-instruct"
    # Processor (tokenizer + image preprocessing), set in ``__init__``.
    processor: AutoProcessor = None
    # Generation model, set in ``__init__``.
    model: AutoModelForCausalLM = None
    # The ``model_path`` argument given to ``__init__`` (``None`` if omitted).
    model_path: str = None

    def __init__(self, model_path: Optional[str] = None, **kwargs: Any) -> None:
        """Load the processor and model.

        Args:
            model_path: Optional repo id or local directory. When given it
                overrides the default ``model_name``.
            **kwargs: Forwarded to ``BaseChatModel.__init__``.
        """
        super().__init__(**kwargs)
        if model_path is not None:
            self.model_name = model_path
            self.model_path = model_path

        self.processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, trust_remote_code=True, torch_dtype="auto", device_map="cuda:0"
        )

    def _truncate_at_stop(self, text: str, stop: Optional[List[str]]) -> str:
        """Return *text* cut just before the earliest occurrence of any stop string."""
        if not stop:
            return text
        cut = len(text)
        for marker in stop:
            # An empty marker would match at index 0 and erase the whole reply.
            position = text.find(marker) if marker else -1
            if position != -1:
                cut = min(cut, position)
        return text[:cut]

    def _call(
            self,
            prompt: str,
            image_path: Optional[str] = None,
            stop: Optional[List[str]] = None,
            run_manager: Optional[CallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> str:
        """Generate a completion for *prompt*, optionally conditioned on an image.

        Args:
            prompt: Fully formatted prompt string (chat markers included).
            image_path: Path of an image to feed to the processor, or ``None``
                for a text-only call.
            stop: Optional stop strings; the reply is truncated before the
                first one that appears.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: ``max_new_tokens`` (default 1000) is honoured; other
                keys are ignored.

        Returns:
            The decoded text of the newly generated tokens only.
        """
        max_new_tokens = kwargs.get("max_new_tokens", 1000)
        # Follow the model's actual device instead of assuming the default CUDA device.
        device = self.model.device

        if image_path:
            # Close the file handle as soon as the processor has consumed the pixels.
            with Image.open(image_path) as image:
                inputs = self.processor(prompt, images=image, return_tensors="pt").to(device)
        else:
            inputs = self.processor(prompt, images=None, return_tensors="pt").to(device)

        generate_ids = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            eos_token_id=self.processor.tokenizer.eos_token_id,
        )
        # generate() returns prompt + continuation; drop the prompt tokens.
        prompt_length = inputs['input_ids'].shape[1]
        generate_ids = generate_ids[:, prompt_length:]
        response = self.processor.batch_decode(generate_ids,
                                               skip_special_tokens=True,
                                               clean_up_tokenization_spaces=False)[0]
        return self._truncate_at_stop(response, stop)

    async def _acall(
            self,
            prompt: str,
            image_path: Optional[str] = None,
            stop: Optional[List[str]] = None,
            run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> str:
        """Async counterpart of ``_call``: runs ``_call`` through ``run_in_executor``."""
        return await run_in_executor(
            None,
            self._call,
            prompt,
            image_path,
            stop,
            run_manager.get_sync() if run_manager else None,
            **kwargs,
        )

    @property
    def _llm_type(self) -> str:
        """Identifier string for this model type."""
        return "custom-llm-vision-chat"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Parameters that identify this model instance."""
        return {"model_name": self.model_name}

    def _generate(
            self,
            messages: List[BaseMessage],
            stop: Optional[List[str]] = None,
            run_manager: Optional[CallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> ChatResult:
        """Produce a ``ChatResult`` from the first message in *messages*.

        The first message's ``content`` is the prompt; an optional image path
        is read from its ``metadata["image_path"]`` when that attribute exists.
        Any further messages are ignored.

        Raises:
            ValueError: If *messages* is empty.
        """
        if not messages:
            raise ValueError("CustomLLMChat requires at least one message")

        first = messages[0]
        prompt = first.content
        # Messages built without a ``metadata`` extra simply carry no image.
        metadata = getattr(first, "metadata", None)
        image_path = metadata.get("image_path") if isinstance(metadata, dict) else None
        response_text = self._call(prompt, image_path, stop, run_manager, **kwargs)

        # Wrap the reply in the structure LangChain expects from a chat model.
        ai_message = AIMessage(content=response_text)
        return ChatResult(generations=[ChatGeneration(message=ai_message)])


def create_prompt(message: str, image_path: Optional[str] = None) -> List[BaseMessage]:
    """Build the single-message prompt consumed by ``CustomLLMChat``.

    The text is wrapped as ``<|user|>``, an optional ``<|image_1|>``
    placeholder (only when *image_path* is truthy), the message, ``<|end|>``
    and a trailing ``<|assistant|>`` marker. The image path travels in the
    message's ``metadata`` under the ``"image_path"`` key.

    Returns:
        A one-element list holding the ``HumanMessage``.
    """
    image_marker = ""
    if image_path:
        image_marker = "<|image_1|>\n"

    header = f"<|user|>\n{image_marker}"
    footer = "<|end|>\n<|assistant|>\n"
    text = f"{header}{message}{footer}"

    human_message = HumanMessage(content=text, metadata={"image_path": image_path})
    return [human_message]




In [None]:
# Local directory (on the mounted Drive) holding the Phi-3.5 vision checkpoint.
model_path ="/content/drive/MyDrive/MODELS/Phi-3.5-vision-instruct"

In [None]:
import gc
import torch
# Run the garbage collector first so CUDA tensors held only by unreachable
# Python objects are actually freed; only then can empty_cache() hand the
# now-unused cached blocks back to the driver.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Build the one-message vision prompt; the image path is stored in the message metadata.
prompt = create_prompt("Describe this image", '/content/drive/MyDrive/Google AI Studio/forest.jpg')

In [None]:
# Open the same image file that `prompt` points at and show it as the cell output.
image_path = '/content/drive/MyDrive/Google AI Studio/forest.jpg'
image = Image.open(image_path)
image

In [None]:
# Inspect the formatted prompt string (chat markers included) built by create_prompt.
prompt[0].content

In [None]:
# Same value as the earlier model_path assignment.
# NOTE(review): presumably repeated so this part of the notebook can be run on its own — confirm.
model_path = "/content/drive/MyDrive/MODELS/Phi-3.5-vision-instruct"

In [None]:
# Instantiate the vision chat model; __init__ loads the processor and weights onto cuda:0.
model = CustomLLMChat(model_path=model_path)

In [None]:
# Inspect the image path that create_prompt attached to the message metadata.
prompt[0].metadata["image_path"]

In [None]:
import pprint

In [None]:
# Synchronous call: describe the image and print the returned message object.
result = model.invoke(prompt)
print(result)

In [None]:
# Pretty-print only the generated text of the synchronous result.
pprint.pprint(result.content)

In [None]:
# Asynchronous call with the same prompt.
# NOTE: top-level `await` works in a notebook/IPython cell, not in a plain Python script.
result2 =await model.ainvoke(prompt)

In [None]:
# Pretty-print the generated text of the asynchronous result.
pprint.pprint(result2.content)

In [None]:
# Drop the vision model before loading the next one. Collect garbage first so
# any CUDA tensors kept alive only by unreachable objects are freed, then
# release PyTorch's now-unused cached GPU blocks.
del model
gc.collect()
torch.cuda.empty_cache()

In [None]:
import torch
from transformers import  AutoTokenizer
from typing import Any, Dict, List, Optional

In [None]:
class CustomLLMChat2(BaseChatModel):
    """LangChain chat model backed by a locally loaded text-only causal LM.

    The tokenizer and weights are loaded in ``__init__`` from ``model_name``
    (a Hugging Face repo id or a local directory) and placed on ``cuda:0``.
    The user text is wrapped with a fixed system prompt and formatted through
    the tokenizer's chat template; only the first message of a conversation
    is used.
    """

    # Repo id or local directory handed to ``from_pretrained``.
    model_name: str = "Qwen/Qwen2.5-1.5B-Instruct"
    # Tokenizer, set in ``__init__``.
    tokenizer : AutoTokenizer = None
    # Generation model, set in ``__init__``.
    model: AutoModelForCausalLM = None
    # The ``model_path`` argument given to ``__init__`` (``None`` if omitted).
    model_path: str = None

    def __init__(self, model_path: Optional[str] = None, **kwargs: Any) -> None:
        """Load the tokenizer and model.

        Args:
            model_path: Optional repo id or local directory. When given it
                overrides the default ``model_name``.
            **kwargs: Forwarded to ``BaseChatModel.__init__``.
        """
        super().__init__(**kwargs)
        if model_path is not None:
            self.model_name = model_path
            self.model_path = model_path

        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, trust_remote_code=True, torch_dtype="auto", device_map="cuda:0"
        )

    def _truncate_at_stop(self, text: str, stop: Optional[List[str]]) -> str:
        """Return *text* cut just before the earliest occurrence of any stop string."""
        if not stop:
            return text
        cut = len(text)
        for marker in stop:
            # An empty marker would match at index 0 and erase the whole reply.
            position = text.find(marker) if marker else -1
            if position != -1:
                cut = min(cut, position)
        return text[:cut]

    def _call(
            self,
            prompt: str,
            stop: Optional[List[str]] = None,
            run_manager: Optional[CallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> str:
        """Generate a reply to *prompt*.

        Args:
            prompt: Plain user text; chat formatting is applied here.
            stop: Optional stop strings; the reply is truncated before the
                first one that appears.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: ``max_new_tokens`` (default 512) is honoured; other
                keys are ignored.

        Returns:
            The decoded text of the newly generated tokens only.
        """
        max_new_tokens = kwargs.get("max_new_tokens", 512)

        # Build a two-turn conversation: fixed system prompt + the user's text.
        messages = [
            {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]

        # Render the conversation to a string ending with the assistant-turn opener.
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens
        )
        # generate() returns prompt + continuation; keep only the continuation.
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return self._truncate_at_stop(response, stop)

    async def _acall(
            self,
            prompt: str,
            stop: Optional[List[str]] = None,
            run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> str:
        """Async counterpart of ``_call``: runs ``_call`` through ``run_in_executor``."""
        return await run_in_executor(
            None,
            self._call,
            prompt,
            stop,
            run_manager.get_sync() if run_manager else None,
            **kwargs,
        )

    @property
    def _llm_type(self) -> str:
        """Identifier string for this model type."""
        return "custom-llm-chat"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Parameters that identify this model instance."""
        return {"model_name": self.model_name}

    def _generate(
            self,
            messages: List[BaseMessage],
            stop: Optional[List[str]] = None,
            run_manager: Optional[CallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> ChatResult:
        """Produce a ``ChatResult`` from the first message in *messages*.

        The first message's ``content`` is used as the user prompt; any
        further messages are ignored.

        Raises:
            ValueError: If *messages* is empty.
        """
        if not messages:
            raise ValueError("CustomLLMChat2 requires at least one message")

        prompt = messages[0].content
        response_text = self._call(prompt, stop, run_manager, **kwargs)

        # Wrap the reply in the structure LangChain expects from a chat model.
        ai_message = AIMessage(content=response_text)
        return ChatResult(generations=[ChatGeneration(message=ai_message)])


def create_prompt_chat(message: str ) -> List[BaseMessage]:
    """Wrap *message* in a one-element list of ``HumanMessage``.

    The text is passed through unchanged. ``CustomLLMChat2._call`` formats the
    conversation with the tokenizer's chat template and
    ``add_generation_prompt=True``, so no assistant marker is appended here:
    a hand-written ``<|assistant|>`` suffix would reach the model as literal
    text inside the user turn.

    Args:
        message: The user's question or instruction.

    Returns:
        ``[HumanMessage(content=message)]``.
    """
    return [HumanMessage(content=message)]

In [None]:
# Build the text-only prompt for the second (chat) model.
prompt2 = create_prompt_chat("How to explain Internet for a medieval knight?")

In [None]:
# Local directory (on the mounted Drive) holding the Qwen2.5-1.5B-Instruct checkpoint.
model_path_2 = "/content/drive/MyDrive/MODELS/Qwen2.5-1.5B-Instruct"

In [None]:
# Instantiate the text chat model; __init__ loads the tokenizer and weights onto cuda:0.
model2= CustomLLMChat2(model_path=model_path_2)

In [None]:
# Display the model-type identifier ("custom-llm-chat").
model2._llm_type

In [None]:
# Display the identifying parameters: a dict with the single key "model_name".
model2._identifying_params

In [None]:
# model_name was overwritten with model_path_2 in __init__, so this shows the local path.
model2.model_name

In [None]:
# Synchronous call to the text model; note this rebinds `result` from the earlier vision run.
result = model2.invoke(prompt2)


In [None]:
# Pretty-print the text generated by the chat model.
pprint.pprint(result.content)