In [1]:
!git clone https://github.com/elizaOS/eliza.git
%cd eliza

Cloning into 'eliza'...
remote: Enumerating objects: 88433, done.[K
remote: Counting objects: 100% (189/189), done.[K
remote: Compressing objects: 100% (87/87), done.[K
remote: Total 88433 (delta 165), reused 102 (delta 102), pack-reused 88244 (from 2)[K
Receiving objects: 100% (88433/88433), 214.12 MiB | 16.37 MiB/s, done.
Resolving deltas: 100% (56961/56961), done.
Updating files: 100% (3557/3557), done.
/content/eliza


In [5]:
!pip install torch transformers accelerate peft

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [6]:
import os
os.environ["USE_GPU"] = "True"

In [8]:
%%writefile /content/eliza/eliza_chat.py
import argparse
import os
import sys

import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
from peft import LoraConfig, get_peft_model, TaskType
from utils.text_utils import remove_extra_spaces_and_line_breaks

def load_model(model_name_or_path, accelerator, lora_weights=None):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16 if accelerator.device.type == "cuda" else torch.float32)
    if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id
    if lora_weights:
        config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=True,
            r=8,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
        )

        model = get_peft_model(model, config)
        accelerator.print("Loading lora_weights", lora_weights)
        model.load_adapter(lora_weights)

    return model, tokenizer

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop in self.stops:
            if torch.all((input_ids[-len(stop):] == stop)):
                return True
        return False

class ElizaBot:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
        self.accelerator = Accelerator(cpu=not use_gpu)
        self.device = self.accelerator.device
        self.model, self.tokenizer = load_model(model_name, self.accelerator, lora_weights)
        self.model = self.accelerator.prepare(self.model)
        self.stop_words = ["USER:", "ELIZA:"]
        self.stop_token_ids = []
        for stop_word in self.stop_words:
            encoded = self.tokenizer.encode(stop_word, add_special_tokens=False)
            if len(encoded) > 0:
                self.stop_token_ids.append(encoded)
    def prepare_input(self, input_text):
        prompt = f"USER: {input_text}\nELIZA:"
        encoded_input = self.tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt")
        return encoded_input.to(self.device)
    def generate_response(self, encoded_input, max_length=512, min_length=50, **kwargs):
            stopping_criteria_list = StoppingCriteriaList([StoppingCriteriaSub(stops=self.stop_token_ids)])
            with torch.no_grad():
                output_ids = self.model.generate(encoded_input, max_length=max_length, min_length=min_length, stopping_criteria=stopping_criteria_list, **kwargs)
                decoded_output = self.tokenizer.decode(output_ids, skip_special_tokens=True)
            return decoded_output
    def chat(self, input_text, max_length=512, min_length=50, **kwargs):
        input_ids = self.prepare_input(input_text)
        output = self.generate_response(input_ids, max_length, min_length, **kwargs)
        output = output.split("ELIZA:")[-1]
        output = remove_extra_spaces_and_line_breaks(output)
        return output

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the Eliza chatbot.')
    parser.add_argument('--model', type=str, default="microsoft/DialoGPT-medium", help='The name or path of the model to use.')
    parser.add_argument('--lora', type=str, default=None, help='The path to the LORA adapter to use')
    parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU if available')

    args = parser.parse_args()

    eliza_bot = ElizaBot(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
    print("Eliza is ready. Type 'exit' to end the chat.")
    while True:
      user_input = input("You: ")
      if user_input.lower() == "exit":
          break
      response = eliza_bot.chat(user_input)
      print("Eliza:", response)



Writing /content/eliza/eliza_chat.py


In [10]:
!mkdir /content/eliza/utils


In [11]:
%%writefile /content/eliza/utils/text_utils.py
import re

def remove_extra_spaces_and_line_breaks(text):
    """Removes extra spaces and line breaks from the given text."""
    text = re.sub(r'\s+', ' ', text)
    text = text.replace('\n', ' ').strip()
    return text


Writing /content/eliza/utils/text_utils.py


In [12]:
!python /content/eliza/eliza_chat.py --model "microsoft/DialoGPT-medium" --use_gpu True


2025-02-03 03:39:13.174839: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738553953.200394    7320 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738553953.208503    7320 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-03 03:39:13.235192: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
tokenizer_config.json: 100% 614/614 [00:00<00:00, 2.50MB/s]
vocab.json: 100% 1.04M/1.04M [00:00<00:00, 10.7MB/s]
merges.txt: 

In [14]:
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the Eliza chatbot.')
    parser.add_argument('--model', type=str, default="microsoft/DialoGPT-medium", help='The name or path of the model to use.')
    parser.add_argument('--lora', type=str, default=None, help='The path to the LORA adapter to use')
    parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU if available')
    args = parser.parse_args(args=[]) # The change is right here.
    eliza_interface = ElizaInterface(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
    eliza_interface.display()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


VBox(children=(Output(), HBox(children=(Text(value='', description='You:', placeholder='Type your message here…

In [15]:
import argparse
import os
import sys

import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
from peft import LoraConfig, get_peft_model, TaskType
from utils.text_utils import remove_extra_spaces_and_line_breaks

import ipywidgets as widgets
from IPython.display import display, clear_output
from IPython.display import Javascript
import asyncio
from huggingface_hub import login
from google.colab import userdata

def load_model(model_name_or_path, accelerator, lora_weights=None):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16 if accelerator.device.type == "cuda" else torch.float32)
    if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id
    if lora_weights:
        config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=True,
            r=8,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
        )

        model = get_peft_model(model, config)
        accelerator.print("Loading lora_weights", lora_weights)
        model.load_adapter(lora_weights)

    return model, tokenizer

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop in self.stops:
            if torch.all((input_ids[-len(stop):] == stop)):
                return True
        return False

class ElizaBot:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
        self.accelerator = Accelerator(cpu=not use_gpu)
        self.device = self.accelerator.device
        self.model, self.tokenizer = load_model(model_name, self.accelerator, lora_weights)
        self.model = self.accelerator.prepare(self.model)
        self.stop_words = ["USER:", "ELIZA:"]
        self.stop_token_ids = []
        for stop_word in self.stop_words:
            encoded = self.tokenizer.encode(stop_word, add_special_tokens=False)
            if len(encoded) > 0:
                self.stop_token_ids.append(encoded)
        self.chat_history = []  # Initialize chat history

    def prepare_input(self, input_text):
         prompt = "\n".join(self.chat_history + [f"USER: {input_text}", "ELIZA:"])
         encoded_input = self.tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt")
         return encoded_input.to(self.device)

    def generate_response(self, encoded_input, max_length=512, min_length=50, **kwargs):
            stopping_criteria_list = StoppingCriteriaList([StoppingCriteriaSub(stops=self.stop_token_ids)])
            with torch.no_grad():
                output_ids = self.model.generate(encoded_input, max_length=max_length, min_length=min_length, stopping_criteria=stopping_criteria_list, **kwargs)
                decoded_output = self.tokenizer.decode(output_ids, skip_special_tokens=True)
            return decoded_output

    def chat(self, input_text, max_length=512, min_length=50, **kwargs):
        input_ids = self.prepare_input(input_text)
        output = self.generate_response(input_ids, max_length, min_length, **kwargs)
        output = output.split("ELIZA:")[-1]
        output = remove_extra_spaces_and_line_breaks(output)
        self.chat_history.append(f"USER: {input_text}")
        self.chat_history.append(f"ELIZA: {output}")

        return output

class ElizaInterface:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
         self.hf_token = userdata.get('HF_TOKEN')
         if self.hf_token:
            login(self.hf_token)
            print("Successfully logged in to Hugging Face!")
         else:
           print("Token is not set. Please save the token first.")
         self.eliza_bot = ElizaBot(model_name, lora_weights, use_gpu)
         self.chat_output = widgets.Output()
         self.input_text = widgets.Text(description="You:", placeholder="Type your message here")
         self.send_button = widgets.Button(description="Send")
         self.clear_button = widgets.Button(description="Clear")
         self.stop_button = widgets.Button(description="Stop")
         self.is_running = True
         self.send_button.on_click(self.on_send)
         self.clear_button.on_click(self.on_clear)
         self.stop_button.on_click(self.on_stop)
         self.chat_display = widgets.VBox([self.chat_output,widgets.HBox([self.input_text, self.send_button]), widgets.HBox([self.clear_button, self.stop_button])])
    def on_send(self, _):
        if not self.is_running:
           return
        user_input = self.input_text.value
        self.input_text.value = ''  # Clear the input text
        if user_input:
           with self.chat_output:
               print(f"You: {user_input}")
               response = self.eliza_bot.chat(user_input)
               print(f"Eliza: {response}")

    def on_clear(self, _):
        self.eliza_bot.chat_history = []
        with self.chat_output:
           clear_output()

    def on_stop(self, _):
        self.is_running = False
        self.send_button.disabled = True
        with self.chat_output:
          print("Eliza is not listening, restart by re-running the whole script.")

    def display(self):
        display(self.chat_display)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the Eliza chatbot.')
    parser.add_argument('--model', type=str, default="microsoft/DialoGPT-medium", help='The name or path of the model to use.')
    parser.add_argument('--lora', type=str, default=None, help='The path to the LORA adapter to use')
    parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU if available')
    args = parser.parse_args(args=[])
    eliza_interface = ElizaInterface(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
    eliza_interface.display()


Successfully logged in to Hugging Face!


VBox(children=(Output(), HBox(children=(Text(value='', description='You:', placeholder='Type your message here…

In [16]:
class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop in self.stops:
            stop_tensor = torch.tensor(stop, device=input_ids.device)
            if torch.all(input_ids[-len(stop):] == stop_tensor):
                return True
        return False


In [17]:
def prepare_input(self, input_text):
    prompt = "\n".join(self.chat_history + [f"USER: {input_text}", "ELIZA:"])
    encoded_input = self.tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt", return_attention_mask=True)
    return encoded_input.to(self.device)

def generate_response(self, encoded_input, max_length=512, min_length=50, **kwargs):
    stopping_criteria_list = StoppingCriteriaList([StoppingCriteriaSub(stops=self.stop_token_ids)])
    with torch.no_grad():
        output_ids = self.model.generate(input_ids=encoded_input['input_ids'], attention_mask=encoded_input['attention_mask'], max_length=max_length, min_length=min_length, stopping_criteria=stopping_criteria_list, **kwargs)
        decoded_output = self.tokenizer.decode(output_ids, skip_special_tokens=True)
    return decoded_output


In [18]:
import argparse
import os
import sys

import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
from peft import LoraConfig, get_peft_model, TaskType
from utils.text_utils import remove_extra_spaces_and_line_breaks

import ipywidgets as widgets
from IPython.display import display, clear_output
from IPython.display import Javascript
import asyncio
from huggingface_hub import login
from google.colab import userdata

def load_model(model_name_or_path, accelerator, lora_weights=None):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16 if accelerator.device.type == "cuda" else torch.float32)
    if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id
    if lora_weights:
        config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=True,
            r=8,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
        )

        model = get_peft_model(model, config)
        accelerator.print("Loading lora_weights", lora_weights)
        model.load_adapter(lora_weights)

    return model, tokenizer

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop in self.stops:
            stop_tensor = torch.tensor(stop, device=input_ids.device)
            if torch.all(input_ids[-len(stop):] == stop_tensor):
                return True
        return False

class ElizaBot:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
        self.accelerator = Accelerator(cpu=not use_gpu)
        self.device = self.accelerator.device
        self.model, self.tokenizer = load_model(model_name, self.accelerator, lora_weights)
        self.model = self.accelerator.prepare(self.model)
        self.stop_words = ["USER:", "ELIZA:"]
        self.stop_token_ids = []
        for stop_word in self.stop_words:
            encoded = self.tokenizer.encode(stop_word, add_special_tokens=False)
            if len(encoded) > 0:
                self.stop_token_ids.append(encoded)
        self.chat_history = []  # Initialize chat history

    def prepare_input(self, input_text):
        prompt = "\n".join(self.chat_history + [f"USER: {input_text}", "ELIZA:"])
        encoded_input = self.tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt", return_attention_mask=True)
        return encoded_input.to(self.device)

    def generate_response(self, encoded_input, max_length=512, min_length=50, **kwargs):
        stopping_criteria_list = StoppingCriteriaList([StoppingCriteriaSub(stops=self.stop_token_ids)])
        with torch.no_grad():
            output_ids = self.model.generate(input_ids=encoded_input['input_ids'], attention_mask=encoded_input['attention_mask'], max_length=max_length, min_length=min_length, stopping_criteria=stopping_criteria_list, **kwargs)
            decoded_output = self.tokenizer.decode(output_ids, skip_special_tokens=True)
        return decoded_output

    def chat(self, input_text, max_length=512, min_length=50, **kwargs):
        input_ids = self.prepare_input(input_text)
        output = self.generate_response(input_ids, max_length, min_length, **kwargs)
        output = output.split("ELIZA:")[-1]
        output = remove_extra_spaces_and_line_breaks(output)
        self.chat_history.append(f"USER: {input_text}")
        self.chat_history.append(f"ELIZA: {output}")

        return output

class ElizaInterface:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
         self.hf_token = userdata.get('HF_TOKEN')
         if self.hf_token:
            login(self.hf_token)
            print("Successfully logged in to Hugging Face!")
         else:
           print("Token is not set. Please save the token first.")
         self.eliza_bot = ElizaBot(model_name, lora_weights, use_gpu)
         self.chat_output = widgets.Output()
         self.input_text = widgets.Text(description="You:", placeholder="Type your message here")
         self.send_button = widgets.Button(description="Send")
         self.clear_button = widgets.Button(description="Clear")
         self.stop_button = widgets.Button(description="Stop")
         self.is_running = True
         self.send_button.on_click(self.on_send)
         self.clear_button.on_click(self.on_clear)
         self.stop_button.on_click(self.on_stop)
         self.chat_display = widgets.VBox([self.chat_output,widgets.HBox([self.input_text, self.send_button]), widgets.HBox([self.clear_button, self.stop_button])])
    def on_send(self, _):
        if not self.is_running:
           return
        user_input = self.input_text.value
        self.input_text.value = ''  # Clear the input text
        if user_input:
           with self.chat_output:
               print(f"You: {user_input}")
               response = self.eliza_bot.chat(user_input)
               print(f"Eliza: {response}")

    def on_clear(self, _):
        self.eliza_bot.chat_history = []
        with self.chat_output:
           clear_output()

    def on_stop(self, _):
        self.is_running = False
        self.send_button.disabled = True
        with self.chat_output:
          print("Eliza is not listening, restart by re-running the whole script.")

    def display(self):
        display(self.chat_display)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the Eliza chatbot.')
    parser.add_argument('--model', type=str, default="microsoft/DialoGPT-medium", help='The name or path of the model to use.')
    parser.add_argument('--lora', type=str, default=None, help='The path to the LORA adapter to use')
    parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU if available')
    args = parser.parse_args(args=[])
    eliza_interface = ElizaInterface(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
    eliza_interface.display()


Successfully logged in to Hugging Face!


VBox(children=(Output(), HBox(children=(Text(value='', description='You:', placeholder='Type your message here…

In [19]:
%%writefile /content/eliza/eliza_chat.py
import argparse
import os
import sys

import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
from peft import LoraConfig, get_peft_model, TaskType
from utils.text_utils import remove_extra_spaces_and_line_breaks

import ipywidgets as widgets
from IPython.display import display, clear_output
from IPython.display import Javascript
import asyncio
from huggingface_hub import login
from google.colab import userdata

def load_model(model_name_or_path, accelerator, lora_weights=None):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16 if accelerator.device.type == "cuda" else torch.float32)
    if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id
    if lora_weights:
        config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=True,
            r=8,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
        )

        model = get_peft_model(model, config)
        accelerator.print("Loading lora_weights", lora_weights)
        model.load_adapter(lora_weights)

    return model, tokenizer

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop in self.stops:
            stop_tensor = torch.tensor(stop, device=input_ids.device)
            if torch.all(input_ids[-len(stop):] == stop_tensor):
                return True
        return False

class ElizaBot:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
        self.accelerator = Accelerator(cpu=not use_gpu)
        self.device = self.accelerator.device
        self.model, self.tokenizer = load_model(model_name, self.accelerator, lora_weights)
        self.model = self.accelerator.prepare(self.model)
        self.stop_words = ["USER:", "ELIZA:"]
        self.stop_token_ids = []
        for stop_word in self.stop_words:
            encoded = self.tokenizer.encode(stop_word, add_special_tokens=False)
            if len(encoded) > 0:
                self.stop_token_ids.append(encoded)
        self.chat_history = []  # Initialize chat history

    def prepare_input(self, input_text):
        prompt = "\n".join(self.chat_history + [f"USER: {input_text}", "ELIZA:"])
        encoded_input = self.tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt", return_attention_mask=True)
        return encoded_input.to(self.device)

    def generate_response(self, encoded_input, max_length=512, min_length=50, **kwargs):
        stopping_criteria_list = StoppingCriteriaList([StoppingCriteriaSub(stops=self.stop_token_ids)])
        with torch.no_grad():
            output_ids = self.model.generate(input_ids=encoded_input['input_ids'], attention_mask=encoded_input['attention_mask'], max_length=max_length, min_length=min_length, stopping_criteria=stopping_criteria_list, **kwargs)
            decoded_output = self.tokenizer.decode(output_ids, skip_special_tokens=True)
        return decoded_output

    def chat(self, input_text, max_length=512, min_length=50, **kwargs):
        input_ids = self.prepare_input(input_text)
        output = self.generate_response(input_ids, max_length, min_length, **kwargs)
        output = output.split("ELIZA:")[-1]
        output = remove_extra_spaces_and_line_breaks(output)
        self.chat_history.append(f"USER: {input_text}")
        self.chat_history.append(f"ELIZA: {output}")

        return output

class ElizaInterface:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
         self.hf_token = userdata.get('HF_TOKEN')
         if self.hf_token:
            login(self.hf_token)
            print("Successfully logged in to Hugging Face!")
         else:
           print("Token is not set. Please save the token first.")
         self.eliza_bot = ElizaBot(model_name, lora_weights, use_gpu)
         self.chat_output = widgets.Output()
         self.input_text = widgets.Text(description="You:", placeholder="Type your message here")
         self.send_button = widgets.Button(description="Send")
         self.clear_button = widgets.Button(description="Clear")
         self.stop_button = widgets.Button(description="Stop")
         self.is_running = True
         self.send_button.on_click(self.on_send)
         self.clear_button.on_click(self.on_clear)
         self.stop_button.on_click(self.on_stop)
         self.chat_display = widgets.VBox([self.chat_output,widgets.HBox([self.input_text, self.send_button]), widgets.HBox([self.clear_button, self.stop_button])])
    def on_send(self, _):
        if not self.is_running:
           return
        user_input = self.input_text.value
        self.input_text.value = ''  # Clear the input text
        if user_input:
           with self.chat_output:
               print(f"You: {user_input}")
               response = self.eliza_bot.chat(user_input)
               print(f"Eliza: {response}")

    def on_clear(self, _):
        self.eliza_bot.chat_history = []
        with self.chat_output:
           clear_output()

    def on_stop(self, _):
        self.is_running = False
        self.send_button.disabled = True
        with self.chat_output:
          print("Eliza is not listening, restart by re-running the whole script.")

    def display(self):
        display(self.chat_display)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the Eliza chatbot.')
    parser.add_argument('--model', type=str, default="microsoft/DialoGPT-medium", help='The name or path of the model to use.')
    parser.add_argument('--lora', type=str, default=None, help='The path to the LORA adapter to use')
    parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU if available')
    args = parser.parse_args(args=[])
    eliza_interface = ElizaInterface(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
    eliza_interface.display()


Overwriting /content/eliza/eliza_chat.py


In [20]:
!python /content/eliza/eliza_chat.py --model "microsoft/DialoGPT-medium" --use_gpu True


2025-02-03 04:06:56.296540: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738555616.323053   13969 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738555616.330930   13969 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Traceback (most recent call last):
  File "/content/eliza/eliza_chat.py", line 138, in <module>
    eliza_interface = ElizaInterface(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/content/eliza/eliza_chat.py", line 89, in __init__
    self.hf_token = userdata.get('HF_TOKEN')
                    ^^^^^^^^^^^^^^^^^^^^^^^^
 

In [21]:
class ElizaInterface:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
         self.eliza_bot = ElizaBot(model_name, lora_weights, use_gpu)
         self.chat_output = widgets.Output()
         self.input_text = widgets.Text(description="You:", placeholder="Type your message here")
         self.send_button = widgets.Button(description="Send")
         self.clear_button = widgets.Button(description="Clear")
         self.stop_button = widgets.Button(description="Stop")
         self.is_running = True
         self.send_button.on_click(self.on_send)
         self.clear_button.on_click(self.on_clear)
         self.stop_button.on_click(self.on_stop)
         self.chat_display = widgets.VBox([self.chat_output,widgets.HBox([self.input_text, self.send_button]), widgets.HBox([self.clear_button, self.stop_button])])
    def on_send(self, _):
        if not self.is_running:
           return
        user_input = self.input_text.value
        self.input_text.value = ''  # Clear the input text
        if user_input:
           with self.chat_output:
               print(f"You: {user_input}")
               response = self.eliza_bot.chat(user_input)
               print(f"Eliza: {response}")

    def on_clear(self, _):
        self.eliza_bot.chat_history = []
        with self.chat_output:
           clear_output()

    def on_stop(self, _):
        self.is_running = False
        self.send_button.disabled = True
        with self.chat_output:
          print("Eliza is not listening, restart by re-running the whole script.")

    def display(self):
        self.hf_token = userdata.get('HF_TOKEN')
        if self.hf_token:
          login(self.hf_token)
          print("Successfully logged in to Hugging Face!")
        else:
           print("Token is not set. Please save the token first.")
        display(self.chat_display)


In [22]:
import argparse
import os
import sys

import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
from peft import LoraConfig, get_peft_model, TaskType
from utils.text_utils import remove_extra_spaces_and_line_breaks

import ipywidgets as widgets
from IPython.display import display, clear_output
from IPython.display import Javascript
import asyncio
from huggingface_hub import login
from google.colab import userdata

def load_model(model_name_or_path, accelerator, lora_weights=None):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16 if accelerator.device.type == "cuda" else torch.float32)
    if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id
    if lora_weights:
        config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=True,
            r=8,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
        )

        model = get_peft_model(model, config)
        accelerator.print("Loading lora_weights", lora_weights)
        model.load_adapter(lora_weights)

    return model, tokenizer

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop in self.stops:
            stop_tensor = torch.tensor(stop, device=input_ids.device)
            if torch.all(input_ids[-len(stop):] == stop_tensor):
                return True
        return False

class ElizaBot:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
        self.accelerator = Accelerator(cpu=not use_gpu)
        self.device = self.accelerator.device
        self.model, self.tokenizer = load_model(model_name, self.accelerator, lora_weights)
        self.model = self.accelerator.prepare(self.model)
        self.stop_words = ["USER:", "ELIZA:"]
        self.stop_token_ids = []
        for stop_word in self.stop_words:
            encoded = self.tokenizer.encode(stop_word, add_special_tokens=False)
            if len(encoded) > 0:
                self.stop_token_ids.append(encoded)
        self.chat_history = []  # Initialize chat history

    def prepare_input(self, input_text):
        prompt = "\n".join(self.chat_history + [f"USER: {input_text}", "ELIZA:"])
        encoded_input = self.tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt", return_attention_mask=True)
        return encoded_input.to(self.device)

    def generate_response(self, encoded_input, max_length=512, min_length=50, **kwargs):
        stopping_criteria_list = StoppingCriteriaList([StoppingCriteriaSub(stops=self.stop_token_ids)])
        with torch.no_grad():
            output_ids = self.model.generate(input_ids=encoded_input['input_ids'], attention_mask=encoded_input['attention_mask'], max_length=max_length, min_length=min_length, stopping_criteria=stopping_criteria_list, **kwargs)
            decoded_output = self.tokenizer.decode(output_ids, skip_special_tokens=True)
        return decoded_output

    def chat(self, input_text, max_length=512, min_length=50, **kwargs):
        input_ids = self.prepare_input(input_text)
        output = self.generate_response(input_ids, max_length, min_length, **kwargs)
        output = output.split("ELIZA:")[-1]
        output = remove_extra_spaces_and_line_breaks(output)
        self.chat_history.append(f"USER: {input_text}")
        self.chat_history.append(f"ELIZA: {output}")

        return output

class ElizaInterface:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
         self.eliza_bot = ElizaBot(model_name, lora_weights, use_gpu)
         self.chat_output = widgets.Output()
         self.input_text = widgets.Text(description="You:", placeholder="Type your message here")
         self.send_button = widgets.Button(description="Send")
         self.clear_button = widgets.Button(description="Clear")
         self.stop_button = widgets.Button(description="Stop")
         self.is_running = True
         self.send_button.on_click(self.on_send)
         self.clear_button.on_click(self.on_clear)
         self.stop_button.on_click(self.on_stop)
         self.chat_display = widgets.VBox([self.chat_output,widgets.HBox([self.input_text, self.send_button]), widgets.HBox([self.clear_button, self.stop_button])])
    def on_send(self, _):
        if not self.is_running:
           return
        user_input = self.input_text.value
        self.input_text.value = ''  # Clear the input text
        if user_input:
           with self.chat_output:
               print(f"You: {user_input}")
               response = self.eliza_bot.chat(user_input)
               print(f"Eliza: {response}")

    def on_clear(self, _):
        self.eliza_bot.chat_history = []
        with self.chat_output:
           clear_output()

    def on_stop(self, _):
        self.is_running = False
        self.send_button.disabled = True
        with self.chat_output:
          print("Eliza is not listening, restart by re-running the whole script.")

    def display(self):
        self.hf_token = userdata.get('HF_TOKEN')
        if self.hf_token:
          login(self.hf_token)
          print("Successfully logged in to Hugging Face!")
        else:
           print("Token is not set. Please save the token first.")
        display(self.chat_display)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the Eliza chatbot.')
    parser.add_argument('--model', type=str, default="microsoft/DialoGPT-medium", help='The name or path of the model to use.')
    parser.add_argument('--lora', type=str, default=None, help='The path to the LORA adapter to use')
    parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU if available')
    args = parser.parse_args(args=[])
    eliza_interface = ElizaInterface(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
    eliza_interface.display()


Successfully logged in to Hugging Face!


VBox(children=(Output(), HBox(children=(Text(value='', description='You:', placeholder='Type your message here…

In [25]:
%%writefile /content/eliza/eliza_chat.py
import argparse
import os
import sys

import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
from peft import LoraConfig, get_peft_model, TaskType
from utils.text_utils import remove_extra_spaces_and_line_breaks

import ipywidgets as widgets
from IPython.display import display, clear_output
from IPython.display import Javascript
import asyncio
from huggingface_hub import login
from google.colab import userdata

def load_model(model_name_or_path, accelerator, lora_weights=None):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16 if accelerator.device.type == "cuda" else torch.float32)
    if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id
    if lora_weights:
        config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=True,
            r=8,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
        )

        model = get_peft_model(model, config)
        accelerator.print("Loading lora_weights", lora_weights)
        model.load_adapter(lora_weights)

    return model, tokenizer

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop in self.stops:
            stop_tensor = torch.tensor(stop, device=input_ids.device)
            if torch.all(input_ids[-len(stop):] == stop_tensor):
                return True
        return False

class ElizaBot:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
        self.accelerator = Accelerator(cpu=not use_gpu)
        self.device = self.accelerator.device
        self.model, self.tokenizer = load_model(model_name, self.accelerator, lora_weights)
        self.model = self.accelerator.prepare(self.model)
        self.stop_words = ["USER:", "ELIZA:"]
        self.stop_token_ids = []
        for stop_word in self.stop_words:
            encoded = self.tokenizer.encode(stop_word, add_special_tokens=False)
            if len(encoded) > 0:
                self.stop_token_ids.append(encoded)
        self.chat_history = []  # Initialize chat history

    def prepare_input(self, input_text):
        prompt = "\n".join(self.chat_history + [f"USER: {input_text}", "ELIZA:"])
        encoded_input = self.tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt", return_attention_mask=True)
        return encoded_input.to(self.device)

    def generate_response(self, encoded_input, max_length=512, min_length=50, **kwargs):
        stopping_criteria_list = StoppingCriteriaList([StoppingCriteriaSub(stops=self.stop_token_ids)])
        with torch.no_grad():
            output_ids = self.model.generate(input_ids=encoded_input['input_ids'], attention_mask=encoded_input['attention_mask'], max_length=max_length, min_length=min_length, stopping_criteria=stopping_criteria_list, **kwargs)
            decoded_output = self.tokenizer.decode(output_ids, skip_special_tokens=True)
        return decoded_output

    def chat(self, input_text, max_length=512, min_length=50, **kwargs):
        input_ids = self.prepare_input(input_text)
        output = self.generate_response(input_ids, max_length, min_length, **kwargs)
        output = output.split("ELIZA:")[-1]
        output = remove_extra_spaces_and_line_breaks(output)
        self.chat_history.append(f"USER: {input_text}")
        self.chat_history.append(f"ELIZA: {output}")

        return output

class ElizaInterface:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
         self.eliza_bot = ElizaBot(model_name, lora_weights, use_gpu)
         self.chat_output = widgets.Output()
         self.input_text = widgets.Text(description="You:", placeholder="Type your message here")
         self.send_button = widgets.Button(description="Send")
         self.clear_button = widgets.Button(description="Clear")
         self.stop_button = widgets.Button(description="Stop")
         self.is_running = True
         self.send_button.on_click(self.on_send)
         self.clear_button.on_click(self.on_clear)
         self.stop_button.on_click(self.on_stop)
         self.chat_display = widgets.VBox([self.chat_output,widgets.HBox([self.input_text, self.send_button]), widgets.HBox([self.clear_button, self.stop_button])])
    def on_send(self, _):
        if not self.is_running:
           return
        user_input = self.input_text.value
        self.input_text.value = ''  # Clear the input text
        if user_input:
           with self.chat_output:
               print(f"You: {user_input}")
               response = self.eliza_bot.chat(user_input)
               print(f"Eliza: {response}")

    def on_clear(self, _):
        self.eliza_bot.chat_history = []
        with self.chat_output:
           clear_output()

    def on_stop(self, _):
        self.is_running = False
        self.send_button.disabled = True
        with self.chat_output:
          print("Eliza is not listening, restart by re-running the whole script.")

    def display(self):
        self.hf_token = userdata.get('HF_TOKEN')
        if self.hf_token:
          login(self.hf_token)
          print("Successfully logged in to Hugging Face!")
        else:
           print("Token is not set. Please save the token first.")
        display(self.chat_display)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the Eliza chatbot.')
    parser.add_argument('--model', type=str, default="microsoft/DialoGPT-medium", help='The name or path of the model to use.')
    parser.add_argument('--lora', type=str, default=None, help='The path to the LORA adapter to use')
    parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU if available')
    args = parser.parse_args(args=[])
    eliza_interface = ElizaInterface(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
    eliza_interface.display()


Overwriting /content/eliza/eliza_chat.py


In [26]:
!python /content/eliza/eliza_chat.py --model "microsoft/DialoGPT-medium" --use_gpu False


2025-02-03 04:11:45.761492: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738555905.913859   15181 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738555905.957435   15181 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Traceback (most recent call last):
  File "/content/eliza/eliza_chat.py", line 139, in <module>
    eliza_interface.display()
  File "/content/eliza/eliza_chat.py", line 123, in display
    self.hf_token = userdata.get('HF_TOKEN')
                    ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/google/colab/userdata.py", line 62, in get
    resp = _message.blocking_request(
           ^^^^^^^^^^^^^^^^^^^^^

In [27]:
import argparse
import os
import sys

import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
from peft import LoraConfig, get_peft_model, TaskType
from utils.text_utils import remove_extra_spaces_and_line_breaks

import ipywidgets as widgets
from IPython.display import display, clear_output
from IPython.display import Javascript
import asyncio
from huggingface_hub import login
from google.colab import userdata

os.environ["TRANSFORMERS_SAFETENSORS_CONVERSION"] = "false"

def load_model(model_name_or_path, accelerator, lora_weights=None):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16 if accelerator.device.type == "cuda" else torch.float32)
    if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id
    if lora_weights:
        config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=True,
            r=8,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
        )

        model = get_peft_model(model, config)
        accelerator.print("Loading lora_weights", lora_weights)
        model.load_adapter(lora_weights)

    return model, tokenizer

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop in self.stops:
            stop_tensor = torch.tensor(stop, device=input_ids.device)
            if torch.all(input_ids[-len(stop):] == stop_tensor):
                return True
        return False

class ElizaBot:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
        self.accelerator = Accelerator(cpu=not use_gpu)
        self.device = self.accelerator.device
        self.model, self.tokenizer = load_model(model_name, self.accelerator, lora_weights)
        self.model = self.accelerator.prepare(self.model)
        self.stop_words = ["USER:", "ELIZA:"]
        self.stop_token_ids = []
        for stop_word in self.stop_words:
            encoded = self.tokenizer.encode(stop_word, add_special_tokens=False)
            if len(encoded) > 0:
                self.stop_token_ids.append(encoded)
        self.chat_history = []  # Initialize chat history

    def prepare_input(self, input_text):
        prompt = "\n".join(self.chat_history + [f"USER: {input_text}", "ELIZA:"])
        encoded_input = self.tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt", return_attention_mask=True)
        return encoded_input.to(self.device)

    def generate_response(self, encoded_input, max_length=512, min_length=50, **kwargs):
        stopping_criteria_list = StoppingCriteriaList([StoppingCriteriaSub(stops=self.stop_token_ids)])
        with torch.no_grad():
            output_ids = self.model.generate(input_ids=encoded_input['input_ids'], attention_mask=encoded_input['attention_mask'], max_length=max_length, min_length=min_length, stopping_criteria=stopping_criteria_list, **kwargs)
            decoded_output = self.tokenizer.decode(output_ids, skip_special_tokens=True)
        return decoded_output

    def chat(self, input_text, max_length=512, min_length=50, **kwargs):
        input_ids = self.prepare_input(input_text)
        output = self.generate_response(input_ids, max_length, min_length, **kwargs)
        output = output.split("ELIZA:")[-1]
        output = remove_extra_spaces_and_line_breaks(output)
        self.chat_history.append(f"USER: {input_text}")
        self.chat_history.append(f"ELIZA: {output}")

        return output

class ElizaInterface:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
         self.eliza_bot = ElizaBot(model_name, lora_weights, use_gpu)
         self.chat_output = widgets.Output()
         self.input_text = widgets.Text(description="You:", placeholder="Type your message here")
         self.send_button = widgets.Button(description="Send")
         self.clear_button = widgets.Button(description="Clear")
         self.stop_button = widgets.Button(description="Stop")
         self.is_running = True
         self.send_button.on_click(self.on_send)
         self.clear_button.on_click(self.on_clear)
         self.stop_button.on_click(self.on_stop)
         self.chat_display = widgets.VBox([self.chat_output,widgets.HBox([self.input_text, self.send_button]), widgets.HBox([self.clear_button, self.stop_button])])
    def on_send(self, _):
        if not self.is_running:
           return
        user_input = self.input_text.value
        self.input_text.value = ''  # Clear the input text
        if user_input:
           with self.chat_output:
               print(f"You: {user_input}")
               response = self.eliza_bot.chat(user_input)
               print(f"Eliza: {response}")

    def on_clear(self, _):
        self.eliza_bot.chat_history = []
        with self.chat_output:
           clear_output()

    def on_stop(self, _):
        self.is_running = False
        self.send_button.disabled = True
        with self.chat_output:
          print("Eliza is not listening, restart by re-running the whole script.")

    def display(self):
        self.hf_token = userdata.get('HF_TOKEN')
        if self.hf_token:
          login(token=self.hf_token)
          print("Successfully logged in to Hugging Face!")
        else:
           print("Token is not set. Please save the token first.")
        display(self.chat_display)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the Eliza chatbot.')
    parser.add_argument('--model', type=str, default="microsoft/DialoGPT-medium", help='The name or path of the model to use.')
    parser.add_argument('--lora', type=str, default=None, help='The path to the LORA adapter to use')
    parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU if available')
    args = parser.parse_args(args=[])
    eliza_interface = ElizaInterface(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
    eliza_interface.display()



Successfully logged in to Hugging Face!


VBox(children=(Output(), HBox(children=(Text(value='', description='You:', placeholder='Type your message here…

In [28]:
%%writefile /content/eliza/eliza_chat.py
import argparse
import os
import sys

import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
from peft import LoraConfig, get_peft_model, TaskType
from utils.text_utils import remove_extra_spaces_and_line_breaks

import ipywidgets as widgets
from IPython.display import display, clear_output
from IPython.display import Javascript
import asyncio
from huggingface_hub import login
from google.colab import userdata

os.environ["TRANSFORMERS_SAFETENSORS_CONVERSION"] = "false"

def load_model(model_name_or_path, accelerator, lora_weights=None):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.bfloat16 if accelerator.device.type == "cuda" else torch.float32)
    if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id
    if lora_weights:
        config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=True,
            r=8,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
        )

        model = get_peft_model(model, config)
        accelerator.print("Loading lora_weights", lora_weights)
        model.load_adapter(lora_weights)

    return model, tokenizer

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = stops
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop in self.stops:
            stop_tensor = torch.tensor(stop, device=input_ids.device)
            if torch.all(input_ids[-len(stop):] == stop_tensor):
                return True
        return False

class ElizaBot:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
        self.accelerator = Accelerator(cpu=not use_gpu)
        self.device = self.accelerator.device
        self.model, self.tokenizer = load_model(model_name, self.accelerator, lora_weights)
        self.model = self.accelerator.prepare(self.model)
        self.stop_words = ["USER:", "ELIZA:"]
        self.stop_token_ids = []
        for stop_word in self.stop_words:
            encoded = self.tokenizer.encode(stop_word, add_special_tokens=False)
            if len(encoded) > 0:
                self.stop_token_ids.append(encoded)
        self.chat_history = []  # Initialize chat history

    def prepare_input(self, input_text):
        prompt = "\n".join(self.chat_history + [f"USER: {input_text}", "ELIZA:"])
        encoded_input = self.tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt", return_attention_mask=True)
        return encoded_input.to(self.device)

    def generate_response(self, encoded_input, max_length=512, min_length=50, **kwargs):
        stopping_criteria_list = StoppingCriteriaList([StoppingCriteriaSub(stops=self.stop_token_ids)])
        with torch.no_grad():
            output_ids = self.model.generate(input_ids=encoded_input['input_ids'], attention_mask=encoded_input['attention_mask'], max_length=max_length, min_length=min_length, stopping_criteria=stopping_criteria_list, **kwargs)
            decoded_output = self.tokenizer.decode(output_ids, skip_special_tokens=True)
        return decoded_output

    def chat(self, input_text, max_length=512, min_length=50, **kwargs):
        input_ids = self.prepare_input(input_text)
        output = self.generate_response(input_ids, max_length, min_length, **kwargs)
        output = output.split("ELIZA:")[-1]
        output = remove_extra_spaces_and_line_breaks(output)
        self.chat_history.append(f"USER: {input_text}")
        self.chat_history.append(f"ELIZA: {output}")

        return output

class ElizaInterface:
    def __init__(self, model_name, lora_weights=None, use_gpu=True):
         self.eliza_bot = ElizaBot(model_name, lora_weights, use_gpu)
         self.chat_output = widgets.Output()
         self.input_text = widgets.Text(description="You:", placeholder="Type your message here")
         self.send_button = widgets.Button(description="Send")
         self.clear_button = widgets.Button(description="Clear")
         self.stop_button = widgets.Button(description="Stop")
         self.is_running = True
         self.send_button.on_click(self.on_send)
         self.clear_button.on_click(self.on_clear)
         self.stop_button.on_click(self.on_stop)
         self.chat_display = widgets.VBox([self.chat_output,widgets.HBox([self.input_text, self.send_button]), widgets.HBox([self.clear_button, self.stop_button])])
    def on_send(self, _):
        if not self.is_running:
           return
        user_input = self.input_text.value
        self.input_text.value = ''  # Clear the input text
        if user_input:
           with self.chat_output:
               print(f"You: {user_input}")
               response = self.eliza_bot.chat(user_input)
               print(f"Eliza: {response}")

    def on_clear(self, _):
        self.eliza_bot.chat_history = []
        with self.chat_output:
           clear_output()

    def on_stop(self, _):
        self.is_running = False
        self.send_button.disabled = True
        with self.chat_output:
          print("Eliza is not listening, restart by re-running the whole script.")

    def display(self):
        self.hf_token = userdata.get('HF_TOKEN')
        if self.hf_token:
          login(token=self.hf_token)
          print("Successfully logged in to Hugging Face!")
        else:
           print("Token is not set. Please save the token first.")
        display(self.chat_display)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the Eliza chatbot.')
    parser.add_argument('--model', type=str, default="microsoft/DialoGPT-medium", help='The name or path of the model to use.')
    parser.add_argument('--lora', type=str, default=None, help='The path to the LORA adapter to use')
    parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU if available')
    args = parser.parse_args(args=[])
    eliza_interface = ElizaInterface(args.model, lora_weights=args.lora, use_gpu=args.use_gpu)
    eliza_interface.display()


Overwriting /content/eliza/eliza_chat.py


In [None]:
!pip install --upgrade --force-reinstall pip
