In [3]:
import gradio as gr


def greet(name):
    """Return a greeting for ``name``.

    Args:
        name: Text to append after the greeting prefix; it is concatenated
            with ``+``, so it must be a ``str``.

    Returns:
        The string ``"Hello "`` followed by ``name``.
    """
    prefix = "Hello "
    greeting = prefix + name
    return greeting


# Wire `greet` into a minimal text-in / text-out Gradio interface, then serve it.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)

demo.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




In [4]:
# Instantiate the Textbox class so the input field's label, placeholder and
# height can be customised.
textbox = gr.Textbox(
    label="Type your name here:",
    placeholder="John Doe",
    lines=2,
)

gr.Interface(
    fn=greet,
    inputs=textbox,
    outputs="text",
).launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




In [5]:
# Default the CUDA_DEVICE environment variable to "0" when it is unset or empty.
# NOTE(review): nothing in the visible cells reads CUDA_DEVICE, and the variable
# the CUDA runtime honours for device selection is CUDA_VISIBLE_DEVICES —
# confirm which one was intended before relying on this for GPU pinning.
import os; os.environ["CUDA_DEVICE"] = os.environ.get("CUDA_DEVICE") or "0"

import torch

from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

In [6]:
def make_prompt(instruction):
    """Wrap ``instruction`` in the instruction/response prompt template.

    Args:
        instruction: The user's instruction; it is formatted into the
            template after the ``### Instruction:`` header.

    Returns:
        The prompt text: a fixed Vietnamese preamble, the instruction
        section, and a trailing ``### Response:`` header with nothing
        after it.
    """
    sections = (
        "Hãy viết một phản hồi thích hợp cho chỉ dẫn dưới đây.",
        "",
        "### Instruction:",
        f"{instruction}",
        "",
        "### Response:",
    )
    return "\n".join(sections)
# END make_prompt

# Checkpoint identifiers: the base causal LM and the PEFT adapter applied on top of it.
BASE_MODEL = "VietAI/gpt-neo-1.3B-vietnamese-news"
PEFT_WEIGHTS = "chat-gpt-neo-1.3B"
load_in_8bit = False  # NOTE(review): not read anywhere in the visible cells.

# Load the base model in bfloat16, then wrap it with the adapter weights.
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(model, PEFT_WEIGHTS, torch_dtype=torch.bfloat16)

# Always define `device`: `get_answer` reads it unconditionally, so assigning it
# only when CUDA is available raised NameError on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Switch to inference mode; as the cell's last expression this also displays
# the model structure.
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPTNeoForCausalLM(
      (transformer): GPTNeoModel(
        (wte): Embedding(60000, 2048)
        (wpe): Embedding(2048, 2048)
        (drop): Dropout(p=0.0, inplace=False)
        (h): ModuleList(
          (0-23): 24 x GPTNeoBlock(
            (ln_1): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (attn): GPTNeoAttention(
              (attention): GPTNeoSelfAttention(
                (attn_dropout): Dropout(p=0.0, inplace=False)
                (resid_dropout): Dropout(p=0.0, inplace=False)
                (k_proj): Linear(
                  in_features=2048, out_features=2048, bias=False
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.05, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=2048, out_features=6, bias=False)
                  )
                  (lora_B): ModuleDict(
      

In [10]:
def get_answer(Question, do_sample = True, max_new_tokens=196, top_k=20, temperature=1, skip_tl=False):
    """Generate the model's reply to ``Question``.

    The question is wrapped with ``make_prompt``, tokenized and passed to
    ``model.generate``. Only the tokens produced after the prompt are decoded,
    so text typed by the user (including any ``###``) never leaks into the
    parsed answer.

    Args:
        Question: Instruction text inserted into the prompt template.
        do_sample: Forwarded to ``model.generate``.
        max_new_tokens: Maximum number of tokens generated beyond the prompt.
        top_k: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
        skip_tl: Unused; kept so existing callers that pass it keep working.

    Returns:
        The generated text up to (not including) the first ``###`` the model
        emits, with surrounding whitespace removed.
    """
    # Token id that terminates generation (original note: "for open-end generation").
    # NOTE(review): presumably the tokenizer's end-of-text token — confirm.
    eos_token_id = 0

    # Follow the model's actual placement instead of a global `device`, which
    # is not guaranteed to exist on CPU-only machines.
    model_device = next(model.parameters()).device
    input_ids = tokenizer(make_prompt(Question), return_tensors="pt")["input_ids"].to(model_device)

    # input_ids is (batch, seq_len); len(input_ids) is the batch size (1), not
    # the prompt length, so the sequence dimension must be read explicitly.
    prompt_length = input_ids.shape[-1]

    with torch.no_grad():
        gen_tokens = model.generate(
            input_ids=input_ids,
            max_length=prompt_length + max_new_tokens,
            do_sample=do_sample,
            temperature=temperature,
            top_k=top_k,
            repetition_penalty=1.2,
            eos_token_id=eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt; keep only what was generated.
    new_tokens = gen_tokens[0][prompt_length:]

    # Drop the terminator only when it is really there. Blindly trimming a
    # fixed number of characters cut real text whenever generation stopped at
    # the length limit instead of at the end-of-sequence token.
    if len(new_tokens) > 0 and new_tokens[-1].item() == eos_token_id:
        new_tokens = new_tokens[:-1]

    generated_text = tokenizer.decode(new_tokens)

    # If the model starts another "###" section, discard it and everything after.
    answer = generated_text.split("###", 1)[0]
    return answer.strip()

In [11]:
# Copy displayed around the demo: heading, blurb (HTML allowed) and footer link.
# NOTE(review): this text describes a Rick and Morty bot, while `get_answer`
# prompts the model with a Vietnamese instruction template — confirm the copy
# and the English examples are what this demo should show.
title = "Ask Rick a Question"
article = "Check out [the original Rick and Morty Bot](https://huggingface.co/spaces/kingabzpro/Rick_and_Morty_Bot) that this demo is based off of."
description = (
    "\n"
    "The bot was trained to answer questions based on Rick and Morty dialogues. Ask Rick anything!\n"
    '<img src="https://huggingface.co/spaces/course-demos/Rick_and_Morty_QA/resolve/main/rick.png" width=200px>\n'
)

# Single textbox in, plain text out; the remaining `get_answer` parameters
# keep their defaults.
gr.Interface(
    fn=get_answer,
    inputs="textbox",
    outputs="text",
    title=title,
    description=description,
    article=article,
    examples=[
        ["What are you doing?"],
        ["Where should we time travel to?"],
    ],
).launch()


Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.


