# Creating a ChatGPT Clone Using T5 and Gradio

## Setting up flan-t5-large

In [1]:
import multiprocessing 
# CPU count reported by multiprocessing for the current runtime.
cores = multiprocessing.cpu_count()
# Bare trailing expression so the notebook shows the value as the cell output.
cores

2

In [2]:
!pip install accelerate transformers gradio sentencepiece bitsandbytes

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting accelerate
  Downloading accelerate-0.16.0-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.7/199.7 KB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gradio
  Downloading gradio-3.19.1-py3-none-any.whl (14.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.2/14.2 MB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m64.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Download

In [3]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Single source of truth for the checkpoint, so the tokenizer and the model
# are always loaded from the same repository.
MODEL_NAME = "google/flan-t5-large"

# Tokenizer that turns prompt text into input ids (and decodes generated ids).
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)

# device_map="auto" leaves weight placement to the library instead of
# hard-coding a device here.
model = T5ForConditionalGeneration.from_pretrained(
    MODEL_NAME,
    device_map="auto",
)

Downloading (…)"spiece.model";:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [4]:
# Print the loaded checkpoint's configuration (layer counts, hidden sizes,
# special token ids, vocabulary size) as a sanity check of what was loaded.
print(model.config)

T5Config {
  "_name_or_path": "google/flan-t5-large",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "d_ff": 2816,
  "d_kv": 64,
  "d_model": 1024,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 24,
  "num_heads": 16,
  "num_layers": 24,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "tie_word_embeddings": false,
  "transformers_version": "4.26.1",
  "use_cache": true,
  "vocab_size": 32128
}



## Creating a GUI using Gradio

In [5]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
def generate(input, min_len, max_len, beam_search, temp):
  """Generate a text reply for a prompt using the notebook-level model/tokenizer.

  Args:
    input: Prompt text to feed to the model.
    min_len: Minimum length of the generated sequence. Accepts int or float
      (UI sliders may deliver floats); it is truncated to int and clamped so
      it never exceeds `max_len`, which would be an unsatisfiable constraint.
    max_len: Maximum number of newly generated tokens (int or float, >= 1
      after truncation).
    beam_search: Number of beams (int or float, >= 1 after truncation).
    temp: Sampling temperature. A value of 0 (or None) keeps decoding
      deterministic; any positive value turns sampling on so that the
      temperature, top-k and top-p settings actually take effect.

  Returns:
    The decoded first generated sequence, with special tokens removed.
  """
  # generate() expects integer lengths / beam counts, but slider widgets can
  # hand back floats such as 20.0.
  max_new = max(1, int(max_len))
  min_total = max(0, min(int(min_len), max_new))
  beams = max(1, int(beam_search))

  gen_kwargs = dict(
      min_length = min_total,
      max_new_tokens = max_new,
      length_penalty = 1.8,
      num_beams = beams,
      no_repeat_ngram_size = 3,
      repetition_penalty = 2.4,
  )

  # temperature / top_k / top_p are sampling controls: without do_sample=True
  # they are ignored, and a temperature of 0 is not a valid sampling value.
  if temp and temp > 0:
    gen_kwargs.update(
        do_sample = True,
        temperature = float(temp),
        top_k = 150,
        top_p = 0.91,
    )

  input_ids = tokenizer(input, return_tensors = 'pt').input_ids.to(device)
  output = model.generate(input_ids, **gen_kwargs)
  return tokenizer.decode(output[0], skip_special_tokens=True)

In [None]:
import gradio as gr 

title = 'Chat GPT clone Using T5-L and GRADIO'

# Example prompts offered in the UI. Each row supplies only the prompt text.
exs = [
    ['Answer the following question: what do you think about area 51?' ],
    ['Q: Will AI take over humans?'],
    ['Is ChatGPT a boon or curse to humans. Explain why?'],
    ['What are your thoughts on metaverse and its consequences?']
]

def inference(text, min_len, max_len, beam_search, temp):
  """Gradio callback: pass the UI values straight through to `generate`.

  The arguments arrive in the same order as the `inputs` list below:
  prompt text, minimum length, maximum new tokens, beam count, temperature.
  Returns the generated text shown in the output textbox.
  """
  return generate(text, min_len, max_len, beam_search, temp)

IO = gr.Interface(
    fn = inference,
    inputs = [
        gr.Textbox(lines = 5, label = 'Prompt'),
        # Lengths and beam counts are whole numbers, so pin the step to 1
        # instead of relying on the slider's automatically chosen step.
        gr.Slider(10, 500, step = 1, label = 'Minimum length (tokens)'),
        gr.Slider(20, 1000, step = 1, label = 'Maximum new tokens'),
        gr.Slider(1, 16, step = 1, label = 'Number of beams'),
        gr.Slider(0, 1, label = 'Temperature')
    ],
    # Label fixed from 'TF-L' to 'T5-L' to match the model named in the title.
    outputs = [gr.Textbox(lines=2, label = 'T5-L Inference')],
    title = title,
    examples = exs,
    css = """
    body{background-color: white}
    .input_text input { 
    background-color: lightblue !important;
    }
    """
)

# share=True publishes a temporary public URL; debug=True keeps this cell
# running so errors and logs stay visible (set debug=False to release it).
IO.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://1181910868547dfb0b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces
