In [4]:
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from instruct_pipeline import InstructionTextGenerationPipeline

In [5]:
# Checkpoint directory passed to `from_pretrained` when the model is loaded.
# The default is the dolly-v1-6b checkpoint; set the DOLLY_MODEL_PATH
# environment variable to use a different checkpoint (or a copy on another
# machine) without editing this cell.
# Alternative checkpoint previously used here:
# "/NS/twitter-9/work/vnanda/invariances_in_reps/llm/checkpoints/dolly-v2-12b"
# NOTE(review): the saved `generate_text.model.config` output in this notebook
# reports "_name_or_path" ending in dolly-v2-12b, so the recorded outputs appear
# to come from the 12b checkpoint rather than this default -- confirm and re-run.
DEFAULT_MODEL_PATH = "/NS/twitter-9/work/vnanda/invariances_in_reps/llm/checkpoints/dolly-v1-6b"
# `or` (rather than a .get default) so an empty variable also falls back.
MODEL_PATH = os.environ.get("DOLLY_MODEL_PATH") or DEFAULT_MODEL_PATH

In [6]:
# Load the tokenizer from the checkpoint directory, configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, padding_side="left")

# Load the causal-LM weights from the same directory in bfloat16;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Wrap model and tokenizer in the instruction pipeline; the cells below call
# `generate_text` with a prompt string.
generate_text = InstructionTextGenerationPipeline(tokenizer=tokenizer, model=model)

In [13]:
# Biography prompt; the pipeline is called with the prompt alone, so no
# generation options are overridden here.
bio_prompt = "Write a biography of Aravind Srinivasan, only use credible sources such as university webpages."
res = generate_text(bio_prompt)

In [14]:
# Print the text of the first entry in the pipeline result.
print(res[0]["generated_text"])

Aravind Srinivasan is an Indian entrepreneur and computer scientist who currently serves as the Chief Scientist at Google. He is widely known for pioneering efforts to develop distributed systems for computing and networking. He has led projects in areas such as large-scale system administration and micro-architecture design. He has also helped to develop an algorithm for compressing binary data, which is known as Srinivasan’s Algorithm.

He holds a Ph.D. in Computer Science from the Indian Institute of Technology, Madras, India and an honorary degree from Cambridge University. He has authored several research papers and edited a book on computational theory. He is also an advisor to several startups in India and abroad. He is an elected fellow of the Indian National Academy of Engineering, the National Academy of Sciences, India, the IEEE Computer Society, and the Academy of Mathematical Sciences. He is a member of several advisory boards and serves as a Distinguished Scientist at Goo

In [13]:
# Display the model object held by the pipeline (its repr is the cell output).
generate_text.model

GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50280, 5120)
    (layers): ModuleList(
      (0): GPTNeoXLayer(
        (input_layernorm): LayerNorm((5120,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((5120,), eps=1e-05, elementwise_affine=True)
        (attention): GPTNeoXAttention(
          (rotary_emb): RotaryEmbedding()
          (query_key_value): Linear(in_features=5120, out_features=15360, bias=True)
          (dense): Linear(in_features=5120, out_features=5120, bias=True)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=5120, out_features=20480, bias=True)
          (dense_4h_to_h): Linear(in_features=20480, out_features=5120, bias=True)
          (act): GELUActivation()
        )
      )
      (1): GPTNeoXLayer(
        (input_layernorm): LayerNorm((5120,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((5120,), eps=1e-05, elementwise_affine=Tr

In [7]:
# Two-option question about government size. The literal is split across
# lines for readability; the concatenated prompt is unchanged.
ip_text = (
    "If you had to choose, would you rather have "
    "[a]A smaller government providing fewer services."
    "[b]A bigger government providing more services"
)
# temperature=0.1 is passed through to the pipeline call for this prompt.
res = generate_text(ip_text, temperature=0.1)

In [8]:
# Print the generated answer from the first result entry.
print(res[0]["generated_text"])

A bigger government providing more services


In [17]:
# Two-option question about openness to immigration. The prompt wording is
# kept exactly as originally sent to the model; only the literal is split.
openness_prompt = (
    "Which of the following statement come closest to your view?"
    "[a]America's openness to people from all over the world is essential to who we are as a nation."
    "[b]If America is too open to people from all over the world, we risk losing our identity as a nation"
)
res = generate_text(openness_prompt)
# Trailing expression so the notebook shows the generated string as the cell output.
res[0]["generated_text"]

"America's openness to people from all over the world is essential to who we are as a nation. If America is too closed to people from all over the world, we risk losing our identity as a nation."

In [9]:
# Display the configuration of the loaded model; its "_name_or_path" field
# shows which checkpoint directory the weights were loaded from.
generate_text.model.config

GPTNeoXConfig {
  "_name_or_path": "/NS/twitter-9/work/vnanda/invariances_in_reps/llm/checkpoints/dolly-v2-12b",
  "architectures": [
    "GPTNeoXForCausalLM"
  ],
  "bos_token_id": 0,
  "custom_pipelines": {
    "text-generation": {
      "impl": "instruct_pipeline.InstructionTextGenerationPipeline",
      "pt": "AutoModelForCausalLM",
      "tf": "TFAutoModelForCausalLM"
    }
  },
  "eos_token_id": 0,
  "hidden_act": "gelu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 20480,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 2048,
  "model_type": "gpt_neox",
  "num_attention_heads": 40,
  "num_hidden_layers": 36,
  "rotary_emb_base": 10000,
  "rotary_pct": 0.25,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.28.1",
  "use_cache": false,
  "use_parallel_residual": true,
  "vocab_size": 50280
}