In [None]:
def generate_prompt(instruction: str, input_ctxt: str = None) -> str:
    """Build the instruction-following prompt that is fed to the model.

    Args:
        instruction: The task or question the model should respond to.
        input_ctxt: Optional supporting context. When it is falsy (``None``
            or an empty string) the ``### Input:`` section is left out and
            the shorter preamble is used.

    Returns:
        The prompt text, ending with ``### Response:`` (no trailing newline).
    """
    has_context = bool(input_ctxt)

    # The preamble wording differs depending on whether context is supplied.
    if has_context:
        task_sentence = (
            "Below is an instruction that describes a task, paired with an "
            "input that provides further context."
        )
    else:
        task_sentence = "Below is an instruction that describes a task."
    preamble = (
        task_sentence
        + " Write a response that appropriately completes the request."
    )

    # Sections are separated from one another by exactly one blank line.
    sections = [preamble, f"### Instruction:\n{instruction}"]
    if has_context:
        sections.append(f"### Input:\n{input_ctxt}")
    sections.append("### Response:")
    return "\n\n".join(sections)


In [1]:
!pip install transformers
!pip install sentencepiece

import torch
from transformers import GenerationConfig, LlamaTokenizer, LlamaForCausalLM

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m70.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m84.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1
Looking in indexes: https://pypi.org/simple, https://

In [2]:
!pip list

Package                       Version
----------------------------- --------------------
absl-py                       1.4.0
alabaster                     0.7.13
albumentations                1.2.1
altair                        4.2.2
anyio                         3.6.2
appdirs                       1.4.4
argon2-cffi                   21.3.0
argon2-cffi-bindings          21.2.0
arviz                         0.15.1
astropy                       5.2.2
astunparse                    1.6.3
attrs                         23.1.0
audioread                     3.0.0
autograd                      1.5
Babel                         2.12.1
backcall                      0.2.0
beautifulsoup4                4.11.2
bleach                        6.0.0
blis                          0.7.9
blosc2                        2.0.0
bokeh                         2.4.3
branca                        0.6.0
CacheControl                  0.12.11
cached-property               1.5.2
cachetools                    5.3.0
cata

In [None]:
# Load the tokenizer and the causal LM weights; the model is loaded in 8-bit
# with layers placed automatically across the available devices.
tokenizer = LlamaTokenizer.from_pretrained("chainyo/alpaca-lora-7b")
model = LlamaForCausalLM.from_pretrained(
    "chainyo/alpaca-lora-7b",
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# NOTE(review): temperature / top_p / top_k are sampling settings, but
# `do_sample` is not set here, so with num_beams=4 generation presumably runs
# as plain beam search and ignores them — confirm this is intended.
generation_config = GenerationConfig(
    temperature=0.2,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    max_new_tokens=128,
)

model.eval()
if torch.__version__ >= "2":
    model = torch.compile(model)


def generate_response(instruction: str, context: str = None) -> str:
    """Run one instruction (plus optional context) through the model.

    Uses the module-level ``tokenizer``, ``model`` and ``generation_config``.

    Args:
        instruction: The task or question to put in the prompt.
        context: Optional input text forwarded to ``generate_prompt``.

    Returns:
        The decoded first returned sequence with special tokens removed.
        The decoded text starts with the prompt itself, because the whole
        sequence (prompt tokens + generated tokens) is decoded.
    """
    prompt = generate_prompt(instruction, context)
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

    # Inference only: no gradients needed. Per-step scores are not requested
    # because nothing downstream reads them.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
        )

    return tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)


questions = [
    'What does μονογενής mean?',
    'What is the grammatical case of μονογενής?',
    'What is the semantic domain information of μονογενής?',
    'What is the lemma form of μονογενής?',
    "What is the Strong's number of μονογενής?",
    'What are the 3 discourse features that this word functions within?',
    'What is the Louw-Nida semantic domain entry for this word?',
    'How does the verb focus contribute to the meaning of the clause?'
]

# Context passage supplied as the "### Input:" section for every question.
input_ctxt = '''
and its grammatical case is accusative. μονογενής is a adj, and its semantic 
domain information is 058007. The grammatical gender of this word is masculine, 
meaning “only begotten.” The lemma form of this word is μονογενής, 
The Louw-Nida semantic domain entry for this word is 58.52. and it is 
classified as a A-ASM and its grammatical number is singular. with a Strong’s 
number of 3439. This word functions within 3 discourse feature(s), including  
Focus+, which is defined as Constituents placed in P2 to give them focal 
prominence. Verb Focus+, which is defined as Verb in final position in clause 
demonstrates verb focus. Main clauses, which is defined as Main clauses are the 
top-level clauses in a sentence. 
They are the clauses that are not embedded in other clauses.
'''

# Ask each question against the same context and print the decoded output,
# separated by a dashed rule.
for question in questions:
    response = generate_response(question, input_ctxt)
    print(response)
    print('\n')
    print('--------------------------------------------------------------------')
    print('\n')

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/556 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/39 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00039.bin:   0%|          | 0.00/396M [00:00<?, ?B/s]

Downloading (…)l-00002-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00003-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00004-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00005-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00006-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00007-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00008-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00009-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00010-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00011-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00012-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00013-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00014-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00015-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00016-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00017-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00018-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00019-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00020-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00021-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00022-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00023-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00024-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00025-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00026-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00027-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00028-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00029-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00030-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00031-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00032-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00033-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00034-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00035-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00036-of-00039.bin:   0%|          | 0.00/315M [00:00<?, ?B/s]

Downloading (…)l-00037-of-00039.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading (…)l-00038-of-00039.bin:   0%|          | 0.00/304M [00:00<?, ?B/s]

Downloading (…)l-00039-of-00039.bin:   0%|          | 0.00/262M [00:00<?, ?B/s]


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.9/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /usr/local/lib/python3.9/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


Loading checkpoint shards:   0%|          | 0/39 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
What does μονογενής mean?

### Input:

and its grammatical case is accusative. μονογενής is a adj, and its semantic 
domain information is 058007. The grammatical gender of this word is masculine, 
meaning “only begotten.” The lemma form of this word is μονογενής, 
The Louw-Nida semantic domain entry for this word is 58.52. and it is 
classified as a A-ASM and its grammatical number is singular. with a Strong’s 
number of 3439. This word functions within 3 discourse feature(s), including  
Focus+, which is defined as Constituents placed in P2 to give them focal 
prominence. Verb Focus+, which is defined as Verb in final position in clause 
demonstrates verb focus. Main clauses, which is defined as Main clauses are the 
top-level clauses in a sentence. 
They are the clauses that are not embedded in other clauses.


In [None]:
# Ask the model to propose questions about the same context passage.
prompt = generate_prompt(
    'Generate a list of questions based off of the input.',
    input_ctxt,
)

# Tokenize the prompt and move the ids onto the model's device.
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

generate_kwargs = dict(
    input_ids=input_ids,
    generation_config=generation_config,
    return_dict_in_generate=True,
    output_scores=True,
)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model.generate(**generate_kwargs)

# Decode the first returned sequence; the decoded text includes the prompt.
response = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
print(response)

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Generage a list of questions based off of the input.

### Input:

and its grammatical case is accusative. μονογενής is a adj, and its semantic 
domain information is 058007. The grammatical gender of this word is masculine, 
meaning “only begotten.” The lemma form of this word is μονογενής, 
The Louw-Nida semantic domain entry for this word is 58.52. and it is 
classified as a A-ASM and its grammatical number is singular. with a Strong’s 
number of 3439. This word functions within 3 discourse feature(s), including  
Focus+, which is defined as Constituents placed in P2 to give them focal 
prominence. Verb Focus+, which is defined as Verb in final position in clause 
demonstrates verb focus. Main clauses, which is defined as Main clauses are the 
top-level clauses in a sentence. 
They are the clauses that are not 