In [1]:
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name_or_path = "../model/Mistral-7B-v0.1-AWQ"

# Load model
model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
                                          trust_remote_code=False, safetensors=True)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False)

prompt = "Tell me about AI"
prompt_template=f'''{prompt}

'''

print("\n\n*** Generate:")

tokens = tokenizer(
    prompt_template,
    return_tensors='pt'
).input_ids.cuda()

# Generate output
generation_output = model.generate(
    tokens,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    max_new_tokens=512
)

print("Output: ", tokenizer.decode(generation_output[0]))

"""
# Inference should be possible with transformers pipeline as well in future
# But currently this is not yet supported by AutoAWQ (correct as of September 25th 2023)
from transformers import pipeline

print("*** Pipeline:")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

print(pipe(prompt_template)[0]['generated_text'])
"""

Replacing layers...: 100%|██████████| 32/32 [00:02<00:00, 11.64it/s]
Fusing layers...: 100%|██████████| 32/32 [00:05<00:00,  5.90it/s]




*** Generate:
Output:  <s> Tell me about AI

# What is AI?

Artificial intelligence is the process of building smart machines that can perform tasks like a human. The term is applied when a machine mimics cognitive functions that humans associate with other human minds, such as learning and problem-solving.

## AI explained

Artificial intelligence (AI) is the process of building smart machines that can perform tasks like a human. The term is applied when a machine mimics cognitive functions that humans associate with other human minds, such as learning and problem-solving. AI has been the subject of science fiction for many decades. However, artificial intelligence is now being used in many fields, including medicine, finance, law, and business. The term was first coined by John McCarthy in 1956.

## AI vs. machine learning

Artificial intelligence is the broad term for the concept of machines performing tasks that would typically require human intelligence. Machine learning is a su

'\n# Inference should be possible with transformers pipeline as well in future\n# But currently this is not yet supported by AutoAWQ (correct as of September 25th 2023)\nfrom transformers import pipeline\n\nprint("*** Pipeline:")\npipe = pipeline(\n    "text-generation",\n    model=model,\n    tokenizer=tokenizer,\n    max_new_tokens=512,\n    do_sample=True,\n    temperature=0.7,\n    top_p=0.95,\n    top_k=40,\n    repetition_penalty=1.1\n)\n\nprint(pipe(prompt_template)[0][\'generated_text\'])\n'