In [None]:
pip install transformers torch

In [4]:
# Import necessary libraries
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

# Initialize GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

In [None]:
# Sample retrieval data
retrieval_data = {
    "services": """We offer a variety of printing services including:

- **Packaging**: Custom packaging and box making to amaze your customer with a marketing piece that conveys your first-class brand. We can make custom cardboard boxes, custom shipping boxes, and more right here in Las Vegas.
- **Mailing & Personalization**: Drive conversion with a fully personalized message that wows your customer. We can do all your mailing services right here in Las Vegas.
- **Convention Printing**: Never miss out on communicating with a convention delegate again. We can take care of your convention printing services in Las Vegas.
- **Commercial Printing**: We guarantee the quality by completing every part of the operation ourselves. We can do book bindery, commercial printing, and more in Las Vegas.
- **Other Services**: See the other great printing services we offer that your business needs.""",
    "hours": "We are open Monday to Friday from 6 AM to 5 PM.",
    "location": "Our main office is located at 6360 Sunset Corporate Dr, Las Vegas, NV 89120.",
    "contact": "You can contact us at (702) 966-1000 or email us at hatamian@haigsprinting.com.",
    "pricing": "Our pricing depends on the type and quantity of print jobs. For detailed pricing, please visit our website or contact us directly.",
    "delivery": "We offer delivery services within the city. Delivery times vary depending on the size and complexity of the order."
}


# Function to generate response
def generate_response(prompt, chat_history_ids=None):
    # Encode the input prompt with a specific instruction for the model
    input_ids = tokenizer.encode("You are a helpful assistant for a Haig's printing company. You dont talk about anything else except printing " + prompt + tokenizer.eos_token, return_tensors='pt')
    
    # Concatenate new user input with chat history (if exists)
    if chat_history_ids is not None:
        input_ids = torch.cat([chat_history_ids, input_ids], dim=-1)
    
    # Generate response
    output = model.generate(
        input_ids,
        max_length=150,
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=2,
        temperature=0.1,
        top_k=50,
        top_p=0.9,
    )
    
    # Extract the response and append it to chat history
    response_ids = output[:, input_ids.shape[-1]:]
    chat_history_ids = torch.cat([input_ids, response_ids], dim=-1)
    response = tokenizer.decode(response_ids[0], skip_special_tokens=True)
    
    return response, chat_history_ids

# Function for retrieval (example, replace with your retrieval logic)
def retrieve_response(input_text):
    for key in retrieval_data:
        if key in input_text.lower():
            return retrieval_data[key]
    return None

# Initialize chat history
chat_history_ids = None

# Main chat loop
print("Welcome to the Haig's Quality Printing Chatbot! How can I assist you today?")
while True:
    user_input = input("You: ")
    
    # Retrieval step
    retrieved_response = retrieve_response(user_input)
    if retrieved_response:
        print("retrieved_response:", retrieved_response)
    else:
        # Generation step
        generated_response, chat_history_ids = generate_response(user_input, chat_history_ids)
        print("generated_response:", generated_response)

In [12]:
# trying out T5 model from huggingface

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

inputs = tokenizer("A step by step recipe to make bolognese pasta:", return_tensors="pt")
outputs = model.generate(**inputs)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))

['Preheat the oven to 375 degrees F. In a large bowl, combine the olive']


In [21]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

model = "tiiuae/falcon-40b"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
sequences = pipeline(
   "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")


ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install accelerate`