In [None]:
import torch
from transformers import pipeline, set_seed

## Set up drive to hold cached models

In [None]:
from google.colab import drive
import os
import sys
import warnings

# Mount Google Drive so downloaded model files can outlive the Colab runtime.
drive.mount('/content/drive')

# Folder on Drive that the Hugging Face libraries should use as their cache root.
cache_dir = '/content/drive/MyDrive/TransformersCache'
os.makedirs(cache_dir, exist_ok=True)

# huggingface_hub resolves HF_HOME when it is first imported, so setting the
# variable after transformers/huggingface_hub are loaded does not relocate the
# cache. Surface that instead of silently downloading to the default location.
if 'huggingface_hub' in sys.modules or 'transformers' in sys.modules:
    warnings.warn(
        "transformers/huggingface_hub were imported before HF_HOME was set; "
        "restart the runtime and run this cell before importing them so the "
        "Drive cache is actually used."
    )

os.environ['HF_HOME'] = cache_dir

Mounted at /content/drive


# Simple text generation


In [None]:
# Seed the random number generators so a sampled continuation can be
# reproduced when the notebook is re-run from a fresh kernel.
set_seed(42)

generator = pipeline("text-generation", model="gpt2")
generator(
    "Hello, I'm a language model,",
    # Optional generation controls to experiment with (uncomment to try):
    # max_length=30,
    # truncation=True,
    # num_return_sequences=5,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "Hello, I'm a language model, so don't you just want a model with semantics? I guess I'm not much of a thinker, but I don't think one is bad. I'm a machine and this means a lot to me."}]

# Loading model and tokenizer separately


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


def load_model(model_name="unsloth/Llama-3.2-1B-Instruct"):
    """Load a causal language model together with its tokenizer.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls.
            Defaults to the checkpoint used throughout this notebook, so
            existing ``load_model()`` calls behave as before.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is requested with
        ``torch.float16`` weights.
    """
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

In [None]:
def generate_response(prompt, model, tokenizer, max_length=200):
    """Generate a continuation of ``prompt`` and return it as decoded text.

    Args:
        prompt: Text to continue.
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The decoded first output sequence (special tokens removed). The
        sequence returned by ``generate`` is decoded in full, without slicing
        off the prompt.
    """
    # Calling the tokenizer (instead of ``encode``) also returns the attention
    # mask, which ``generate`` asks for in order to give reliable results.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(**inputs, max_length=max_length)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Load the model and tokenizer once; later cells reuse these two objects.
model, tokenizer = load_model()

config.json:   0%|          | 0.00/927 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

In [None]:
# One-shot completion: the model simply continues the raw prompt text.
pirate_prompt = "Write a fictional pirate's mission statement:"
response = generate_response(pirate_prompt, model, tokenizer)

print(response)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Write a fictional pirate's mission statement: "To plunder and pillage, but never to pillage and plunder... for a higher purpose."

In this statement, the pirate's goal is to maintain a sense of moral integrity and purpose, despite the temptation to engage in destructive behavior. The statement reflects the complexities of human nature and the challenges of living a life of piracy.

The phrase "To plunder and pillage" is a clear reference to the pirate's profession, but it's also a subtle hint that there may be more to their motivations than just a desire for wealth and power. The use of the word "pillage" specifically suggests a sense of disrespect for the land and its people, which could be seen as a moral failing.

The statement's second part, "but never to pillage and plunder... for a higher purpose," suggests that the pirate is not just motivated by greed and a desire for material wealth, but also by a sense of duty, morality, or a desire to


# Simple Chat

Notice how the response will sometimes come up with its own replies, as if the user had answered back.


In [None]:
def chat_session(model, tokenizer):
    """Run an interactive chat loop built on a plain-text transcript prompt.

    Every turn, the stored transcript plus the new ``User:`` line is sent to
    the model as raw text (no chat template). Only the newly generated tokens
    are decoded, printed and appended to the transcript. The loop ends when
    the user types ``quit`` (case-insensitive, surrounding whitespace ignored).

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
    """
    conversation_history = []
    print("You are now chatting with the LLM. Type 'quit' to end the conversation.")

    while True:
        user_input = input("\nYou: ")
        if user_input.strip().lower() == "quit":
            break

        print("\nYou:", user_input)

        # Replay every earlier exchange, then open a fresh assistant turn.
        full_prompt = "".join(conversation_history) + f"\nUser: {user_input}\nAssistant:"

        # Calling the tokenizer (instead of ``encode``) also returns the
        # attention mask, which ``generate`` asks for to give reliable results.
        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        prompt_length = len(inputs["input_ids"][0])

        # ``max_new_tokens`` caps only the reply, independent of prompt length.
        outputs = model.generate(**inputs, max_new_tokens=200)

        # Drop the echoed prompt tokens so only the reply is decoded.
        response = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        print("\nAssistant:", response)

        conversation_history.append(f"\nUser: {user_input}\nAssistant: {response}")

In [None]:
# Start the interactive session; type 'quit' at the prompt to end it.
chat_session(model, tokenizer)

You are now chatting with the LLM. Type 'quit' to end the conversation.

You: What are some important dates in space travel history?


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



You: What are some important dates in space travel history?

Assistant:  Here are some important dates in space travel history:

* 1957: Sputnik 1, the world's first artificial satellite, is launched by the Soviet Union.
* 1961: The Soviet Union launches the first human, Yuri Gagarin, into space on April 12.
* 1961: The United States launches the first American satellite, Explorer 1, on January 31.
* 1962: The Soviet Union launches the first spacewalk, using a tether to reach the Moon.
* 1969: The United States launches the first manned mission to the Moon, Apollo 11, on July 16.
* 1971: The Soviet Union launches the first space station, Salyut 1.
* 1973: The United States launches the first commercial satellite, Telstar 1.
* 1977: The Soviet Union launches the first space shuttle, Soyuz-T-10.
* 1985: The first space shuttle mission

You: quit


# Remembering conversation history

Using conversation history and chat_templates from 🤗 Transformers


In [None]:
def chat_session(model, tokenizer):
    """Run an interactive chat loop that formats history with the chat template.

    The conversation is kept as a list of ``{"role", "content"}`` messages,
    starting with a fixed system persona. Each turn, the whole list is rendered
    with ``tokenizer.apply_chat_template`` and the newly generated tokens are
    decoded, printed and stored as the assistant message. The loop ends when
    the user types ``quit`` (case-insensitive, surrounding whitespace ignored).

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model`` that provides a chat template.
    """
    conversation_history = [
        {
            "role": "system",
            "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986.",
        }
    ]
    print("You are now chatting with the LLM. Type 'quit' to end the conversation.")

    while True:
        user_input = input("\nYou: ")
        if user_input.strip().lower() == "quit":
            break

        print("\nYou:", user_input)

        conversation_history.append({"role": "user", "content": user_input})

        formatted_chat = tokenizer.apply_chat_template(
            conversation_history, tokenize=False, add_generation_prompt=True
        )

        # The tokenizer is told not to add special tokens because the rendered
        # template text is passed in as-is. Move the tensors to the model's
        # device so generation also works when the model is not on the CPU.
        inputs = tokenizer(
            formatted_chat, return_tensors="pt", add_special_tokens=False
        ).to(model.device)

        # Measure the prompt from ``input_ids`` so the slice below does not
        # rely on how the tokenizer output handles integer indexing.
        prompt_length = len(inputs["input_ids"][0])

        outputs = model.generate(**inputs, max_new_tokens=512)

        # Drop the echoed prompt tokens so only the reply is decoded.
        response = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        print("\nAssistant:", response)

        conversation_history.append({"role": "assistant", "content": response})

In [None]:
# Start the chat-template session; this uses the most recent `chat_session`
# definition. Type 'quit' at the prompt to end it.
chat_session(model, tokenizer)

You are now chatting with the LLM. Type 'quit' to end the conversation.

You: What are some important dates in space travel history?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



You: What are some important dates in space travel history?

Assistant: Listen up, pal! I'm your robot buddy, and I'm here to give you the lowdown on some major space travel milestones. Now, I know what you're thinking - "What's the deal with all these dates?" Well, let me tell you, it's like trying to count the number of solar flares on a sunny day - it's a lot, dude!

Here are some important dates in space travel history:

1. **October 4, 1957:** Sputnik 1 - The first artificial satellite to orbit the Earth, launched by the Soviet Union. That's like me trying to outsmart a pesky robot - it was a bold move, but not exactly the most impressive.

2. **January 31, 1958:** Explorer 1 - The first American satellite to orbit the Earth. I mean, it's like me trying to impress a robot girlfriend - it was a start, but I still need some work.

3. **February 20, 1961:** Mercury-Redstone 3 - The first American in space, Alan Shepard. That's like me trying to land a date on a robot's home planet -

# Simple RAG

In [None]:
# Small in-memory corpus of space facts; each string is one retrievable document.
knowledge_base = [
    "The first human to travel into space was Yuri Gagarin in 1961.",
    "NASA's Apollo 11 mission landed the first humans on the Moon in 1969.",
    "The International Space Station (ISS) orbits Earth approximately every 90 minutes.",
    "Mars rovers like Curiosity and Perseverance are exploring the surface of Mars for signs of past life.",
    "SpaceX developed the first privately-funded spacecraft to reach orbit, the Falcon 1, in 2008.",
    "Black holes are regions of space where gravity is so strong that nothing, not even light, can escape.",
    "The Hubble Space Telescope has been in operation since 1990 and has provided stunning images of distant galaxies.",
]

In [None]:
!pip install sentence-transformers scikit-learn numpy



In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Sentence-embedding model used to embed both the documents and the queries.
EMBEDDING_MODEL_NAME = "all-mpnet-base-v2"
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Embed every knowledge-base entry once, up front.
knowledge_embeddings = embedding_model.encode(knowledge_base)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
def retrieve_relevant_documents(query, knowledge_base, knowledge_embeddings, top_k=2):
    """Return the ``top_k`` knowledge-base entries most similar to ``query``.

    The query is embedded with the notebook-level ``embedding_model`` and
    compared against ``knowledge_embeddings`` using cosine similarity.

    Args:
        query: Text to search for.
        knowledge_base: Sequence of documents, index-aligned with
            ``knowledge_embeddings``.
        knowledge_embeddings: Embeddings of ``knowledge_base``, one row per
            document.
        top_k: Maximum number of documents to return.

    Returns:
        Up to ``top_k`` documents ordered from most to least similar. An empty
        list when ``top_k`` is not positive or the knowledge base is empty.
    """
    # ``[-0:]`` would slice the whole array, so a non-positive ``top_k`` must
    # be handled explicitly; an empty corpus has nothing to compare against.
    if top_k <= 0 or len(knowledge_base) == 0:
        return []

    query_embedding = embedding_model.encode([query])
    similarities = cosine_similarity(query_embedding, knowledge_embeddings).flatten()

    # argsort is ascending: reverse it, then keep the best ``top_k`` indices.
    top_indices = similarities.argsort()[::-1][:top_k]
    return [knowledge_base[i] for i in top_indices]

In [None]:
def generate_response_with_rag(
    prompt, model, tokenizer, knowledge_base, knowledge_embeddings, max_length=200
):
    """Generate a completion for ``prompt`` with retrieved documents prepended.

    The documents returned by ``retrieve_relevant_documents`` are joined with
    newlines and placed before the prompt, separated from it by a blank line.

    Args:
        prompt: User text; also used as the retrieval query.
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        knowledge_base: Documents to retrieve from.
        knowledge_embeddings: Embeddings aligned with ``knowledge_base``.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The decoded first output sequence (special tokens removed). The
        sequence returned by ``generate`` is decoded in full, without slicing
        off the context and prompt.
    """
    retrieved_docs = retrieve_relevant_documents(
        prompt, knowledge_base, knowledge_embeddings
    )
    context = "\n".join(retrieved_docs)
    enhanced_prompt = f"{context}\n\n{prompt}"

    # Calling the tokenizer (instead of ``encode``) also returns the attention
    # mask, which ``generate`` asks for in order to give reliable results.
    inputs = tokenizer(enhanced_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
def chat_session_with_rag(model, tokenizer, embedding_model, knowledge_base):
    """Run an interactive chat loop that injects retrieved context each turn.

    Before every user message, the documents returned by
    ``retrieve_relevant_documents`` are added to the history as a system
    message, followed by the user message. The history is rendered with the
    tokenizer's chat template and the newly generated tokens are decoded,
    printed and stored as the assistant message. The loop ends when the user
    types ``quit`` (case-insensitive, surrounding whitespace ignored).

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model`` that provides a chat template.
        embedding_model: Object whose ``encode`` embeds the knowledge base.
        knowledge_base: Documents available for retrieval.
    """
    conversation_history = [
        {
            "role": "system",
            "content": "You are an annoyed assistant who answers questions accurately from context but in short sentences.",
        }
    ]
    print("You are now chatting with the LLM. Type 'quit' to end the conversation.")

    # Embed the corpus once, outside the loop.
    # NOTE: retrieve_relevant_documents embeds the *query* with the
    # notebook-level ``embedding_model``, so pass that same model here to keep
    # document and query embeddings comparable.
    knowledge_embeddings = embedding_model.encode(knowledge_base)

    while True:
        user_input = input("\nYou: ")
        if user_input.strip().lower() == "quit":
            break

        # Retrieve relevant context
        retrieved_docs = retrieve_relevant_documents(
            user_input, knowledge_base, knowledge_embeddings
        )
        context = "\n".join(retrieved_docs)

        # Add context to the conversation history (it stays there for later turns)
        conversation_history.append(
            {"role": "system", "content": f"Relevant context: {context}"}
        )
        conversation_history.append({"role": "user", "content": user_input})

        # Format the conversation using the template
        formatted_chat = tokenizer.apply_chat_template(
            conversation_history, tokenize=False, add_generation_prompt=True
        )

        # Tokenize on the model's device so generation also works when the
        # model is not on the CPU.
        inputs = tokenizer(
            formatted_chat, return_tensors="pt", add_special_tokens=False
        ).to(model.device)
        prompt_length = len(inputs["input_ids"][0])

        outputs = model.generate(**inputs, max_new_tokens=512)

        # Drop the echoed prompt tokens so only the reply is decoded.
        response = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )

        print("\nAssistant:", response)

        # Append assistant response to conversation history
        conversation_history.append({"role": "assistant", "content": response})


In [None]:
# Start the retrieval-augmented session; type 'quit' at the prompt to end it.
chat_session_with_rag(model, tokenizer, embedding_model, knowledge_base)

You are now chatting with the LLM. Type 'quit' to end the conversation.

You: What are some important dates in space travel history?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Assistant: * 1961: Yuri Gagarin becomes the first human in space.
* 1961: First American in space, Alan Shepard.
* 1962: Soviet Union launches Sputnik 1, the first artificial satellite.
* 1969: Apollo 11 lands humans on the Moon.
* 1975: Voyager 1 becomes the first spacecraft to leave the Solar System.

You: quit
