In [None]:
# import necessary packages

from transformers import GPT2LMHeadModel, GPT2Tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import numpy as np
from collections import deque
import ipywidgets as widgets
from IPython.display import display

# Demo widget: an integer slider over 0..10 (step 1) that starts at 5.
# No other visible cell reads its value.
slider = widgets.IntSlider(
    value=5,
    min=0,
    max=10,
    step=1,
    description='Slider:',
)
display(slider)

IntSlider(value=5, description='Slider:', max=10)

In [None]:
# Bounded store for context snippets: a deque created with `maxlen` discards
# its oldest entry whenever a new one is appended to a full deque.
cache_size = 5  # upper bound on the number of snippets kept
cache = deque([], maxlen=cache_size)

In [None]:
# Cache writer
def add_to_cache(text: str, cache) -> None:
    """Append ``text`` to the end of ``cache``.

    Args:
        text: Snippet to remember.
        cache: Container exposing ``append``; mutated in place.

    Returns:
        None.
    """
    cache.append(text)

# Function to retrieve the most recent content from the cache
def retrieve_from_cache(query, cache, k=3):
    """Return up to ``k`` of the most recently added cache entries.

    No relevance ranking is performed: ``query`` is accepted so the call
    signature stays stable, but it does not influence the result.

    Args:
        query: Current user query (unused).
        cache: Iterable of cached entries, oldest first.
        k: Maximum number of entries to return.

    Returns:
        list: The last ``k`` entries in insertion order; an empty list when
        ``k`` is zero or negative.
    """
    # ``seq[-0:]`` is ``seq[0:]``, so without this guard k=0 would hand back
    # the entire cache instead of nothing.
    if k <= 0:
        return []
    return list(cache)[-k:]

# Generate a continuation for the query, prefixed with recent cache entries
def generate_with_cache(query, cache, model, tokenizer, max_length=1024, max_new_tokens=50):
    """Generate text for ``query`` using recent cache entries as leading context.

    The prompt is the entries returned by ``retrieve_from_cache`` joined with
    spaces, followed by ``query``. The prompt is NOT padded: padding a single
    sequence to ``max_length`` and masking it with all ones would make the
    model attend to the padding.

    Args:
        query (str): The user input, placed at the end of the prompt.
        cache: Cache handed to ``retrieve_from_cache``.
        model: Causal language model exposing ``generate`` and ``config``.
        tokenizer: Tokenizer matching ``model``.
        max_length (int): Upper bound on the number of prompt tokens.
        max_new_tokens (int): Number of tokens to generate after the prompt.

    Returns:
        str: ``tokenizer.decode`` of the first generated sequence, with
        special tokens skipped.

    Raises:
        ValueError: If no prompt token fits next to ``max_new_tokens``, or if
            a token id is outside ``model.config.vocab_size``.
    """
    # Retrieve cached information and append the query to it
    relevant_texts = retrieve_from_cache(query, cache)
    context = " ".join(relevant_texts) + " " + query

    # Prompt and continuation share the model's context window, so reserve
    # space for the tokens that will be generated.
    prompt_budget = max_length
    context_window = getattr(model.config, "max_position_embeddings", None)
    if context_window is not None:
        prompt_budget = min(prompt_budget, context_window - max_new_tokens)
    if prompt_budget <= 0:
        raise ValueError(
            "max_length/max_new_tokens leave no room for any prompt tokens."
        )

    # Tokenize without padding and let the tokenizer supply the attention mask
    encoded = tokenizer(context, return_tensors="pt")

    # Keep the tail of an over-long prompt: the query sits at the end, so
    # dropping the oldest context is the less harmful cut.
    input_ids = encoded["input_ids"][:, -prompt_budget:]
    attention_mask = encoded["attention_mask"][:, -prompt_budget:]

    # Check if there are any invalid token ids (out of vocab range)
    if torch.any(input_ids >= model.config.vocab_size):
        raise ValueError("One or more tokens are out of vocabulary range.")

    # Some tokenizers (GPT-2 among them) define no pad token; fall back to EOS
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Generate the output without tracking gradients
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            attention_mask=attention_mask,
            pad_token_id=pad_token_id,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
        )

    # Decode the generated response
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return generated_text


In [None]:
# Seed the cache with two background statements about AI
add_to_cache(
    text="Artificial intelligence (AI) refers to the simulation of human intelligence in machines.",
    cache=cache,
)
add_to_cache(
    text="The Turing test is used to evaluate a machine's ability to exhibit intelligent behavior.",
    cache=cache,
)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
model_name = "gpt2"  # Replace with your actual model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# GPT-2 defines no padding token, so tokenizer.pad_token_id would be None and
# generate() would fall back to EOS with a warning. Set it explicitly.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Define the query
query = "Tell me about the history and future of artificial intelligence, its applications, and current advancements."

# Tokenize the input
inputs = tokenizer(query, return_tensors="pt")

# Sampling is stochastic; fix the RNG so a fresh run reproduces the same text
torch.manual_seed(42)

# Generate response (inference only, so gradients are not tracked)
with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        max_length=1024,  # Cap on prompt + generated tokens
        no_repeat_ngram_size=2,  # Reduce repetitive phrases
        do_sample=True,  # Required: temperature/top_p/top_k are ignored under greedy decoding
        temperature=0.7,  # Control randomness
        top_p=0.9,  # Use nucleus sampling for variety
        top_k=50,  # Limit next token choices
        attention_mask=inputs["attention_mask"],
        pad_token_id=tokenizer.pad_token_id
    )

# Decode and display result
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Tell me about the history and future of artificial intelligence, its applications, and current advancements.

I'm a big fan of the idea of AI. I've been working on AI for a long time, but I'm not sure what it is yet. It's a very interesting idea, I think. But I don't think it's going to be a major breakthrough. There are a lot of things that are going on that I haven't really thought about yet, so I can't talk about them. So I'll just say that it will be interesting to see what happens. And I hope that we can get a better understanding of what's happening. We're going through a period of time where we're seeing a huge amount of progress in AI, which is a good thing. The question is, what are the implications of that?
. . .
 (Laughter.)
,
-
(Laughing.) I mean, it seems like there's some kind of a problem with that. You know, there are some things in the world that you can do with artificial intelligences that can be used to solve problems. One of them is that they can learn from experie

In [None]:
# Import necessary packages
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
from collections import deque

# Load the pre-trained GPT-2 weights and put the model in evaluation mode
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name).eval()

# Tokenizer configured to pad on the left
tokenizer = GPT2Tokenizer.from_pretrained(model_name, padding_side='left')

# Reuse EOS as the pad token when the tokenizer does not define one
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Fresh bounded cache: holds at most `cache_size` snippets
cache_size = 5
cache = deque([], maxlen=cache_size)

# Cache writer
def add_to_cache(text: str, cache) -> None:
    """Store ``text`` as the newest entry of ``cache``.

    Args:
        text: Snippet to remember.
        cache: Container exposing ``append``; it is modified in place.
    """
    cache.append(text)

# Function to retrieve the most recent content from the cache
def retrieve_from_cache(query, cache, k=3):
    """Return up to ``k`` of the newest cache entries, oldest of them first.

    ``query`` is part of the signature but is not used: entries are selected
    purely by recency, not by relevance.

    Args:
        query: Current user query (unused).
        cache: Iterable of cached entries in insertion order.
        k: Maximum number of entries to return.

    Returns:
        list: The last ``k`` entries; empty when ``k`` is zero or negative.
    """
    # A bare ``[-k:]`` with k == 0 slices from index 0 and would return
    # every entry, so non-positive k is handled explicitly.
    if k <= 0:
        return []
    return list(cache)[-k:]  # Get the last k items

# Cache-aware text generation
def generate_with_cache(query, cache, model, tokenizer, max_new_tokens=100):
    """Generate a continuation for ``query`` preceded by cached snippets.

    Args:
        query (str): User input; appended after the cached snippets.
        cache: Cache passed through to ``retrieve_from_cache``.
        model: Model exposing ``generate``.
        tokenizer: Callable tokenizer exposing ``decode`` and ``pad_token_id``.
        max_new_tokens (int): Number of tokens to generate.

    Returns:
        str: Decoded text of the first returned sequence, special tokens
        skipped.
    """
    # Prompt = cached snippets joined by spaces, one more space, then the query
    recent_entries = retrieve_from_cache(query, cache)
    prompt = " ".join((" ".join(recent_entries), query))

    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)

    generation_kwargs = dict(
        max_new_tokens=max_new_tokens,
        attention_mask=encoded["attention_mask"],
        pad_token_id=tokenizer.pad_token_id,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
    )

    # Inference only: no gradient bookkeeping needed
    with torch.no_grad():
        sequences = model.generate(encoded["input_ids"], **generation_kwargs)

    return tokenizer.decode(sequences[0], skip_special_tokens=True)

# Seed the cache with two background statements about AI
add_to_cache(
    text="Artificial intelligence (AI) refers to the simulation of human intelligence in machines.",
    cache=cache,
)
add_to_cache(
    text="The Turing test is used to evaluate a machine's ability to exhibit intelligent behavior.",
    cache=cache,
)

# The question to answer
query = "Tell me about the history and future of artificial intelligence, its applications, and current advancements."

# Run the cache-aware generator and show what it produced
generated_text = generate_with_cache(query=query, cache=cache, model=model, tokenizer=tokenizer)
print(generated_text)

Artificial intelligence (AI) refers to the simulation of human intelligence in machines. The Turing test is used to evaluate a machine's ability to exhibit intelligent behavior. Tell me about the history and future of artificial intelligence, its applications, and current advancements.

The Turing Test
. . .
 (I'm not sure if I've ever heard of the Turing-test, but I'm sure it's a good one.)
, a computer program that is programmed to perform a task. It is a program which is designed to do a certain task, such as a particular task or a specific task that requires a given amount of time. A Turing machine is an artificial intelligent system which can perform certain tasks. In the case of a Turing computer


In [None]:
# Import necessary packages
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
from collections import deque

# Checkpoint shared by the tokenizer and the model
model_name = "gpt2"

# Pre-trained GPT-2 weights, switched to evaluation mode right after loading
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

# Left-padding tokenizer; EOS doubles as the pad token when none is defined
tokenizer = GPT2Tokenizer.from_pretrained(model_name, padding_side='left')
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Bounded cache of at most `cache_size` snippets
cache_size = 5
cache = deque([], maxlen=cache_size)

# Cache writer
def add_to_cache(text: str, cache) -> None:
    """Push ``text`` onto the end of ``cache`` (in-place mutation).

    Args:
        text: Snippet to remember.
        cache: Container exposing ``append``.
    """
    cache.append(text)

# Function to retrieve the most recent content from the cache
def retrieve_from_cache(query, cache, k=3):
    """Return at most ``k`` entries from the end of ``cache``.

    Selection is by recency only; ``query`` is accepted but never consulted.

    Args:
        query: Current user query (unused).
        cache: Iterable of cached entries in insertion order.
        k: Maximum number of entries to return.

    Returns:
        list: The last ``k`` entries in insertion order, or an empty list
        when ``k`` is zero or negative.
    """
    # Slicing with ``[-0:]`` yields the full sequence, so k <= 0 must be
    # short-circuited to really mean "no entries".
    if k <= 0:
        return []
    return list(cache)[-k:]  # Get the last k items

# Cache-aware text generation with sampling
def generate_with_cache(query, cache, model, tokenizer, max_new_tokens=300):
    """Sample a continuation for ``query`` preceded by cached snippets.

    Args:
        query (str): User input; appended after the cached snippets.
        cache: Cache passed through to ``retrieve_from_cache``.
        model: Model exposing ``generate``.
        tokenizer: Callable tokenizer exposing ``decode`` and ``pad_token_id``.
        max_new_tokens (int): Number of tokens to generate.

    Returns:
        str: Decoded text of the first returned sequence, special tokens
        skipped.
    """
    # Build the prompt: cached snippets, a space, then the query
    cached_snippets = retrieve_from_cache(query, cache)
    prompt_text = " ".join((" ".join(cached_snippets), query))
    batch = tokenizer(prompt_text, return_tensors="pt", truncation=True, padding=True)

    # Sampling configuration, kept together so it is easy to tune
    sampling_options = {
        "temperature": 0.8,  # randomness of the draw
        "top_p": 0.95,  # nucleus sampling threshold
        "top_k": 100,  # candidate tokens considered per step
        "do_sample": True,  # sample instead of decoding greedily
    }

    with torch.no_grad():
        generated = model.generate(
            batch["input_ids"],
            max_new_tokens=max_new_tokens,
            attention_mask=batch["attention_mask"],
            pad_token_id=tokenizer.pad_token_id,
            num_return_sequences=1,
            no_repeat_ngram_size=2,  # forbid repeated bigrams
            **sampling_options,
        )

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Adding example text to the cache
add_to_cache("Artificial intelligence (AI) refers to the simulation of human intelligence in machines.", cache)
add_to_cache("The Turing test is used to evaluate a machine's ability to exhibit intelligent behavior.", cache)

# Query for AI history and future
query = "Tell me about the history and future of artificial intelligence, its applications, and current advancements."

# generate_with_cache samples (do_sample=True), so fix the RNG first;
# otherwise every Restart & Run All prints a different text.
torch.manual_seed(42)

# Generate response using cache-aware function
generated_text = generate_with_cache(query, cache, model, tokenizer)

# Print the result
print(generated_text)


Artificial intelligence (AI) refers to the simulation of human intelligence in machines. The Turing test is used to evaluate a machine's ability to exhibit intelligent behavior. Tell me about the history and future of artificial intelligence, its applications, and current advancements.


I believe that AI is the next frontier in cognitive science and the development of computer programming languages. I want to share with you my hope for artificial intelligent computing. While it may be a bit of a long road ahead, I believe we are on the right path to come. AI can not only make machines smarter, it can also make them smarter.

Technology is changing the way we think and think about all of the possible things that could be. Today, we have the technology to create a world where human beings could learn and grow smarter and smarter as they work together to improve the lives of people around the world. In this regard, robots will be the key to humanity's future. But it will also give us a c

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint to load (swap 'gpt2' for another causal LM if desired)
MODEL_NAME = "gpt2"
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Use EOS as the padding token on the tokenizer and mirror its id into the
# model config
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

# In-memory cache keyed by query text
cache = dict()

def retrieve_from_cache(query, cache):
    """Retrieve the cached responses stored under exactly ``query``.

    The result is always a list so callers can join it safely. A single
    cached string is wrapped rather than returned bare, because joining a
    bare string with ``" ".join`` would insert a space between every
    character.

    Args:
        query: Key to look up.
        cache (dict): Mapping from query to a response string or a
            sequence of response strings.

    Returns:
        list: Cached responses for ``query``; empty when nothing is stored.
    """
    cached = cache.get(query)
    if cached is None:
        return []
    if isinstance(cached, str):
        return [cached]
    return list(cached)

def store_in_cache(query: str, response: str, cache: dict) -> None:
    """Record ``response`` under the key ``query``.

    An entry already stored for the same query is overwritten. ``cache`` is
    modified in place and nothing is returned.
    """
    cache[query] = response

def generate_with_cache(query, cache, model, tokenizer, max_length=1024, max_new_tokens=300):
    """
    Generates text using a Transformer model with caching.

    Whatever ``retrieve_from_cache`` returns for ``query`` is prepended to
    the query to form the prompt, a continuation is sampled, and the decoded
    result is written back with ``store_in_cache``.

    The prompt is tokenized without padding, and positions are left for the
    model to derive. Padding one sequence to ``max_length`` with an all-ones
    mask makes the model attend to the filler, and a fixed ``position_ids``
    tensor does not cover the newly generated tokens.

    Parameters:
        query (str): The user input.
        cache (dict): Cache storing previous query-response pairs.
        model: Pre-trained transformer model.
        tokenizer: Pre-trained tokenizer.
        max_length (int): Maximum number of prompt tokens.
        max_new_tokens (int): Number of new tokens to generate.

    Returns:
        str: Generated text.

    Raises:
        ValueError: If no prompt token fits next to ``max_new_tokens``.
    """
    # Retrieve relevant cached information
    relevant_texts = retrieve_from_cache(query, cache)

    # A cached response may come back as a bare string; wrap it so the join
    # below works on whole texts rather than on single characters.
    if isinstance(relevant_texts, str):
        relevant_texts = [relevant_texts]

    # Combine cache with the current query
    context = " ".join(relevant_texts) + " " + query if relevant_texts else query

    # Prompt and continuation share the model's context window, so reserve
    # space for the tokens that will be generated.
    prompt_budget = max_length
    context_window = getattr(model.config, "max_position_embeddings", None)
    if context_window is not None:
        prompt_budget = min(prompt_budget, context_window - max_new_tokens)
    if prompt_budget <= 0:
        raise ValueError(
            "max_length/max_new_tokens leave no room for any prompt tokens."
        )

    # Tokenize without padding and let the tokenizer supply the attention mask
    encoded = tokenizer(context, return_tensors="pt")

    # Keep the tail of an over-long prompt: the query sits at the end
    input_ids = encoded["input_ids"][:, -prompt_budget:]
    attention_mask = encoded["attention_mask"][:, -prompt_budget:]

    # Generate output
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            attention_mask=attention_mask,
            pad_token_id=tokenizer.pad_token_id,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,  # Enable sampling
            temperature=0.8,  # Control randomness
            top_p=0.95,  # Nucleus sampling
            top_k=50  # Limit token choices
        )

    # Decode the generated response
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Store in cache
    store_in_cache(query, generated_text, cache)

    return generated_text

# Example Query
query = "Tell me about the history and future of artificial intelligence."

# generate_with_cache samples (do_sample=True), so fix the RNG first;
# otherwise every Restart & Run All prints a different text.
torch.manual_seed(42)

generated_text = generate_with_cache(query, cache, model, tokenizer)

# Print the response
print("\nGenerated Response:\n", generated_text)



Generated Response:
 Tell me about the history and future of artificial intelligence.
 by the primary name is the term of the name of a name by using a list of how many of its name, is a term to be the " name. ( to a " the first to the number of being a form the other names, the word that is an name or the most commonly commonly used to which is ( a person to do, to name that the object by being by or by name the by word by that name for the world. that a word or in this, or a one or that in the man to describe the state in which the common to make or do to have the way of that which that as a man, by other name as the person by man is more than man by which he or man the day of man that man or men by any man man are we know who is to man in a male or he is man a general man and his name to me to he that he by men man as man who he in his work by him or him by a certain man of us by his or is by their name he/ man they are he was he have " man his man was the country by one man he man