In [1]:
import torch
import torch.nn as nn
import re
import tensorflow as tf

In [2]:
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'\d+', '', text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'\W', ' ', text)
    return text

In [3]:
class RNNModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNNModel, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.RNN(embed_size, hidden_size, num_layers)
        self.linear = nn.Linear(hidden_size, vocab_size)
        
    def forward(self, x, h):
        x = self.embed(x)
        out, h = self.rnn(x, h)
        out = self.linear(out)
        return out, h

In [4]:
def train(model, data, epochs, lr):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        hidden = None
        for x, y in data:
            optimizer.zero_grad()
            outputs, hidden = model(x, hidden)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

In [5]:
def generate_text(model, seed_text, num_words):
    model.eval()
    text = seed_text
    for _ in range(num_words):
        x = torch.tensor([text[-1]])
        output, _ = model(x, None)
        _, predicted = torch.max(output, 1)
        text.append(predicted.item())
    return text

In [6]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
def encode(prompt):
    return tokenizer.encode(prompt, return_tensors="pt")

def decode(encoded_prompt):
    return tokenizer.decode(encoded_prompt[0], skip_special_tokens=True)

In [10]:
# conversation history
history_encoded = tokenizer.encode("Hello, I'm an AI model. ", return_tensors="pt")

# user input
user_input_encoded = tokenizer.encode("Hello, Tell me about our solar system? why we not consider pluto in our solar system. ", return_tensors="pt")

# append the new user input tokens to the chat history
history_with_user_input_encoded = torch.cat([history_encoded, user_input_encoded], dim=-1)

# generate a response
output = model.generate(history_with_user_input_encoded, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, do_sample=True)

history_with_reply_encoded = output

# Print message
output_message = decode(history_with_reply_encoded)
print(output_message)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Hello, I'm an AI model. Hello, Tell me about our solar system? why we not consider pluto in our solar system. How can we make an Earth to planets with planets. What is our current orbit based on? Where do we need to call the time for your solar cycle?

What is your planet's orbit base? What are the positions you can be in this situation? A planet is defined by its orbital position, the altitude above its body, its


In [11]:
import torch
from transformers import AutoModel, AutoTokenizer

# 1. Load the transformer model's encoder from the library package.
encoder_model_name = "distilbert-base-uncased"  # Example model, you can choose any other model
tokenizer = AutoTokenizer.from_pretrained(encoder_model_name)
encoder = AutoModel.from_pretrained(encoder_model_name)

# 2. Prepare a list of sample user queries related to a specific topic.
sample_queries = [
    "What is the weather forecast for tomorrow?",
    "Can you tell me the latest news headlines?",
    "How does climate change affect wildlife?",
]

# 3. Encode each query using the encoder model.
encoded_queries = []
for query in sample_queries:
    encoded_query = tokenizer(query, return_tensors="pt", padding=True, truncation=True)
    encoded_queries.append(encoded_query)

# 4. Generate prompts for the AI to respond to based on the encoded queries.
generated_prompts = []
for encoded_query in encoded_queries:
    generated_prompt = "The user asked: " + tokenizer.decode(encoded_query.input_ids[0])
    generated_prompts.append(generated_prompt)

# 5. Display the prompts and the corresponding encoded queries for verification.
for i, (query, prompt) in enumerate(zip(sample_queries, generated_prompts)):
    print(f"Sample Query {i+1}: {query}")
    print(f"Generated Prompt {i+1}: {prompt}\n")


tokenizer_config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 48.0/48.0 [00:00<?, ?B/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 483/483 [00:00<?, ?B/s]
vocab.txt: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 232k/232k [00:00<00:00, 1.39MB/s]
tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 466k/466k [00:00<00:00, 654kB/s]
model.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████

Sample Query 1: What is the weather forecast for tomorrow?
Generated Prompt 1: The user asked: [CLS] what is the weather forecast for tomorrow? [SEP]

Sample Query 2: Can you tell me the latest news headlines?
Generated Prompt 2: The user asked: [CLS] can you tell me the latest news headlines? [SEP]

Sample Query 3: How does climate change affect wildlife?
Generated Prompt 3: The user asked: [CLS] how does climate change affect wildlife? [SEP]

