#### Step 1: Imports & Setup

In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Restore the tokenizer and the fine-tuned weights from their local directories.
# EOS doubles as the padding token so calls that need a pad id have one.
tokenizer = GPT2Tokenizer.from_pretrained("models/gpt2_tokenizer/")
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained("models/fine_tuned_gpt2/")

# Run on CUDA when it is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

# Inference only: switch off dropout and other training-time behaviour.
model.eval()
print("Model & Tokenizer loaded ✅")

Model & Tokenizer loaded ✅


#### Step 2: Chat Function

In [7]:
def generate_reply(user_input, max_length=60):
    """Generate the bot's reply to a single user message.

    The message is wrapped in the chat prompt used by this notebook
    (``<|startoftext|>User: <message>``, a newline, then ``Bot:``), a
    continuation is sampled from the model, and only the newly generated
    text is returned.

    Args:
        user_input: The user's message.
        max_length: Maximum number of new tokens to sample after the prompt.

    Returns:
        The reply text with surrounding whitespace removed. This is an empty
        string when the model produces no text before end-of-sequence.
    """
    prompt = f"<|startoftext|>User: {user_input}\nBot:"

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            # Budget applies to generated tokens only, independent of prompt size.
            max_new_tokens=max_length,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.8,
            eos_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Dropping them by
    # position (instead of splitting the decoded text on "Bot:") keeps the
    # extraction correct even when the user's message or the model's reply
    # contains the literal text "Bot:".
    new_tokens = outputs[0][prompt_len:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # If sampling runs on into a made-up next turn, keep only the bot's own turn.
    reply = reply.split("\nUser:", 1)[0]

    return reply.strip()


#### Step 3: Chat Loop (Try it!)

In [8]:
# Interactive chat: read a line, answer it, repeat until the user types
# "exit" or "quit" (or interrupts / closes the input stream).
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the chat cleanly
        # instead of leaving a traceback in the cell output.
        break
    # Input is stripped above so that "exit " or " quit" still terminates.
    if user_input.lower() in ['exit', 'quit']:
        break
    if not user_input:
        # Nothing was typed; prompt again rather than querying the model.
        continue
    bot_reply = generate_reply(user_input)
    print("Bot:", bot_reply)


You:  hi


Bot: 


You:  kese ho


Bot: 


You:  hi 


Bot: 


You:   hi


Bot: 


You:  hi


Bot: 


You:  exit


In [11]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# NOTE: this cell repeats the model/tokenizer load done in the first cell of
# the notebook, so it can be run on its own from a fresh kernel.

# Choose the compute device: CUDA when available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer from its local directory; EOS is reused as the padding token.
tokenizer = GPT2Tokenizer.from_pretrained("models/gpt2_tokenizer/")
tokenizer.pad_token = tokenizer.eos_token

# Fine-tuned weights, switched to inference mode and moved to the device.
model = GPT2LMHeadModel.from_pretrained("models/fine_tuned_gpt2/")
model.eval()
model = model.to(device)

# Inference function
def generate_response(prompt, max_length=100):
    """Sample a continuation of ``prompt`` and return only the new text.

    Args:
        prompt: Text the model should continue. It is passed to the model
            as-is; no chat template is added here.
        max_length: Maximum number of new tokens to generate after the
            prompt. The prompt's own tokens do not count against this
            budget, so a long prompt can no longer leave zero room for
            the reply.

    Returns:
        The generated continuation with surrounding whitespace removed, or
        the placeholder ``"[No response generated]"`` when nothing was
        generated (including when ``prompt`` tokenizes to zero tokens).
    """
    no_response = "[No response generated]"

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which matters because the pad token id is the same as EOS.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    if inputs["input_ids"].numel() == 0:
        # An empty prompt gives the model nothing to condition on.
        return no_response
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_length,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            num_return_sequences=1,
        )

    # Drop the prompt by token position. Slicing the decoded string by
    # len(prompt) is unreliable because decoding is not guaranteed to
    # reproduce the prompt character-for-character.
    new_tokens = outputs[0][prompt_len:]
    bot_reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    return bot_reply if bot_reply else no_response

# Chat loop
# Interactive chat: read a line, answer it, repeat until the user types
# "exit" or "quit" (or interrupts / closes the input stream).
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the chat cleanly
        # instead of leaving a traceback in the cell output.
        break
    # Input is stripped above so that "exit " or " quit" still terminates.
    if user_input.lower() in ["exit", "quit"]:
        break
    if not user_input:
        # An empty line would tokenize to zero tokens; prompt again instead.
        continue

    response = generate_response(user_input)
    print(f"Bot: {response}")


You:  kese ho


Bot: [No response generated]


You:  kya haal hai


Bot: !


You:  hello


Bot: [No response generated]


You:  good morning


Bot: [No response generated]


You:  kese ho


Bot: .


You:  exit
