In [32]:
import time
import torch
from transformers import AutoTokenizer,AutoModelForCausalLM,pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

Load the pretrained model

In [33]:
start_time=time.time()

In [34]:
model_name="distilgpt2"
device = torch.device("cpu")

In [35]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [36]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype=torch.float32,
    low_cpu_mem_usage=True
)

In [37]:
model= model.to(device)

In [38]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer= tokenizer,
    max_new_tokens=60,
    temperature=0.7,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.1,
    device=-1
)

Device set to use cpu


In [39]:
llm = HuggingFacePipeline(pipeline=pipe)

In [40]:
load_time= time.time() -start_time

In [41]:
myquestion='''
what is the capital city of china?
'''
test_start = time.time()
test_response = llm(myquestion)
test_time = time.time() - test_start

In [42]:
print ( f"Sample: {test_response}...")

Sample: 
what is the capital city of china?
"The capital city in China has a rich history. It was once an important place for Chinese people to live, learn and make their living."...


In [None]:
LangChain Integration

In [43]:
prompt_template =""" You are an AI assistant.

Instructions:
- keep answers clear
- Use simple language
- Focus on core concepts

User Question: {question}

Assistant:"""

PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables =["question"]
)

print (f"Template preview:{prompt_template[:80]}...")
print (f"input variables: {PROMPT.input_variables}")



Template preview: You are an AI assistant.

Instructions:
- keep answers clear
- Use simple langu...
input variables: ['question']


In [44]:
from langchain_core.runnables import RunnableSequence

# Modern chain (no deprecation warning)
modern_chain = PROMPT | llm

In [45]:
chain =LLMChain(
    llm=llm,
    prompt=PROMPT,
    verbose=True
)
print(f"Chain components: LLM +{len(PROMPT.input_variables)} prompt variables")

Chain components: LLM +1 prompt variables


  warn_deprecated(


In [None]:
Build interactive Chatbot

In [46]:
def run_demo():
    questions=[
        "What is AI",
        "What is Python",
        "What is machine learning"
    ]
    for i,q in enumerate(questions,1):
        print(f"\nQ{i}:{q}")
        start = time.time()
        response=chain.run(q)
        end=time.time()
        clean = response.split("Answer:")[-1].strip()
        if len(clean) >60:
            clean = clean[:60] + "..."
        print(f"({end-start:.1f}s) {clean}")

run_demo()


Q1:What is AI


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m You are an AI assistant.

Instructions:
- keep answers clear
- Use simple language
- Focus on core concepts

User Question: What is AI

Assistant:[0m


  warn_deprecated(



[1m> Finished chain.[0m
(0.7s) You are an AI assistant.

Instructions:
- keep answers clear...

Q2:What is Python


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m You are an AI assistant.

Instructions:
- keep answers clear
- Use simple language
- Focus on core concepts

User Question: What is Python

Assistant:[0m

[1m> Finished chain.[0m
(0.7s) You are an AI assistant.

Instructions:
- keep answers clear...

Q3:What is machine learning


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m You are an AI assistant.

Instructions:
- keep answers clear
- Use simple language
- Focus on core concepts

User Question: What is machine learning

Assistant:[0m

[1m> Finished chain.[0m
(0.6s) You are an AI assistant.

Instructions:
- keep answers clear...


An advanced chatbot which i did not show it in the video

In [None]:
Simplified Interactive Chatbot (Demo Version)

class SimpleChatbot:
    """Streamlined chatbot for assignment demo - minimal typing required"""
    
    def __init__(self, chain, model_name):
        self.chain = chain
        self.model_name = model_name
        self.conversation_count = 0
        self.demo_questions = [
            "What is machine learning?",
            "Explain neural networks simply",
            "What is LangChain?",
            "How does Python work?",
            "What is quantum computing?"
        ]
    
    def show_header(self):
        """Clean header for video demo"""
        print("\n" + "="*60)
        print(f" SIMPLE CHATBOT DEMO - {self.model_name.upper()}")
        print(f"MacBook Air | CPU Mode | LangChain Integration")
        print("="*60)
        print(" COMMANDS:")
        print("Type 'demo' - Run 3 sample questions")
        print("Type '1-5' - Quick demo questions")
        print("Type 'stats' - Show performance")
        print("Type 'quit' - Exit")
        print("-" * 60)
    
    def quick_demo(self):
        """Run 3 pre-defined demo questions (perfect for video)"""
        print("\n RUNNING QUICK DEMO (3 Questions)...")
        demo_samples = self.demo_questions[:3]
        
        for i, question in enumerate(demo_samples, 1):
            print(f"\nQ{i}: {question}")
            print(end="", flush=True)
            
            start_time = time.time()
            response = self.chain.run(question)
            response_time = time.time() - start_time
            
            # Simple cleaning
            clean_response = response.split("Assistant:")[-1].strip()
            if len(clean_response) > 80:
                clean_response = clean_response[:80] + "..."
            
            print(f"Response ({response_time:.1f}s): {clean_response}")
            self.conversation_count += 1
        
        print(f"\n✅ Demo complete! {self.conversation_count} conversations")
    
    def numbered_demo(self, question_num):
        """Run specific demo question"""
        if 1 <= question_num <= len(self.demo_questions):
            question = self.demo_questions[question_num - 1]
            print(f"\n Question {question_num}: {question}")
            
            start_time = time.time()
            response = self.chain.run(question)
            response_time = time.time() - start_time
            
            clean_response = response.split("Assistant:")[-1].strip()[:100] + "..."
            print(f"({response_time:.1f}s): {clean_response}")
            self.conversation_count += 1
        else:
            print("Invalid question number (1-5)")
    
    def show_stats(self):
        """Simple stats display"""
        print(f"\n QUICK STATS")
        print(f"   Total conversations: {self.conversation_count}")
        print(f"   Model: {self.model_name}")
        print(f"   Load time: {load_time:.1f}s")
        print(f"   Avg response: ~{test_time:.1f}s")
        print(f"   Memory: ~{model_params * 4:.0f}MB")
    
    def run(self):
        """Simple main loop - minimal typing"""
        self.show_header()
        
        print("\n Ready! Type 'demo' to start...")
        
        while True:
            try:
                user_input = input("\n> ").strip().lower()
                
                if user_input in ['quit', 'exit', 'q']:
                    self.show_final_summary()
                    break
                
                elif user_input == 'demo':
                    self.quick_demo()
                
                elif user_input == 'stats':
                    self.show_stats()
                
                elif user_input in ['1', '2', '3', '4', '5']:
                    self.numbered_demo(int(user_input))
                
                elif user_input:
                    # Single word/short query
                    print(f"\n Custom: {user_input}")
                    start_time = time.time()
                    response = self.chain.run(user_input)
                    response_time = time.time() - start_time
                    
                    clean_response = response.split("Assistant:")[-1].strip()[:80] + "..."
                    print(f" ({response_time:.1f}s): {clean_response}")
                    self.conversation_count += 1
                
                else:
                    print(" Try: 'demo', '1', 'stats', or 'quit'")
            
            except KeyboardInterrupt:
                print("\n\n  Demo interrupted")
                break
            except Exception as e:
                print(f" {str(e)[:40]}")
        
        print(" Demo complete!")
    
    def show_final_summary(self):
        """Clean final summary"""
        print(f"\n" + "="*50)
        print("="*50)
        print(f" Model: {self.model_name} loaded successfully")
        print(f"LangChain: Chain + Prompt integration")
        print(f"Chatbot: {self.conversation_count} interactions")
        print(f" Performance: {load_time:.1f}s load | {test_time:.1f}s response")
        print(f"Memory: {model_params * 4:.0f}MB")
        print("="*50)
    


# LAUNCH SIMPLIFIED CHATBOT

simple_bot = SimpleChatbot(chain, model_name)

# Interactive mode
simple_bot.run()

In [None]:
Testing Guide

In [53]:
test_cases =[
    {"question": "What is the capital of France?" , "expected" : "Paris"},
    {"question": "What is AI?", "expected": "artificial"}
]

accuracy_results =[]
latencies =[]


In [54]:
for case in test_cases:
    start_time = time.time()
    response = chain.run(question=case["question"])
    response_time = time.time()- start_time
    
    accuracy_pass = case["expected"] in response.lower()
    coherence_pass = len(response.split())>5

    accuracy_results.append(accuracy_pass)
    latencies.append(response_time)

    status = "PASS" if accuracy_pass and coherence_pass else "FALL"
    print(f"{case['question'][:25]:<25} {status}")
    print(f"Latency: {response_time:.2f}s | Response: {response[:60]}...")
    print()



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m You are an AI assistant.

Instructions:
- keep answers clear
- Use simple language
- Focus on core concepts

User Question: What is the capital of France?

Assistant:[0m

[1m> Finished chain.[0m
What is the capital of Fr FALL
Latency: 0.68s | Response:  You are an AI assistant.

Instructions:
- keep answers clea...



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m You are an AI assistant.

Instructions:
- keep answers clear
- Use simple language
- Focus on core concepts

User Question: What is AI?

Assistant:[0m

[1m> Finished chain.[0m
What is AI?               FALL
Latency: 0.64s | Response:  You are an AI assistant.

Instructions:
- keep answers clea...

