In [2]:
# STEP 1: Install and Import Required Libraries
# Setup cell: installs a pinned `transformers` release into the interpreter that
# is running this code, then imports the libraries used by the later cells.
print("Installing required packages...")
import subprocess
import sys

# Install specific transformers version
# `sys.executable -m pip` targets the same interpreter that runs this cell;
# `check_call` raises CalledProcessError if pip exits with a non-zero status.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "transformers==4.36.0"])

# These imports run after the install step above.
import torch
import transformers
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import re
import warnings
# Blanket "ignore" filter. It only affects warnings raised after this line, so
# anything emitted while importing the libraries above is not suppressed.
warnings.filterwarnings('ignore')

# Report the library versions actually loaded.
print(f"✓ PyTorch version: {torch.__version__}")
print(f"✓ Transformers version: {transformers.__version__}")
print()

Installing required packages...


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


✓ PyTorch version: 2.9.0+cpu
✓ Transformers version: 4.36.0



In [3]:
# STEP 2: Load GPT-2 Model and Tokenizer
# Creates the module-level `tokenizer` and `model` objects that
# `generate_python_response` reads as globals.
print("Loading GPT-2 model and tokenizer...")

# Checkpoint identifier passed to both `from_pretrained` calls below.
MODEL_NAME = "gpt2"  # Options: "gpt2", "gpt2-medium", "gpt2-large"

try:
    tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
    model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)

    # Set padding token to avoid warnings
    # (the end-of-sequence token is reused as the padding token)
    tokenizer.pad_token = tokenizer.eos_token

    print(f"✓ Model loaded: {MODEL_NAME}")
    print(f"✓ Parameters: {model.num_parameters():,}")
    print()

except Exception as e:
    # Any failure in the block above is reduced to a one-line message; the
    # original traceback is not printed.
    print(f"Error loading model: {e}")
    # NOTE(review): sys.exit raises SystemExit. Under IPython/Jupyter this
    # presumably aborts the cell rather than the kernel — confirm that later
    # cells are not run with `tokenizer`/`model` undefined.
    sys.exit(1)

Loading GPT-2 model and tokenizer...


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

✓ Model loaded: gpt2
✓ Parameters: 124,439,808



In [4]:
# STEP 3: Python Question Filter

# Vocabulary that marks a prompt as Python-related. Entries are matched as
# whole words (see _KEYWORD_RE), not as raw substrings.
_PYTHON_KEYWORDS = (
    # Core Python
    'python', 'py', 'code', 'coding', 'program', 'programming', 'programmer',
    'script', 'function', 'def', 'class', 'method', 'variable',
    'import', 'module', 'package', 'library',

    # Data structures
    'list', 'tuple', 'dictionary', 'dictionaries', 'dict', 'set', 'array',
    'string', 'integer', 'float', 'boolean', 'none',

    # Control flow
    'loop', 'for', 'while', 'if', 'else', 'elif',
    'break', 'continue', 'pass', 'return', 'yield',

    # OOP
    'object', 'inheritance', 'polymorphism', 'encapsulation',
    'attribute', 'constructor', 'self', 'init',

    # Error handling
    'error', 'exception', 'try', 'except', 'finally',
    'raise', 'assert', 'debug', 'debugging', 'traceback',

    # Common operations
    'print', 'input', 'open', 'read', 'write',
    'append', 'sort', 'filter', 'map', 'lambda',

    # Advanced concepts
    'decorator', 'generator', 'iterator', 'comprehension',
    'context manager', 'metaclass', 'async', 'asyncio', 'asynchronous', 'await',

    # Popular libraries
    'numpy', 'pandas', 'matplotlib', 'scipy', 'sklearn',
    'tensorflow', 'pytorch', 'django', 'flask', 'requests',

    # File operations
    'file', 'csv', 'json', 'xml', 'pickle',

    # Other
    'syntax', 'indentation', 'pep', 'pythonic',
    'pip', 'virtual environment', 'venv', 'conda',
    'algorithm', 'data structure', 'recursion',
)

# "[^\W\d_]" is "any Unicode letter". A keyword only counts when it is not glued
# to other letters, so 'py' no longer fires on "happy" nor 'for' on
# "information". Digits and underscores may still touch a keyword, which keeps
# "python3" and "__init__" matching. Two deliberate relaxations:
#   * `py\w*`  - a word that *starts* with "py" (pytest, pyplot, ...) counts.
#   * an optional s/es/ed/ing suffix accepts plain inflections ("decorators",
#     "sorting") without a separate entry for each.
_LETTER = r'[^\W\d_]'
_KEYWORD_RE = re.compile(
    rf'(?<!{_LETTER})'
    rf'(?:py\w*|(?:{"|".join(re.escape(word) for word in _PYTHON_KEYWORDS)})(?:e?s|ed|ing)?)'
    rf'(?!{_LETTER})'
)

# Phrase-level patterns, tried only when no keyword matched.
_PYTHON_PATTERN_RE = re.compile('|'.join(f'(?:{pattern})' for pattern in (
    r'how to .* in python',
    r'python .* code',
    r'write .* function',
    r'create .* class',
    r'implement .*',
    r'\.py\b',
    r'def\s+\w+\(',
    r'import\s+\w+',
    r'from\s+\w+\s+import',
    r'write.*program',
    r'solve.*python',
)))


def is_python_question(prompt):
    """
    Determines if a prompt is related to Python programming.

    This is a keyword heuristic: the lower-cased prompt is accepted when it
    contains any entry of ``_PYTHON_KEYWORDS`` as a whole word, or matches one
    of the phrase patterns. Several keywords ('for', 'if', 'open', ...) are
    also everyday English words, so false positives remain possible; only
    matches *inside* unrelated words are ruled out.

    Args:
        prompt (str): User's input question

    Returns:
        bool: True if Python-related, False otherwise (including for empty,
        whitespace-only, or non-string input)
    """
    if not isinstance(prompt, str) or not prompt.strip():
        return False

    prompt_lower = prompt.lower()

    # Keywords first, phrase patterns second.
    if _KEYWORD_RE.search(prompt_lower):
        return True

    return _PYTHON_PATTERN_RE.search(prompt_lower) is not None

In [5]:
# STEP 4: Response Generation
def generate_python_response(prompt, max_length=200, temperature=0.7):
    """
    Generates a response using GPT-2 for Python coding questions.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt (str): The input prompt
        max_length (int): Value forwarded to ``model.generate(max_length=...)``.
            It bounds the total sequence length, i.e. prompt tokens plus
            generated tokens, so long questions leave less room for the answer.
        temperature (float): Sampling temperature. Values > 0 enable sampling
            (with top-k / top-p); values <= 0 select deterministic greedy
            decoding, because the sampling path rejects a non-positive
            temperature.

    Returns:
        str: Generated answer text, without the prompt scaffold
    """
    # Enhance prompt for better coding responses
    enhanced_prompt = f"Python programming Q&A:\n\nQuestion: {prompt}\n\nAnswer:"

    # Tokenize input; the tokenizer call also returns the matching attention mask.
    encoded = tokenizer(enhanced_prompt, return_tensors="pt")
    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded["attention_mask"].to(model.device)
    prompt_token_count = input_ids.shape[1]

    generation_kwargs = {
        "attention_mask": attention_mask,
        "max_length": max_length,
        "num_return_sequences": 1,
        "pad_token_id": tokenizer.eos_token_id,
        "no_repeat_ngram_size": 2,
        "repetition_penalty": 1.2,
    }
    if temperature is not None and temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_k=50,
            top_p=0.95,
        )
    else:
        # Sampling-only arguments are omitted on the greedy path.
        generation_kwargs["do_sample"] = False

    # Generate response
    with torch.no_grad():
        outputs = model.generate(input_ids, **generation_kwargs)

    # Decode only the tokens produced after the prompt. Splitting the full text
    # on "Answer:" and taking the last piece would drop the real answer when the
    # model writes another "Answer:" or when the question contains that marker.
    generated_text = tokenizer.decode(
        outputs[0][prompt_token_count:], skip_special_tokens=True
    )

    # The prompt is a Q&A transcript, so the model may continue with an invented
    # follow-up question; keep only the text before it (unless nothing remains).
    answer = generated_text.split("\nQuestion:", 1)[0].strip()
    return answer or generated_text.strip()


In [8]:
# STEP 5: Main Chatbot Function
def python_chatbot(user_question):
    """
    Answer one question, refusing anything the Python filter rejects.

    Prints a banner with the question, then either the generated answer
    (when ``is_python_question`` accepts the input) or a fixed rejection
    message listing example questions.

    Args:
        user_question (str): User's input question

    Returns:
        str: The text that was printed as the answer
    """
    divider = '=' * 70
    print(f"\n{divider}")
    print(f"Question: {user_question}")
    print(divider)

    # Guard clause: off-topic questions get the canned reply.
    if not is_python_question(user_question):
        print("✗ Not a Python coding question.\n")

        reply = "\n".join([
            "I'm sorry, but I'm specialized in answering Python programming questions only. "
            "Please ask me something related to Python coding, such as:",
            "  • How to use lists in Python?",
            "  • Write a function to sort a dictionary",
            "  • How to handle exceptions in Python?",
            "  • What is list comprehension?",
        ])

        print(f"Answer:\n{reply}")
        return reply

    print("✓ Python coding question detected. Generating response...\n")

    reply = generate_python_response(user_question)
    print(f"Answer:\n{reply}")
    return reply



In [9]:
# STEP 6: Interactive Mode
def interactive_mode():
    """
    Interactive chatbot mode for continuous Q&A.

    Reads questions with ``input()`` until the user types 'quit', 'exit' or
    'q' (case-insensitive), or until input ends (EOF) or is interrupted
    (Ctrl-C / kernel interrupt). Blank lines are re-prompted, 'examples'
    prints sample questions, and anything else is passed to
    ``python_chatbot``.
    """
    print("\n" + "="*70)
    print("PYTHON CODING ASSISTANT - INTERACTIVE MODE")
    print("="*70)
    print("I'm a specialized chatbot trained to answer Python coding questions!")
    print("\nCommands:")
    print("  • Type your Python question to get an answer")
    print("  • Type 'examples' to see example questions")
    print("  • Type 'quit', 'exit', or 'q' to exit")
    print("="*70 + "\n")

    while True:
        try:
            user_input = input("💬 Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt ends the session like 'quit' does,
            # instead of surfacing a traceback.
            print("\nThank you for using Python Coding Assistant. Goodbye!")
            break

        command = user_input.lower()

        # Exit commands
        if command in ('quit', 'exit', 'q'):
            print("\nThank you for using Python Coding Assistant. Goodbye!")
            break

        # Empty input
        if not user_input:
            print("Please enter a question.\n")
            continue

        # Show examples
        if command == 'examples':
            print("\nExample Python Questions:")
            print("  • How do I create a dictionary in Python?")
            print("  • Write a function to reverse a string")
            print("  • What is the difference between list and tuple?")
            print("  • How to read a CSV file in Python?")
            print("  • Explain list comprehension with examples")
            print("  • How to handle FileNotFoundError?")
            print("  • What are decorators in Python?\n")
            continue

        # Process question
        python_chatbot(user_input)
        print()



In [11]:
# STEP 7: Testing Suite
def run_tests():
    """
    Demonstrate the chatbot on a fixed list of sample prompts.

    Sends five Python-related prompts followed by four unrelated ones through
    ``python_chatbot`` and prints a numbered header before each. Nothing is
    asserted; the printed output is the result.
    """
    bar = "=" * 70

    # Python questions
    python_prompts = [
        "How do I create a list in Python?",
        "Write a function to calculate factorial",
        "What is the difference between list and tuple?",
        "How to handle IndexError in Python?",
        "Explain lambda functions",
    ]

    # Non-Python questions
    off_topic_prompts = [
        "What is the capital of France?",
        "Tell me a joke",
        "What's the weather today?",
        "Who won the World Cup?",
    ]

    print("\n" + bar)
    print("RUNNING TEST CASES")
    print(bar)

    # Numbering starts at 1 and runs across both groups.
    for number, prompt in enumerate(python_prompts + off_topic_prompts, start=1):
        print(f"\n--- Test Case {number} ---")
        python_chatbot(prompt)

    print("\n" + bar)
    print("✓ ALL TESTS COMPLETED")
    print(bar)


In [14]:
# STEP 8: Main Execution
# ============================================================================
# Entry point: prints a banner, runs the demo prompts via run_tests(), then
# offers the interactive loop.
if __name__ == "__main__":
    print("\n" + "="*70)
    print("PYTHON-FOCUSED GPT-2 CHATBOT INITIALIZED")
    print("="*70)

    # First, run automated tests to show examples
    run_tests()

    # After tests, ask if user wants to try interactive mode
    print("\n" + "="*70)
    # Blocks on stdin; the answer is trimmed and lower-cased before comparison.
    user_choice = input("\nWould you like to try the interactive mode? (yes/no): ").strip().lower()

    # Only 'yes' / 'y' start the interactive loop; any other reply declines.
    if user_choice in ['yes', 'y']:
        interactive_mode()
    else:
        # NOTE(review): the message refers to uncommenting interactive_mode(),
        # but no commented-out call is visible in this file — confirm wording.
        print("\nThank you for testing! You can uncomment interactive_mode() to chat anytime.")

# Sits outside the __main__ guard, so it runs whenever this code is executed,
# including on import.
print("\n✓ Program completed successfully!")


PYTHON-FOCUSED GPT-2 CHATBOT INITIALIZED

RUNNING TEST CASES

--- Test Case 1 ---

Question: How do I create a list in Python?
✓ Python coding question detected. Generating response...

Answer:
To start with, you simply call the method on each element of your tree. In this way it's easy to know when an item is missing or if any elements have been added too many times (it also means that all items will be removed immediately). When dealing directly within python 2 we can add another class which implements its own methods but does not provide access for those used by other classes like get_all(), sort(), etc... and so forth.... We'll take care here as well! The easiest approach would go something similar however because there are some common problems involved using these libraries at runtime such things as memory allocation - more specifically how long they should keep running until everything gets loaded into RAM again before being called back from PyDataStore. And then finally perhaps