# a. BASIC RULE-BASED CHATBOT USING PYTHON NLTK

In [None]:

!pip install nltk



In [None]:
import nltk
from nltk.chat.util import Chat, reflections

# Example rules: (regex pattern, list of candidate responses).
# NLTK's Chat compiles every pattern case-insensitively and tries them in order
# with a match anchored only at the start of the input, so:
#   * "?" is a regex quantifier and has to be escaped to mean a literal "?",
#   * a word boundary keeps "hi" from firing on inputs such as "history",
#   * "sorry" must also match when nothing follows it,
#   * the final catch-all keeps the bot from printing "None" on unmatched input.
# "%1" in a response is replaced by the text captured by the first group.
pairs = [
    (r"my name is (.*)", ["Hello %1, How are you today?"]),
    (r"(?:hi|hey|hello)\b", ["Hello!", "Hey there!"]),
    (r"what is your name\??", ["I am a bot created by [Your Name]."]),
    (r"how are you\??", ["I'm doing good. How about you?"]),
    (r"sorry\b(.*)", ["No problem", "It's okay", "You don't need to be sorry"]),
    (r"quit\s*$", ["Bye! Take care."]),
    # Fallback: must stay last because it matches anything.
    (r"(.*)", ["Sorry, I didn't understand that. Could you rephrase?"]),
]

# Build the rule-based bot and start its console session
def chatbot():
    """Print a greeting, then run an NLTK ``Chat`` session.

    The session is constructed from the module-level ``pairs`` rules and
    NLTK's ``reflections`` mapping, and is driven by ``Chat.converse()``.
    """
    print("Hi, I'm the chatbot you created. Type 'quit' to exit.")
    Chat(pairs, reflections).converse()


if __name__ == "__main__":
    chatbot()


Hi, I'm the chatbot you created. Type 'quit' to exit.
>my name is ishaa
Hello ishaa, How are you today?
>quit
Bye! Take care.


# b. BUILDING A CHATBOT USING SEQ2SEQ MODELS

In [None]:
!pip install tensorflow keras



In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 1. Prepare data
qs = ["hi", "how are you", "what is your name", "bye"]
ans = ["hello", "i am fine", "i am a chatbot", "goodbye"]

# Teacher forcing: the decoder reads "<start> answer" and is trained to emit
# "answer <end>", i.e. the same sentence shifted left by one token.
ans_in = ["<start> " + a for a in ans]
ans_out = [a + " <end>" for a in ans]

# 2. Data preprocessing
# filters='' keeps the "<" and ">" of the marker tokens intact.
tok = Tokenizer(filters='')
tok.fit_on_texts(qs + ans_in + ans_out)
vocab = len(tok.word_index) + 1  # +1 because index 0 is reserved for padding
maxlen = max(len(s.split()) for s in qs + ans_in + ans_out)

# Encode input and output sequences and pad them.
# Questions keep the default left padding (chat() pads its input the same way).
# Decoder sequences are right-padded so "<start>" always sits at position 0,
# which is where chat() puts it when generating. With left padding the training
# and generation layouts disagree and replies come out shifted ("i i am fine").
enc_in = pad_sequences(tok.texts_to_sequences(qs), maxlen=maxlen)
dec_in = pad_sequences(tok.texts_to_sequences(ans_in), maxlen=maxlen, padding='post')
dec_out = pad_sequences(tok.texts_to_sequences(ans_out), maxlen=maxlen, padding='post')

# 3. Build the model
# Hyperparameters gathered in one place so they are easy to find and tune.
SEED = 42
EMBEDDING_DIM = 64
LSTM_UNITS = 128
EPOCHS = 300

# Seed Python, NumPy and TensorFlow before any weights are created so that
# training is repeatable across "Restart & Run All" (some GPU kernels may
# still introduce small run-to-run differences).
tf.keras.utils.set_random_seed(SEED)

encoder_inputs = Input(shape=(maxlen,))
decoder_inputs = Input(shape=(maxlen,))

# One embedding layer is shared by the encoder and the decoder.
embedding = Embedding(vocab, EMBEDDING_DIM)

encoder_embedded = embedding(encoder_inputs)
decoder_embedded = embedding(decoder_inputs)

# Encoder LSTM: only its final hidden and cell states are kept.
encoder_lstm = LSTM(LSTM_UNITS, return_state=True)
_, state_h, state_c = encoder_lstm(encoder_embedded)
encoder_states = [state_h, state_c]

# Decoder LSTM: starts from the encoder states and returns one output per step.
decoder_lstm = LSTM(LSTM_UNITS, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedded, initial_state=encoder_states)

# Output layer: softmax over the vocabulary at every decoder step.
decoder_dense = Dense(vocab, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# The sparse loss takes integer token ids as targets (no one-hot encoding).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()

# 4. Train the model
# Targets get a trailing axis of size 1 before being passed to fit().
model.fit([enc_in, dec_in], np.expand_dims(dec_out, -1), epochs=EPOCHS, verbose=0)

# 5. Generate responses
def chat(msg):
    """Greedily decode a reply to ``msg`` with the trained seq2seq ``model``.

    The message is tokenised and padded to ``maxlen``. The decoder input
    starts as "<start>" followed by zeros and is filled in one position per
    step with the arg-max token of the previous position, stopping early when
    "<end>" is produced.

    Args:
        msg: The user's message as a plain string.

    Returns:
        The decoded reply with the "<start>"/"<end>" markers removed and
        surrounding whitespace stripped.
    """
    question = pad_sequences(tok.texts_to_sequences([msg]), maxlen=maxlen)

    reply = np.zeros((1, maxlen))
    reply[0, 0] = tok.word_index['<start>']
    end_id = tok.word_index.get('<end>')

    # `pos` is the step whose prediction is read; the token is written at pos + 1.
    for pos in range(maxlen - 1):
        probs = model.predict([question, reply], verbose=0)
        token_id = np.argmax(probs[0, pos])
        reply[0, pos + 1] = token_id
        if token_id == end_id:
            break

    decoded = tok.sequences_to_texts(reply)[0]
    for marker in ('<start>', '<end>'):
        decoded = decoded.replace(marker, '')
    return decoded.strip()

# Example interaction
print("User: how are you")
print("Bot:", chat("how are you"))


User: how are you
Bot: i i am fine


# c. CONVERSATIONAL AI WITH TRANSFORMER-BASED MODELS

In [None]:
!pip install transformers torch



In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# 1. Load tokenizer and model
# One constant for the checkpoint id so the tokenizer and the weights always
# come from the same model.
MODEL_NAME = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# NOTE: this rebinds the global `model` that the seq2seq section's chat() also
# relies on; re-run that section before calling chat() again.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

chat_history_ids = None

# generate() counts the prompt towards max_length, so the running history is
# trimmed to its most recent tokens to always leave room for a reply.
MAX_LENGTH = 1000
MAX_HISTORY_TOKENS = 800

print("Conversational AI Chatbot (type 'quit' to exit)")

while True:
    # Strip so that inputs like "quit " still end the session.
    user_input = input("You: ").strip()
    if user_input.lower() in ["quit", "exit"]:
        print("Bot: Goodbye!")
        break
    if not user_input:
        # Nothing typed: ask again instead of sending an empty turn to the model.
        continue

    # Encode user input with EOS token (marks the end of the user's turn)
    new_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors='pt')

    # Build input by appending chat history if exists
    if chat_history_ids is not None:
        bot_input_ids = torch.cat([chat_history_ids, new_input_ids], dim=-1)
    else:
        bot_input_ids = new_input_ids

    # Keep only the most recent tokens so prompt + reply fits within MAX_LENGTH.
    bot_input_ids = bot_input_ids[:, -MAX_HISTORY_TOKENS:]

    # The pad token is set to EOS below, so the mask cannot be inferred from the
    # ids. The single prompt contains no padding, hence a mask of all ones.
    attention_mask = torch.ones_like(bot_input_ids)

    # Generate response (the returned tensor is prompt + newly generated tokens)
    chat_history_ids = model.generate(
        bot_input_ids,
        attention_mask=attention_mask,
        max_length=MAX_LENGTH,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95
    )

    # Decode and print only the new tokens (excluding history)
    bot_output = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
    print("Bot:", bot_output)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Conversational AI Chatbot (type 'quit' to exit)
You: What's the weather like today?


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Bot: It's cold
You: quit
Bot: Goodbye!
