<a href="https://colab.research.google.com/github/sergekamanzi/Chat-Bot-/blob/main/chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
!pip install gradio
!pip install faiss-cpu pyspellchecker
!pip install huggingface_hub[hf_xet]

Collecting pyspellchecker
  Downloading pyspellchecker-0.8.3-py3-none-any.whl.metadata (9.5 kB)
Downloading pyspellchecker-0.8.3-py3-none-any.whl (7.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m71.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyspellchecker
Successfully installed pyspellchecker-0.8.3


In [10]:
import numpy as np
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
import gradio as gr
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

# Load the question-answer knowledge base. The CSV is expected to provide
# "Question" and "Answer" columns; rows missing either one are dropped so that
# every question keeps a matching answer at the same list position.
data = pd.read_csv("/content/rwanda_history.csv")
data = data.dropna(subset=["Question", "Answer"])

# Load the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Extract questions and answers as parallel lists of strings.
# (DataFrame.keys() would only return the column names, and DataFrame.values
# is an array attribute rather than a method, so the columns are read directly.)
questions = data["Question"].astype(str).tolist()
answers = data["Answer"].astype(str).tolist()

# Encode the questions; FAISS requires float32 vectors
question_embeddings = model.encode(questions, convert_to_tensor=False)
question_embeddings = np.array(question_embeddings).astype('float32')

# Create a FAISS index (exact L2 search) over the question embeddings, so
# position i in the index corresponds to questions[i] / answers[i]
dimension = question_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(question_embeddings)

# Lists for detecting greetings, gratitude, farewells, emotions, and follow-ups
GREETINGS = ["hello", "hi", "hey", "greetings", "good morning", "good afternoon", "good evening"]
GRATITUDE = ["thank you", "thanks", "appreciate it", "thank you so much", "thanks a lot"]
FAREWELLS = ["bye", "goodbye", "see you", "farewell", "take care"]
FOLLOW_UPS = ["tell me more", "more", "go on", "continue"]

# Emotion keyword lists
HAPPINESS = ["happy", "glad", "excited", "joyful", "pleased"]
SADNESS = ["sad", "heartbreaking", "tragic", "depressing", "devastating"]
ANGER = ["angry", "frustrated", "outraged", "mad", "furious"]
FEAR = ["scared", "terrified", "afraid", "horrified", "fearful"]
SURPRISE = ["shocked", "amazed", "surprised", "astonished", "stunned"]


def _contains_phrase(query, phrases):
    """Return True if any phrase occurs in the query as whole words.

    Matching is case-insensitive and anchored on word boundaries, so short
    keywords such as "hi", "mad" or "more" no longer fire inside longer words
    like "history", "made" or "moreover".
    """
    import re  # local import keeps this block independent of the import cell

    text = query.lower().strip()
    return any(
        re.search(r"\b" + re.escape(phrase) + r"\b", text) is not None
        for phrase in phrases
    )


def detect_emotion(query):
    """Return (emotion_label, empathetic_reply) for the first emotion found.

    Emotions are checked in a fixed order (happiness, sadness, anger, fear,
    surprise); (None, "") is returned when no emotion keyword is present.
    """
    # Built per call so the lookup always uses the current keyword lists.
    emotions = (
        ("happiness", HAPPINESS, "I’m glad to hear that brings you happiness."),
        ("sadness", SADNESS, "I understand, that can be truly heartbreaking."),
        ("anger", ANGER, "I can see why that might make you feel angry."),
        ("fear", FEAR, "I can understand why that might feel terrifying."),
        ("surprise", SURPRISE, "That’s quite surprising, isn’t it?"),
    )
    for label, keywords, reply in emotions:
        if _contains_phrase(query, keywords):
            return label, reply
    return None, ""


def is_greeting(query):
    """Return True if the query contains a greeting from GREETINGS."""
    return _contains_phrase(query, GREETINGS)


def is_gratitude(query):
    """Return True if the query contains a thank-you phrase from GRATITUDE."""
    return _contains_phrase(query, GRATITUDE)


def is_farewell(query):
    """Return True if the query contains a farewell from FAREWELLS."""
    return _contains_phrase(query, FAREWELLS)


def is_follow_up(query):
    """Return True if the query contains a follow-up phrase from FOLLOW_UPS."""
    return _contains_phrase(query, FOLLOW_UPS)

# Prepare data for LSTM training: fit one vocabulary over the questions, the
# answers, and the combined "User: ... Bot: ..." dialogue lines.
all_text = questions + answers + [f"User: {q} Bot: {a}" for q, a in zip(questions, answers)]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_text)
vocab_size = len(tokenizer.word_index) + 1  # +1 leaves index 0 free for padding

# Create sequences for training (simplified example)
sequences = tokenizer.texts_to_sequences(all_text)
max_length = max(len(seq) for seq in sequences)

# Build aligned next-word training pairs: the input is a sequence without its
# final token and the target is that final token.
# The previous version indexed seq[max_length], which can never exist because
# max_length is the longest sequence length, so every target was 0 (padding)
# and inputs/targets came from different sequences.
training_sequences = [seq for seq in sequences if len(seq) >= 2]

# Pad inputs to max_length so they match what generate_lstm_response feeds in
X = pad_sequences([seq[:-1] for seq in training_sequences], maxlen=max_length, padding='post')

# Next-word index for each row of X
y_sequences = [seq[-1] for seq in training_sequences]

# One-hot encode the next-word indices
y = tf.keras.utils.to_categorical(y_sequences, num_classes=vocab_size)


# Build and train LSTM model
lstm_model = Sequential([
    Embedding(vocab_size, 64, input_length=max_length),
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dense(vocab_size, activation='softmax')
])
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam')
# X has shape (num_samples, max_length) and y has shape (num_samples, vocab_size)
lstm_model.fit(X, y, epochs=50, batch_size=1, verbose=0)  # Reduced verbosity for Colab

def generate_lstm_response(history_text):
    """Predict a single follow-up word for the given conversation text.

    The text is tokenized with the module-level tokenizer, padded to
    max_length, and passed to lstm_model; the highest-scoring vocabulary index
    is mapped back to a word ("details" if the index has no word). When the
    text yields no known tokens, a fixed prompt sentence is returned instead.
    """
    token_ids = tokenizer.texts_to_sequences([history_text])

    if token_ids and token_ids[0]:
        model_input = pad_sequences(token_ids, maxlen=max_length, padding='post')
        scores = lstm_model.predict(model_input, verbose=0)
        best_index = np.argmax(scores)
        # Fall back to a neutral word when the index is not in the vocabulary
        return tokenizer.index_word.get(best_index, "details")

    # Nothing recognisable to condition the prediction on
    return "I can provide more details. What topic would you like to explore?"


def _record_turn(query, response, history):
    """Append the exchange to history and return the (response, history) pair."""
    history.append(("User: " + query, "Bot: " + response))
    return response, history


def chatbot(query, history):
    """Answer one user query and return (response, updated_history).

    The query is handled by the first matching rule, in this order: empty
    input, greeting, gratitude, farewell, follow-up (only when there is prior
    history), and finally a nearest-neighbour lookup of the query embedding in
    the FAISS index. Lookups farther than DISTANCE_THRESHOLD get an
    out-of-domain reply; otherwise the matched answer is returned, prefixed by
    an empathetic sentence when an emotion keyword was detected.

    Args:
        query: The user's message.
        history: List of ("User: ...", "Bot: ...") tuples, or None to start a
            new conversation. The list is extended in place.

    Returns:
        A (response, history) tuple.
    """
    # Initialize history if None
    if history is None:
        history = []

    # Empty or whitespace-only input has nothing to match against
    if not query or not query.strip():
        return _record_turn(
            query or "",
            "Please type a question about Rwandan history.",
            history,
        )

    # Detect emotion
    emotion, emotion_response = detect_emotion(query)

    # Check for greetings
    if is_greeting(query):
        return _record_turn(
            query,
            "Hello! I'm here to assist you with questions about Rwandan history. What would you like to learn about today?",
            history,
        )

    # Check for gratitude
    if is_gratitude(query):
        return _record_turn(
            query,
            "You're welcome! I'm glad I could help. Would you like to know more about Rwandan history?",
            history,
        )

    # Check for farewells
    if is_farewell(query):
        return _record_turn(
            query,
            "Goodbye! Feel free to come back if you have more questions about Rwandan history.",
            history,
        )

    # Check for follow-up questions
    if is_follow_up(query) and history:
        # Only the last turn is used as context; this may be too simple for
        # complex follow-ups (the last N turns could be considered instead).
        last_turn = history[-1]
        history_text = last_turn[0] + " " + last_turn[1]
        lstm_response_word = generate_lstm_response(history_text)
        return _record_turn(
            query,
            f"Based on our conversation, perhaps you're interested in '{lstm_response_word}'. Would you like to know more about Rwandan history?",
            history,
        )

    # Encode the input query
    query_embedding = model.encode([query], convert_to_tensor=False).astype('float32')

    # Search for the closest question
    k = 1  # Number of nearest neighbors
    distances, indices = index.search(query_embedding, k)

    # Get the distance to the closest match
    distance = distances[0][0]

    # Set a threshold for out-of-domain detection
    DISTANCE_THRESHOLD = 1.5  # This threshold might need tuning

    if distance > DISTANCE_THRESHOLD:
        return _record_turn(
            query,
            "I'm sorry, I don't have information on that topic. I can help with questions about Rwandan history. What would you like to know?",
            history,
        )

    # Answer stored at the same position as the most similar question
    matched_answer = answers[indices[0][0]]

    # Add empathetic response if emotion is detected
    prefix = f"{emotion_response} " if emotion else ""
    response = f"{prefix}Here’s what I found: {matched_answer} Would you like to know more about Rwandan history?"
    return _record_turn(query, response, history)

# Gradio UI: a question box plus a State component that carries the
# conversation history from one call of chatbot() to the next.
inputs = [
    gr.Textbox(placeholder="Ask a question about Rwandan history...", lines=2),
    gr.State(value=[]),
]
outputs = [gr.Textbox(label="Response"), gr.State()]

iface = gr.Interface(
    title="Rwandan History Chatbot",
    description="Ask questions about Rwandan history, and the chatbot will respond based on the provided knowledge base.",
    fn=chatbot,
    inputs=inputs,
    outputs=outputs,
)

# Smoke-test the chatbot on one sample query per rule before starting the UI
test_queries = [
    "Hello",  # Greeting
    "The 1994 genocide is so heartbreaking",  # Sadness
    "Tell me more",  # Follow-up
    "I’m so happy to learn about Rwanda’s progress",  # Happiness
    "What happened in 1973 in Rwanda?",  # In-domain
    "That’s shocking!",  # Surprise
    "Thanks a lot",  # Gratitude
    "Goodbye",  # Farewell
]

print("Testing the chatbot with sample queries:")
history = []  # threaded through every call so follow-ups can see earlier turns
for query in test_queries:
    response, history = chatbot(query, history)
    print(f"Query: {query}\nResponse: {response}\n")

# Launch the Gradio interface
iface.launch()

In [10]:


import numpy as np
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
import gradio as gr
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from spellchecker import SpellChecker

# Initialize spell checker
spell = SpellChecker()

# Load the question-answer data; when the CSV is not at the expected Colab
# path, fall back to an interactive upload.
try:
    data = pd.read_csv("/content/rwanda_history.csv")
except FileNotFoundError:
    print("Error: rwanda_history.csv not found. Please upload the file.")
    import io
    from google.colab import files
    uploaded = files.upload()
    if not uploaded:
        # Nothing was uploaded, so surface the original FileNotFoundError
        raise
    # Read the uploaded bytes directly: the upload may use a different file
    # name or land outside /content, so re-reading the fixed path could fail
    # again.
    uploaded_name = next(iter(uploaded))
    data = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))

# Drop incomplete rows so every question keeps a matching answer
data = data.dropna(subset=["Question", "Answer"])

# Extract questions and answers as parallel lists of strings
questions = data["Question"].astype(str).tolist()
answers = data["Answer"].astype(str).tolist()

# Load the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode the questions; FAISS requires float32 vectors
question_embeddings = model.encode(questions, convert_to_tensor=False)
question_embeddings = np.array(question_embeddings).astype('float32')

# Create a FAISS index (exact L2 search); position i corresponds to
# questions[i] / answers[i]
dimension = question_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(question_embeddings)

# Lists for detecting greetings, gratitude, farewells, emotions, and follow-ups
GREETINGS = ["hello", "hi", "hey", "greetings", "good morning", "good afternoon", "good evening"]
GRATITUDE = ["thank you", "thanks", "appreciate it", "thank you so much", "thanks a lot"]
FAREWELLS = ["bye", "goodbye", "see you", "farewell", "take care"]
FOLLOW_UPS = ["tell me more", "more", "go on", "continue"]

# Emotion keyword lists
HAPPINESS = ["happy", "glad", "excited", "joyful", "pleased"]
SADNESS = ["sad", "heartbreaking", "tragic", "depressing", "devastating"]
ANGER = ["angry", "frustrated", "outraged", "mad", "furious"]
FEAR = ["scared", "terrified", "afraid", "horrified", "fearful"]
SURPRISE = ["shocked", "amazed", "surprised", "astonished", "stunned"]


def correct_spelling(query):
    """Correct misspelled words in the query using pyspellchecker.

    The query is split on whitespace and each word is replaced by the spell
    checker's suggestion; a word is kept unchanged when the checker returns
    None for it. The words are re-joined with single spaces.
    """
    corrected_words = []
    for word in query.split():
        suggestion = spell.correction(word)  # looked up once per word
        corrected_words.append(word if suggestion is None else suggestion)
    return " ".join(corrected_words)


def _contains_phrase(query, phrases):
    """Return True if any phrase occurs in the query as whole words.

    Matching is case-insensitive and anchored on word boundaries, so short
    keywords such as "hi", "mad" or "more" no longer fire inside longer words
    like "history", "made" or "moreover".
    """
    import re  # local import keeps this block independent of the import cell

    text = query.lower().strip()
    return any(
        re.search(r"\b" + re.escape(phrase) + r"\b", text) is not None
        for phrase in phrases
    )


def detect_emotion(query):
    """Return (emotion_label, empathetic_reply) for the first emotion found.

    Emotions are checked in a fixed order (happiness, sadness, anger, fear,
    surprise); (None, "") is returned when no emotion keyword is present.
    """
    # Built per call so the lookup always uses the current keyword lists.
    emotions = (
        ("happiness", HAPPINESS, "I’m glad to hear that brings you happiness."),
        ("sadness", SADNESS, "I understand, that can be truly heartbreaking."),
        ("anger", ANGER, "I can see why that might make you feel angry."),
        ("fear", FEAR, "I can understand why that might feel terrifying."),
        ("surprise", SURPRISE, "That’s quite surprising, isn’t it?"),
    )
    for label, keywords, reply in emotions:
        if _contains_phrase(query, keywords):
            return label, reply
    return None, ""


def is_greeting(query):
    """Return True if the query contains a greeting from GREETINGS."""
    return _contains_phrase(query, GREETINGS)


def is_gratitude(query):
    """Return True if the query contains a thank-you phrase from GRATITUDE."""
    return _contains_phrase(query, GRATITUDE)


def is_farewell(query):
    """Return True if the query contains a farewell from FAREWELLS."""
    return _contains_phrase(query, FAREWELLS)


def is_follow_up(query):
    """Return True if the query contains a follow-up phrase from FOLLOW_UPS."""
    return _contains_phrase(query, FOLLOW_UPS)

# Prepare data for LSTM training: fit one vocabulary over the questions, the
# answers, and the combined "User: ... Bot: ..." dialogue lines.
all_text = questions + answers + [f"User: {q} Bot: {a}" for q, a in zip(questions, answers)]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_text)
vocab_size = len(tokenizer.word_index) + 1  # +1 leaves index 0 free for padding

# Create sequences for training
sequences = tokenizer.texts_to_sequences(all_text)
max_length = max(len(seq) for seq in sequences)

# Build aligned next-word training pairs: the input is a sequence without its
# final token and the target is that final token.
# The previous version indexed seq[max_length], which can never exist because
# max_length is the longest sequence length, so every target was 0 (padding)
# and inputs/targets came from different sequences.
training_sequences = [seq for seq in sequences if len(seq) >= 2]

# Pad inputs to max_length so they match what generate_lstm_response feeds in
X = pad_sequences([seq[:-1] for seq in training_sequences], maxlen=max_length, padding='post')

# Next-word index for each row of X, one-hot encoded over the vocabulary
y_sequences = [seq[-1] for seq in training_sequences]
y = tf.keras.utils.to_categorical(y_sequences, num_classes=vocab_size)

# Build and train LSTM model
lstm_model = Sequential([
    Embedding(vocab_size, 64, input_length=max_length),
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dense(vocab_size, activation='softmax')
])
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam')
lstm_model.fit(X, y, epochs=50, batch_size=1, verbose=0)

def generate_lstm_response(history_text):
    """Predict a single follow-up word for the given conversation text.

    The text is tokenized with the module-level tokenizer, padded to
    max_length, and passed to lstm_model; the highest-scoring vocabulary index
    is mapped back to a word ("details" if the index has no word). When the
    text yields no known tokens, a fixed prompt sentence is returned instead.
    """
    token_ids = tokenizer.texts_to_sequences([history_text])

    if token_ids and token_ids[0]:
        model_input = pad_sequences(token_ids, maxlen=max_length, padding='post')
        scores = lstm_model.predict(model_input, verbose=0)
        best_index = np.argmax(scores)
        # Fall back to a neutral word when the index is not in the vocabulary
        return tokenizer.index_word.get(best_index, "details")

    # Nothing recognisable to condition the prediction on
    return "I can provide more details. What topic would you like to explore?"

def _record_turn(query, response, history):
    """Append the exchange to history and return the (response, history) pair."""
    history.append(("User: " + query, "Bot: " + response))
    return response, history


def chatbot(query, history):
    """Answer one user query and return (response, updated_history).

    The query is spell-corrected first; the corrected text drives every rule,
    while the original text is what gets recorded in history. Rules are tried
    in this order: empty input, greeting, gratitude, farewell, follow-up (only
    when there is prior history), and finally a nearest-neighbour lookup of the
    query embedding in the FAISS index. Lookups farther than DISTANCE_THRESHOLD
    get an out-of-domain reply; otherwise the matched answer is returned,
    prefixed by an empathetic sentence when an emotion keyword was detected.

    Args:
        query: The user's message.
        history: List of ("User: ...", "Bot: ...") tuples, or None to start a
            new conversation. The list is extended in place.

    Returns:
        A (response, history) tuple.
    """
    if history is None:
        history = []

    # Empty or whitespace-only input has nothing to correct or match against
    if not query or not query.strip():
        return _record_turn(
            query or "",
            "Please type a question about Rwandan history.",
            history,
        )

    # Correct spelling in the query
    corrected_query = correct_spelling(query)

    # Detect emotion on the corrected query
    emotion, emotion_response = detect_emotion(corrected_query)

    # Check for greetings
    if is_greeting(corrected_query):
        return _record_turn(
            query,
            "Hello! I'm here to assist you with questions about Rwandan history. What would you like to learn about today?",
            history,
        )

    # Check for gratitude
    if is_gratitude(corrected_query):
        return _record_turn(
            query,
            "You're welcome! I'm glad I could help. Would you like to know more about Rwandan history?",
            history,
        )

    # Check for farewells
    if is_farewell(corrected_query):
        return _record_turn(
            query,
            "Goodbye! Feel free to come back if you have more questions about Rwandan history.",
            history,
        )

    # Check for follow-up questions; only the last turn is used as context
    if is_follow_up(corrected_query) and history:
        last_turn = history[-1]
        history_text = last_turn[0] + " " + last_turn[1]
        lstm_response_word = generate_lstm_response(history_text)
        return _record_turn(
            query,
            f"Based on our conversation, perhaps you're interested in '{lstm_response_word}'. Would you like to know more about Rwandan history?",
            history,
        )

    # Encode the corrected query
    query_embedding = model.encode([corrected_query], convert_to_tensor=False).astype('float32')

    # Search for the closest question
    k = 1
    distances, indices = index.search(query_embedding, k)
    distance = distances[0][0]
    DISTANCE_THRESHOLD = 1.5  # out-of-domain cut-off; may need tuning

    if distance > DISTANCE_THRESHOLD:
        return _record_turn(
            query,
            "I'm sorry, I don't have information on that topic. I can help with questions about Rwandan history. What would you like to know?",
            history,
        )

    # Answer stored at the same position as the most similar question
    matched_answer = answers[indices[0][0]]

    prefix = f"{emotion_response} " if emotion else ""
    response = f"{prefix}Here’s what I found: {matched_answer} Would you like to know more about Rwandan history?"
    return _record_turn(query, response, history)

# Smoke-test the chatbot with sample queries, most of them deliberately
# misspelled to exercise the spelling correction step
test_queries = [
    "Hello",  # Greeting
    "The 1994 genoside is so hertbreaking",  # Misspelled: genoside -> genocide, hertbreaking -> heartbreaking
    "Tell me mor",  # Misspelled: mor -> more
    "I’m so hapy to learn about Rwanda’s progres",  # Misspelled: hapy -> happy, progres -> progress
    "What hapened in 1973 in Rwanda?",  # Misspelled: hapened -> happened
    "That’s shoking!",  # Misspelled: shoking -> shocking
    "Thanks alot",  # Misspelled: alot -> a lot
    "Goodby",  # Misspelled: Goodby -> Goodbye
]

print("Testing the chatbot with sample queries (including misspellings):")
history = []  # threaded through every call so follow-ups can see earlier turns
for query in test_queries:
    response, history = chatbot(query, history)
    print(f"Query: {query}\nResponse: {response}\n")

# Gradio UI: a question box plus a State component that carries the
# conversation history from one call of chatbot() to the next. The interface
# is built and launched as soon as this cell runs.
inputs = [
    gr.Textbox(placeholder="Ask a question about Rwandan history...", lines=2),
    gr.State(value=[]),
]
outputs = [gr.Textbox(label="Response"), gr.State()]

iface = gr.Interface(
    title="Rwandan History Chatbot",
    description="Ask questions about Rwandan history, and the chatbot will respond based on the provided knowledge base, even if you misspell words.",
    fn=chatbot,
    inputs=inputs,
    outputs=outputs,
)
iface.launch()