<a href="https://colab.research.google.com/github/maleal2/Developing-a-Human-like-Conversational-Chatbot/blob/main/Chabot_Interface_Team_10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Developing a Human-like Conversational Chatbot**

Maria Carolina Leal Cardenas

Department of Graduate Studies-Engineering, University of San Diego

**Natural Language Processing and GenAI (AAI-520-04)**


# Instructions for running the Chatbot Interface code

1. Run "Setting up Environment".
2. Run "Importing necessary Libraries".
3. Run "Gradio Chatbot Interface".

Once you have run all the code cells in order, you will see a prompt with a valid link that you can open in a web browser to access the chatbot interface.

# Setting up Environment.

In [None]:
!pip install transformers datasets


In [None]:
!pip install gradio


In [None]:
!pip install datasets


# Importing necessary libraries.

In [None]:
# -------------------------------------------
# Operating System and File I/O Libraries
# -------------------------------------------

import os  # Provides a way of using operating system dependent functionality like reading or writing to the file system.
import zipfile  # Provides a tool to create, read, write, append, and list a ZIP file.
import csv  # Used to read from and write to CSV (Comma-Separated Values) files.
import urllib.request  # Allows for fetching data across the web (e.g., downloading files).
import pickle  # Serializes and deserializes Python object structures.

# -------------------------------------------
# Data Processing and Manipulation Libraries
# -------------------------------------------

import pandas as pd  # Provides data structures and data analysis tools.
import numpy as np  # Supports large, multi-dimensional arrays and matrices, along with a collection of mathematical functions to operate on these arrays.

# -------------------------------------------
# Regular Expressions and Data Counting Libraries
# -------------------------------------------

import re  # Provides regular expression matching operations.
from collections import Counter  # A dict subclass for counting hashable objects.

# -------------------------------------------
# PyTorch Libraries (Deep Learning)
# -------------------------------------------

import torch  # Core PyTorch library for tensor operations.
import torch.nn as nn  # Contains neural network layers, functions, and losses.
import torch.optim as optim  # Implements various optimization algorithms.
from torch.utils.data import DataLoader  # Provides tools to load datasets in batches for training.
import math  # Provides mathematical functions like exp, log, sqrt, etc.

# -------------------------------------------
# Hugging Face Transformers Libraries
# -------------------------------------------

from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments  # For fine-tuning and working with transformer models like GPT-2.
from datasets import Dataset, load_from_disk  # Provides tools for loading and managing datasets for machine learning.

# -------------------------------------------
# Visualization Libraries
# -------------------------------------------

import matplotlib.pyplot as plt  # For creating static, animated, and interactive visualizations in Python.

# -------------------------------------------
# Progress Monitoring Libraries
# -------------------------------------------

from tqdm import tqdm  # Allows for displaying progress bars during loops or long computations.

# -------------------------------------------
# User Interface (Gradio)
# -------------------------------------------

import gradio as gr  # Library for building user interfaces in Python.


# Gradio Chatbot Interface.

In [None]:
# -------------------------------------------
# Load Tokenizer and Model from Checkpoint
# -------------------------------------------
# Identifier handed to `from_pretrained` for both the tokenizer and the model
model_output_dir = "GohanF2/Developing-a-Human-like-Conversational-Chatbot"

# Tokenizer and language model are restored from the same checkpoint
tokenizer = GPT2Tokenizer.from_pretrained(model_output_dir)
model = GPT2LMHeadModel.from_pretrained(model_output_dir)

# Run on CUDA when PyTorch reports it as available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

# -------------------------------------------
# Define Conversation History and Generation Parameters
# -------------------------------------------
# Running transcript of the dialogue; starts empty
conversation_history = ""

# Sampling / decoding settings
temperature = 0.8  # Sampling temperature (higher values give more varied text)
top_k = 50  # Size of the top-K candidate pool used when sampling
max_new_tokens = 50  # Budget of freshly generated tokens per reply
max_length = 100  # Intended cap on overall length, in tokens
min_length = 10  # Intended minimum response length, in tokens
repetition_penalty = 1.2  # Values above 1 discourage repeated tokens

# Token budget for the stored conversation history
max_history_tokens = 300  # Only the most recent 300 tokens are retained


# -------------------------------------------
# Function to Trim Conversation History
# -------------------------------------------
def trim_conversation_history():
    """Cap the global conversation history at ``max_history_tokens`` tokens.

    The history string is encoded with the module-level ``tokenizer``. When
    the encoding is longer than the limit, only the trailing
    ``max_history_tokens`` tokens are kept and decoded back to text (special
    tokens are dropped during decoding). A history that already fits is left
    untouched. The function returns nothing; it rebinds the global
    ``conversation_history`` in place.
    """
    global conversation_history

    history_ids = tokenizer.encode(conversation_history)
    if len(history_ids) <= max_history_tokens:
        # Already within budget; nothing to trim.
        return

    # Slice from the end so the most recent part of the dialogue survives.
    recent_ids = history_ids[-max_history_tokens:]
    conversation_history = tokenizer.decode(recent_ids, skip_special_tokens=True)


# -------------------------------------------
# Function to Generate a Single Response
# -------------------------------------------
def chatbot_response(prompt):
    """Generate the model's continuation of ``prompt`` and return only the new text.

    Args:
        prompt: Text fed to the model (for this notebook, the running
            conversation history ending in the ``AI: `` cue).

    Returns:
        The decoded text of the tokens produced by ``model.generate`` *after*
        the prompt, with special tokens removed. The prompt itself is not
        echoed back, so callers can append the result to their history
        without duplicating earlier turns.
    """
    # Tokenize the prompt and move it to the model's device.
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    # A single unpadded sequence: every position is a real token, so the mask
    # is all ones, with the same integer dtype/device as the input ids.
    attention_mask = torch.ones_like(inputs)

    # NOTE: `max_length` is deliberately not passed. It bounds prompt + output,
    # while the prompt alone may hold up to `max_history_tokens` tokens, and
    # it conflicts with `max_new_tokens` (Hugging Face lets `max_new_tokens`
    # override it and warns when both are set). Likewise `min_length` counts
    # prompt tokens, so the minimum is enforced through `min_new_tokens`
    # to apply to the response itself.
    outputs = model.generate(
        inputs,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,  # Upper bound on generated tokens
        min_new_tokens=min_length,  # Responses are at least this many tokens
        temperature=temperature,
        top_k=top_k,
        pad_token_id=tokenizer.eos_token_id,  # Set EOS token as the pad token
        no_repeat_ngram_size=2,  # Avoid repetitive n-grams
        repetition_penalty=repetition_penalty,  # Apply repetition penalty
        do_sample=True,  # Enable sampling to use temperature / top_k
    )

    # `outputs[0]` holds the prompt tokens followed by the continuation;
    # drop the prompt part so only the freshly generated reply is decoded.
    prompt_token_count = inputs.shape[-1]
    generated_tokens = outputs[0][prompt_token_count:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response


# -------------------------------------------
# Function to Generate a Response with Conversation History
# -------------------------------------------
def generate_conversational_response(user_input):
    """Produce the chatbot's reply to ``user_input`` and record the exchange.

    Steps, all acting on the global ``conversation_history``:
      1. Append the user's turn followed by the ``AI: `` cue.
      2. Trim the history via ``trim_conversation_history``.
      3. Ask ``chatbot_response`` for a reply to the trimmed history.
      4. Append that reply and a newline to the history.

    Args:
        user_input: The message typed by the user.

    Returns:
        The string returned by ``chatbot_response``.
    """
    global conversation_history

    # Record the new user turn and cue the model to answer as the AI.
    user_turn = f"User: {user_input}\nAI: "
    conversation_history = conversation_history + user_turn

    # Keep the prompt within the configured token budget before generating.
    trim_conversation_history()

    reply = chatbot_response(conversation_history)

    # Store the reply so later turns see it as context.
    conversation_history = conversation_history + reply + "\n"
    return reply


# -------------------------------------------
# Function to Reset the Conversation
# -------------------------------------------
def reset_conversation() -> str:
    """Empty the global conversation history and return a confirmation message."""
    global conversation_history

    # Rebind the module-level history to an empty string.
    conversation_history = ""

    confirmation = "Conversation reset."
    return confirmation


# -------------------------------------------
# Gradio Interface for the Chatbot
# -------------------------------------------
with gr.Blocks() as demo:
    # Components are declared in this order: input box, send button,
    # response box, reset button.
    chatbot_input = gr.Textbox(
        label="Chatbot Interface",
        placeholder="Talk to the chatbot",
        lines=2,
    )
    send_button = gr.Button("Send")

    output_box = gr.Textbox(
        label="Chatbot Response",
        placeholder="Chatbot will respond here",
        lines=3,
    )

    reset_button = gr.Button("Reset Conversation")

    # "Send" passes the input text to the conversational handler and shows
    # its return value in the response box.
    send_button.click(
        fn=generate_conversational_response,
        inputs=chatbot_input,
        outputs=output_box,
    )

    # "Reset Conversation" clears the history; the confirmation message it
    # returns is shown in the response box.
    reset_button.click(
        fn=reset_conversation,
        outputs=output_box,
    )

# Start the app with sharing enabled
demo.launch(share=True)

