In [1]:
# !pip install langchain langchain_community langchain-huggingface pyPDF2 python-dateutil dateparser faiss-cpu sentence-transformers ipywidgets 

In [2]:
import re
from datetime import datetime, timedelta
import dateparser
from typing import List, Dict
from langchain.agents import Tool, AgentExecutor, create_react_agent
from langchain.chains import RetrievalQA
from PyPDF2 import PdfReader
from langchain_core.documents import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEndpoint , HuggingFaceEmbeddings
# from langchain_community.embeddings import 

from huggingface_hub import login

In [3]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [4]:
from dotenv import load_dotenv
import os
load_dotenv()

# Get the Hugging Face token from the .env file
huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# print(huggingface_token)
# login(token=huggingface_token)

In [5]:
from huggingface_hub import HfApi

# Test the token
api = HfApi(token=huggingface_token)
user_info = api.whoami()
print("Logged in as:", user_info["name"])

Logged in as: jkelver


In [6]:
# Initialize free LLM (Zephyr-7B-beta)
llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    max_new_tokens=512,
    temperature=0.5,
    huggingfacehub_api_token=huggingface_token, 
)

In [7]:
# llm = HuggingFaceEndpoint(
#     repo_id="mistralai/Mistral-7B-v0.1",  # Open-access model
#     task="text-generation",
#     max_new_tokens=512,
#     temperature=0.5,
#     huggingfacehub_api_token=huggingface_token,
# )


In [8]:
# llm = HuggingFaceEndpoint(
#     repo_id="gpt2",  # Use a model that works with the API
#     task="text-generation",
#     max_new_tokens=256,
#     temperature=0.3,
#     huggingfacehub_api_token=huggingface_token,
# )

In [9]:
# Initialize free local embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [10]:
# Document Processing
def load_and_process_documents(pdf_path: str):
    # Load PDF using PyPDF2
    reader = PdfReader(pdf_path)
    pages = []
    
    # Extract text from each page
    for page_num, page in enumerate(reader.pages):
        text = page.extract_text()
        if text:  # Only add non-empty pages
            pages.append(Document(
                page_content=text,
                metadata={"source": pdf_path, "page": page_num + 1}
            ))
    
    # Split text into chunks
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separator="\n"
    )
    return text_splitter.split_documents(pages)


In [11]:
file_path = "documents\charniak.pdf"

In [12]:
docsT = load_and_process_documents(file_path)
for doc in docsT:
  print("-"*40)
  print(doc.page_content)

----------------------------------------
See discussions, st ats, and author pr ofiles f or this public ation at : https://www .researchgate.ne t/public ation/333899740
Charniak, E. An Introduction to Deep Learning
Article    in  Perception  · June 2019
DOI: 10.1177/0301006619857273
CITATIONS
10READS
1,390
1 author:
Brian T Sulliv an
Univ ersity of Brist ol
60 PUBLICA TIONS    1,235  CITATIONS    
SEE PROFILE
All c ontent f ollo wing this p age was uplo aded b y Brian T Sulliv an on 18 No vember 2022.
The user has r equest ed enhanc ement of the do wnlo aded file.
----------------------------------------
Book Review
Charniak, E. An Introduction to Deep Learning . Cambridge, MA: MIT Press, 2019; 192 pp.: ISBN:
9780262039512, $35.00 X | £27.00 Hardback.
Reviewed by: Brian Sullivan, School of Psychological Sciences, University of Bristol, UK
Deep learning with artiﬁcial neural networks has become an incredibly interesting and fast-
paced ﬁeld of research that has exploded since the introd

In [13]:
# Initialize Vector Store
try:
    docs = load_and_process_documents(file_path)
    vectorstore = FAISS.from_documents(docs, embeddings)
    retriever = vectorstore.as_retriever()
except Exception as e:
    print(f"Error initializing vector store: {e}")
    exit()


In [14]:
# Validation Functions
def validate_email(email: str) -> bool:
    pattern = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
    return re.match(pattern, email) is not None

def validate_phone(phone: str) -> bool:
    pattern = r"^\+?[1-9]\d{1,14}$"
    return re.match(pattern, phone) is not None


In [38]:
from datetime import datetime
import dateparser

In [72]:
def parse_date(text: str) -> str:
    """Enhanced date parsing with validation and fallback."""
    try:
        # Parse with context-aware settings
        date = dateparser.parse(
            text,
            settings={
                'PREFER_DATES_FROM': 'future',
                'RELATIVE_BASE': datetime.now(),
                'RETURN_AS_TIMEZONE_AWARE': True
            }
        )

        # If dateparser fails to parse, try to manually handle relative weekdays or explicit dates.
        if not date:
            # Manually handle explicit date format like "March 25"
            try:
                # Try parsing explicit dates like "March 25"
                date = datetime.strptime(text, "%B %d")
                
                # If the date has already passed this year, update to the next year
                if date < datetime.now():
                    date = date.replace(year=datetime.now().year + 1)
            
            except ValueError:
                pass  # Continue if not an explicit date

            # If still not parsed, handle relative weekdays like "next Monday"
            if not date:
                # Mapping of weekday names to their respective weekday number
                days_of_week = {
                    'monday': 0,
                    'tuesday': 1,
                    'wednesday': 2,
                    'thursday': 3,
                    'friday': 4,
                    'saturday': 5,
                    'sunday': 6
                }
                
                # Check for the weekday in the text and calculate the next occurrence
                for day_str, weekday in days_of_week.items():
                    if day_str in text.lower():
                        days_ahead = (weekday - datetime.now().weekday()) + 7
                        if days_ahead <= 0:  # If the specified day is today, move to the next one
                            days_ahead += 7
                        date = datetime.now() + timedelta(days=days_ahead)
                        break

        if not date:
            return None

        # Validate future date (minimum 24 hours ahead)
        if date < datetime.now() + timedelta(hours=24):
            return "date_too_soon"
        
        # Return the date in the desired format
        return date.strftime("%Y-%m-%d")
        
    except Exception as e:
        return f"error_{str(e)}"

"error_can't compare offset-naive and offset-aware datetimes"

In [73]:
parse_date("next friday")

'2025-01-31'

In [47]:
def book_appointment_handler(query: str) -> str:
    """Enhanced appointment booking tool"""
    parsed = parse_date(query)
    
    if parsed is None:
        return "DATE_ERROR: Couldn't recognize the date. Please specify like:\n- 'Next Tuesday at 2PM'\n- 'March 25th'\n- 'Tomorrow morning'"
    
    if isinstance(parsed, str) and parsed.startswith("error"):
        return f"DATE_ERROR: Invalid date format - {parsed[6:]}"
    
    if parsed == "date_too_soon":
        return "DATE_ERROR: Please choose a date at least 24 hours in advance"
    
    return f"SUGGESTION: {parsed}\nCONFIRM: Does this work for you? (yes/no)"


"DATE_ERROR: Invalid date format - can't compare offset-naive and offset-aware datetimes"

In [17]:

# Conversation State
class FormState:
    def __init__(self):
        self.active = False
        self.current_form = None
        self.collected_data = {}
        self.required_fields = []

form_state = FormState()


In [18]:
# Tools
tools = [
    Tool(
        name="DocumentQA",
        func=lambda q: RetrievalQA.from_chain_type(llm=llm, retriever=retriever).invoke(q),
        description="Answers questions from PDF documents"
    ),
    Tool(
        name="ScheduleCall",
        func=lambda _: "Please provide your name, phone, and email in this format: [Name] [Phone] [Email]",
        description="Initiates call scheduling process"
    ),
    Tool(
        name="BookAppointment",
        func=book_appointment_handler,
        description="Handles appointment booking. Input examples: 'next Monday', 'March 25th at 2PM', 'tomorrow morning'"
    ),
]

In [19]:
# Define the prompt template with all required variables
prompt_template = """You are an intelligent assistant designed to answer user questions based on the provided document. Follow these rules strictly:

1. **Document-Based Answers**:
   - If the question is about the document, provide only relevant information from the document.
   - Do not make up answers or include external knowledge.

2. **Tool Usage**:
   - You have access to the following tools: {tools}.
   - Use the tools only when necessary to answer the question.
   - Always follow this format:
     - Thought: Think about what to do next.
     - Action: Choose the appropriate tool from [{tool_names}].
     - Action Input: Provide the input for the tool.
     - Observation: Record the result of the tool's action.

3. **Final Answer**:
   - After gathering all necessary information, provide a concise and accurate final answer.
   - Do not repeat the question or include unnecessary details.

4. **Efficiency**:
   - Use the minimum number of steps to answer the question.
   - Avoid unnecessary tool usage or repetitive actions.

5. **Format**:
   - Always follow this structure:
     - Question: The input question.
     - Thought: Your reasoning.
     - Action: The tool to use (if needed).
     - Action Input: The input for the tool (if needed).
     - Observation: The result of the tool (if used).
     - Final Answer: The final response to the question.

Begin!

Question: {input}
{agent_scratchpad}"""

# Create the PromptTemplate
prompt = PromptTemplate.from_template(prompt_template)

# Create the ReAct agent
agent = create_react_agent(llm, tools, prompt)
# Create the AgentExecutor
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)

In [20]:
from ipywidgets import widgets, Layout, Output
from IPython.display import display, clear_output

In [21]:
def chatbot():
    print("Welcome to the Free Document Chatbot!")
    print("You can:")
    print("1. Ask questions about the PDF document")
    print("2. Say 'schedule call' to book a meeting")
    print("3. Say 'book appointment' to schedule an appointment")
    # Create widgets for interaction
    input_box = widgets.Text(placeholder="Type your message here...", layout=Layout(width="80%"))
    send_button = widgets.Button(description="Send")
    output_area = Output()

    # Display widgets
    display(widgets.HBox([input_box, send_button]), output_area)

    def handle_submit(_):
        user_input = input_box.value.strip()
        input_box.value = ""  # Clear the input box

        with output_area:
            if not user_input:
                return

            # Handle form state
            if form_state.active:
                # Validate input based on current field
                current_field = form_state.required_fields[0]
                
                if current_field == "email" and not validate_email(user_input):
                    print("Agent: Invalid email format. Please try again.")
                    return
                
                if current_field == "phone" and not validate_phone(user_input):
                    print("Agent: Invalid phone format. Please use international format (+XX...)")
                    return
                
                form_state.collected_data[current_field] = user_input
                form_state.required_fields.pop(0)
                
                if not form_state.required_fields:
                    print("Agent: Thank you! We'll contact you soon.")
                    print(f"Collected info: {form_state.collected_data}")
                    form_state.active = False
                else:
                    print(f"Agent: Please provide your {form_state.required_fields[0]}:")
                return

            # Handle special commands
            if "schedule call" in user_input.lower():
                form_state.active = True
                form_state.required_fields = ["name", "phone", "email"]
                form_state.collected_data = {}
                print("Agent: Please provide your name:")
                return

            #book appointment 
            if "book appointment" in user_input.lower():
                response = agent_executor.invoke({"input": user_input})
                output = response['output']
                
                if "SUGGESTION:" in output:
                    print(f"Agent: {output}")
                    confirmation = input("You: ").lower()
                    if confirmation == "yes":
                        print("Agent: Appointment booked successfully!")
                        # Store the appointment in form_state.collected_data
                    else:
                        print("Agent: Let's try another date. Please specify:")
                elif "DATE_ERROR:" in output:
                    print(f"Agent: {output[10:]}")
                else:
                    print(f"Agent: {output}")
                    # continue
            # Handle document questions
            response = agent_executor.invoke({"input": user_input})
            print(f"Agent: {response['output']}")

    # Attach the handler to the button
    send_button.on_click(handle_submit)


In [25]:
chatbot()

Welcome to the Free Document Chatbot!
You can:
1. Ask questions about the PDF document
2. Say 'schedule call' to book a meeting
3. Say 'book appointment' to schedule an appointment


HBox(children=(Text(value='', layout=Layout(width='80%'), placeholder='Type your message here...'), Button(des…

Output()

In [23]:
# def main():
#     print("Welcome to the Free Document Chatbot!")
#     print("You can:")
#     print("1. Ask questions about the PDF document")
#     print("2. Say 'schedule call' to book a meeting")
#     print("3. Say 'book appointment' to schedule an appointment")
    
#     while True:
#         try:
#             user_input = input("\nYou: ").strip()
            
#             if not user_input:
#                 continue

#             # Handle form state
#             if form_state.active:
#                 # Validate input based on current field
#                 current_field = form_state.required_fields[0]
                
#                 if current_field == "email" and not validate_email(user_input):
#                     print("Agent: Invalid email format. Please try again.")
#                     continue
                
#                 if current_field == "phone" and not validate_phone(user_input):
#                     print("Agent: Invalid phone format. Please use international format (+XX...)")
#                     continue
                
#                 form_state.collected_data[current_field] = user_input
#                 form_state.required_fields.pop(0)
                
#                 if not form_state.required_fields:
#                     print("Agent: Thank you! We'll contact you soon.")
#                     print(f"Collected info: {form_state.collected_data}")
#                     form_state.active = False
#                 else:
#                     print(f"Agent: Please provide your {form_state.required_fields[0]}:")
#                 continue

#             # Handle special commands
#             if "schedule call" in user_input.lower():
#                 form_state.active = True
#                 form_state.required_fields = ["name", "phone", "email"]
#                 form_state.collected_data = {}
#                 print("Agent: Please provide your name:")
#                 continue

#             #book appointment 
#             if "book appointment" in user_input.lower():
#                 response = agent_executor.invoke({"input": user_input})
#                 output = response['output']
                
#                 if "SUGGESTION:" in output:
#                     print(f"Agent: {output}")
#                     confirmation = input("You: ").lower()
#                     if confirmation == "yes":
#                         print("Agent: Appointment booked successfully!")
#                         # Store the appointment in form_state.collected_data
#                     else:
#                         print("Agent: Let's try another date. Please specify:")
#                 elif "DATE_ERROR:" in output:
#                     print(f"Agent: {output[10:]}")
#                 else:
#                     print(f"Agent: {output}")
#                 continue

#             # Handle document questions
#             response = agent_executor.invoke({"input": user_input})
#             print(f"Agent: {response['output']}")

#         except KeyboardInterrupt:
#             print("\nGoodbye!")
#             break
#         except Exception as e:
#             print(f"Error: {str(e)}")

# if __name__ == "__main__":
#     main()

In [24]:
# chatbot()