In [11]:
pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.22-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.55 (from langchain-community)
  Downloading langchain_core-0.3.55-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.24 (from langchain-community)
  Downloading langchain-0.3.24-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [32]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
from datetime import datetime
import json

In [33]:
# Expanded Knowledge Base
# ======================
with open("hsuhk_kb.txt", "w", encoding="utf-8") as f:
    f.write("""## University Overview
HSUHK (Hang Seng University of Hong Kong) is a private liberal-arts-oriented university in Hong Kong.
Established: 2010 (Gained university status in 2018)
Location: Shatin, New Territories
Motto: "Erudition • Perseverance • Integrity • Commitment"
Accreditations: Approved by Hong Kong Council for Accreditation of Academic and Vocational Qualifications

## Academic Structure
- School of Business
  Programs: BBA in Financial Analysis, Digital Marketing, Global Business Management
- School of Communication
  Programs: Journalism and Communication, Film and Television Arts
- School of Decision Sciences
  Programs: Data Science, Business Analytics
- School of Humanities & Social Science
  Programs: Chinese History, Applied Psychology

## Admissions
Undergraduate Requirements:
- HKDSE: 3322 in core subjects
- IELTS: 6.0 overall
- Application Deadline: June 30 for September intake
- Tuition Fees: HKD 90,000 - 110,000/year
Scholarships: Academic Excellence Scholarship (Full tuition waiver), Sports Scholarship

## Campus Facilities
- 4 Residential Colleges (1,500 hostel places)
- Library: 250,000+ print/digital resources
- Sports Complex: Olympic-size swimming pool, gymnasium
- Innovation Hub: 3D printing lab, VR studio

## Student Life
Clubs: 50+ student organizations including Debate Team, AI Club
Exchange Programs: Partnered with 100+ universities worldwide
Career Services: 92% graduate employment rate within 6 months

## Contact
Website: https://www.hsu.edu.hk
Admissions Office: +852 3963 5555
Address: 8 Hang Shin Link, Siu Lek Yuen, Shatin""")

In [34]:
# Error Reporting System
# ====================
class ErrorReporter:
    def __init__(self):
        self.report_file = "error_reports.txt"

    def log_error(self, query, answer, user_report=None):
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log_entry = {
            "timestamp": timestamp,
            "query": query,
            "system_answer": answer,
            "user_report": user_report
        }

        with open(self.report_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")

In [35]:
def initialize_chatbot():
    # Load and process documents
    loader = TextLoader("hsuhk_kb.txt")
    documents = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)

    # Create embeddings and vector store
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vectorstore = Chroma.from_documents(texts, embeddings)

    # Create LLM pipeline
    model_name = "google/flan-t5-base"
    llm_pipeline = pipeline(
        "text2text-generation",
        model=model_name,
        tokenizer=model_name,
        max_length=200
    )
    return RetrievalQA.from_chain_type(
        llm=HuggingFacePipeline(pipeline=llm_pipeline),
        chain_type="stuff",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
        return_source_documents=True
    )

In [36]:
pip install chromadb



In [37]:
def main():
    qa_chain = initialize_chatbot()
    reporter = ErrorReporter()

    print("HSUHK Chatbot (type ':exit' to quit, ':report <message>' to report errors)")
    while True:
        try:
            user_input = input("\nQuestion: ").strip()

            if user_input.lower() == ':exit':
                break

            if user_input.startswith(':report'):
                _, *report_message = user_input.split(' ', 1)
                reporter.log_error(None, None, ' '.join(report_message))
                print("Error report submitted. Thank you!")
                continue

            result = qa_chain({"query": user_input})
            answer = result['result']

            print(f"\nAnswer: {answer}")

            # Validate answer quality
            if "don't know" in answer.lower() or not result['source_documents']:
                reporter.log_error(user_input, answer)
                print("\n[System] This answer may be incomplete. Our team will review it.")

            print("\nSources:")
            for idx, doc in enumerate(result['source_documents'][:3], 1):
                print(f"{idx}. {doc.page_content[:120]}...")

        except Exception as e:
            print(f"\n[System Error] {str(e)}")
            reporter.log_error(user_input, "SYSTEM ERROR", str(e))

if __name__ == "__main__":
    main()

Device set to use cpu


HSUHK Chatbot (type ':exit' to quit, ':report <message>' to report errors)

Question: What is HSUHK's motto?


  result = qa_chain({"query": user_input})



Answer: "Erudition • Perseverance • Integrity • Commitment"

Sources:
1. ## University Overview
HSUHK (Hang Seng University of Hong Kong) is a private liberal-arts-oriented university in Hong K...
2. HSUHK (Hang Seng University of Hong Kong) is a private liberal-arts-oriented university in Hong Kong.
The university off...
3. ## Contact
Website: https://www.hsu.edu.hk
Admissions Office: +852 3963 5555
Address: 8 Hang Shin Link, Siu Lek Yuen, Sh...

Question: Is HSUHK a public or private university?

Answer: private

Sources:
1. ## University Overview
HSUHK (Hang Seng University of Hong Kong) is a private liberal-arts-oriented university in Hong K...
2. HSUHK (Hang Seng University of Hong Kong) is a private liberal-arts-oriented university in Hong Kong.
The university off...
3. ## Contact
Website: https://www.hsu.edu.hk
Admissions Office: +852 3963 5555
Address: 8 Hang Shin Link, Siu Lek Yuen, Sh...

Question: When did HSUHK gain university status

Answer: 2018

Sources:
1. ## Universit

KeyboardInterrupt: 