In [6]:
# import streamlit as st
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain_groq import ChatGroq
from langchain.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.retrieval import create_retrieval_chain
from langchain_community.vectorstores import FAISS
import time

from dotenv import load_dotenv
load_dotenv()

##load the Groq API key
groq_api_key=os.environ["GROQ2_API_KEY"]

# if "vector" not in st.session_state:
embeddings=OllamaEmbeddings(model="mxbai-embed-large")
loader=WebBaseLoader(["https://www.conestogac.on.ca/career-centre/faq","https://www.conestogac.on.ca/admissions/registrar-office/grading-transcripts","https://orientation.conestogac.on.ca/questions/faq","https://www.conestogac.on.ca/student-rights/faq","https://www.conestogac.on.ca/employment/applicant-faq","https://www.conestogac.on.ca/convocation/frequently-asked-questions","https://www.conestogac.on.ca/onecard/faq"])
docs=loader.load()

text_splitter=RecursiveCharacterTextSplitter(chunk_size=700,chunk_overlap=200)
final_documents=text_splitter.split_documents(docs)
vectors=FAISS.from_documents(final_documents,embeddings)


In [142]:
final_documents

[Document(page_content='Frequently asked questions - Community Career Centre\r\n\n\n\n\n\n\n\n\n\n\nSkip to main content\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\r\n                                                Search Button\r\n                                            \n\n\n\n\n\n\n\n\n\n\n\nInternational\n\n\nDirectory\n\n\n\n\nLogin\n\n\nEmail\n\nStudent Portal\n\neConestoga\n\nEmployee Portal\n\n\n\n\n\n\n\nInformation for:\n\n\nFuture students\n\nCurrent students\n\nInternational students\n\nNew to Canada\n\nConestoga employees\n\nGiving to Conestoga\n\nAlumni\n\nEmployers\n\n\n\n\n\n\n\n\n\nApply\n\n\nVisit\n\n\nGive\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPrograms & courses\nAdmissions\nCampus life & services\nAbout Conestoga', metadata={'source': 'https://www.conestogac.on.ca/career-centre/faq', 'title': '    Frequently asked questions - Community Career Centre\r\n', 'description': '    How to access Community Career Centre services and other FAQs.\r\n', '

In [17]:
final_documents_old=final_documents

In [105]:
#Comparisons of x vs 1000 chunk_size:

print(len(final_documents_old),len(final_documents))
print(len(str(final_documents_old[0])),len(str(final_documents[0])))

11 221
4667 1005


In [56]:
len(str(final_documents[:]))

197782

-> Website -> Load-> chunks -> embedings-> Vectors

---

In [8]:

# st.title("ChatGroq Demo")
llm = ChatGroq(groq_api_key=groq_api_key,model_name="mixtral-8x7b-32768")

prompt = ChatPromptTemplate.from_template(
    """
    Answer the questions based on the provided context only.
    Look for keywords and exact faq
    Please provide a good response based on the question and context
    <context>
    {context}
    </context>
    Questions:{input}-

    """
)

document_chain = create_stuff_documents_chain(llm,prompt)
retriever = vectors.as_retriever(search_kwargs={"k": 5})
retriever_chain = create_retrieval_chain(retriever,document_chain)


In [26]:
prompt =input("Input your prompt here")

if prompt:
# start= time.process_time()
    response = retriever_chain.invoke({"input":prompt})
    # print("Response Time: ",time.process_time()-start)
    print(response['answer'])

    # #With a streamlit expander
    # with st.expander("Document Similarity Search"):
    #     #Find the relevant chunks
    # print("Below are to to be ignored. only for displaying similar context")
    print("-"*50,end="\n\n\n")
    for i, doc in enumerate(response["context"]):
        print(doc.page_content)
        # print(doc.score)
        print("---------------------------------------------------------")

If you are receiving an error on the ONE Card Portal, you can try the following methods to troubleshoot:

1. Clear your cache
2. Clear your browser history
3. Use incognito mode
4. Use a desktop/laptop
5. Try using Firefox or Internet Explorer

If you continue to receive an error message, please take a screenshot of the error and email it to onecard@conestogac.on.ca along with a description of the challenges you are experiencing.
--------------------------------------------------


What do I do if I receive an error on the ONE Card Portal?
Please try the following methods to troubleshoot:
Clear your cacheClear your browser historyUse incognito modeUse a desktop/laptopTry Firefox or Internet Explorer
If you continue to receive an error message, please take a screen shot of the error you are receiving and email us at 
      onecard@conestogac.on.ca with your screen shot and information about the challenges you are experiencing. 
General questions
-----------------------------------------

In [28]:
#set GROQ_API_KEY in the secrets

import os
from groq import Groq

# Create the Groq client
client = Groq(api_key=os.environ.get("GROQ2_API_KEY"), )


# Set the system prompt
system_prompt = {
    "role": "system",
    "content":
    "You are a helpful assistant. You reply with very short answers."
}

# Initialize the chat history
chat_history = [system_prompt]

while True:
  # Get user input from the console
  user_input = input("You: ")

  # Append the user input to the chat history
  chat_history.append({"role": "user", "content": user_input})

  response = client.chat.completions.create(model="llama3-70b-8192",
                                            messages=chat_history,
                                            max_tokens=100,
                                            temperature=1.2)
  # Append the response to the chat history
  chat_history.append({
      "role": "assistant",
      "content": response.choices[0].message.content
  })
  # Print the response
  print("Assistant:", response.choices[0].message.content)


Assistant: "Believe you can and you're halfway there." - Theodore Roosevelt
Assistant: "Believe you can, focus on the goal, and you'll be there sooner!"
Assistant: :)
Assistant: :)
Assistant: :)


# Questions:
1] What does Student Achievement Report include?

Ans) According to the context, the Student Achievement Report includes:

"an Academic Standing Decision and all courses with or without a formal assessment component."

---

2] When will Convocation be?

Ans) The context does not mention Convocation, so I cannot provide an answer to that question. The provided context only discusses Student Achievement Reports, digital transcripts, and the disclosure of personal information

---

3] When does Official graduation Ceremony take place?

Ans)Based on the provided context, there is no mention of an "Official graduation Ceremony" or when it takes place. The context only discusses student records, academic standing decisions, and transcript fees, but does not mention graduation ceremonies. Therefore, I cannot provide an accurate answer to this question. 

Ans:when size is 500 & 100 overlap) According to the context, Official graduation ceremonies take place in:

1. February
2. June
3. November

Ans: 700 & 200) According to the context, the Convocation ceremony, also known as the Official graduation ceremonies, will take place in:

* February
* June
* November



---

4] When does Official graduation ceremonies take place?

Ans) Based on the provided context, it does not mention when Official graduation ceremonies take place. The context only provides information on requesting transcripts, logging in to the MyCreds learner portal, and submitting academic appeals. It does not mention graduation ceremonies. Therefore, the answer to this question would be "Not mentioned" or "Unknown".

---

5] When does Application to graduate form needs to be submitted?

Ans) According to the provided context, the Application to Graduate form must be completed and submitted to the registrar's office if a student is not registered in a full-time program, and is completing the final course(s) of the program on a part-time basis.

---

6] Give me Information about Convocation?

Ans) Based on the provided context, here are the details of Convocation:

* **When:** Official graduation ceremonies take place in February, June, and November.
* **What:** The ceremonies feature the presentation of diplomas, certificates, degrees, and special college-wide awards.
* **Eligibility:** To be eligible to participate, students must have successfully completed all program requirements by a specific date (which permits the registrar’s office to process your records) and the program must qualify for participation in the ceremonies.
* **Registration:** Students must register their attendance through the Student Portal to attend the ceremony.
* **Information:** Information on convocation is mailed to all potential graduates.

Ans: when 1000 & 100) Here's the information about Convocation:

* Official graduation ceremonies take place in February, June, and November.
* The ceremonies feature the presentation of diplomas, certificates, degrees, and special college-wide awards.

Ans: 500 & 50)Based on the provided context, here are the answers to your questions about Convocation:

1. What is the eligibility criteria to participate in Convocation?

To be eligible to participate, the student must have successfully completed all program requirements by a specific date and the program must qualify for participation in the ceremonies.

2. Can a student participate in Convocation if program requirements are not completed until after the specified date?

No, if program requirements are not completed until after the specified date, the student would be eligible to participate in the next scheduled Convocation.


# Blockers:

- Contexts are not getting properly. 
- Reasons: 
1. Unstructured data from Website Containing \n\x0r\n\nContinuation words.
2. Sweet spot for Chunk size is to be discovered.


---

---

In [2]:
# import streamlit as st
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_groq import ChatGroq
from langchain.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.retrieval import create_retrieval_chain
from langchain_community.vectorstores import FAISS
import time

from dotenv import load_dotenv
load_dotenv()

##load the Groq API key
groq_api_key=os.environ["GROQ2_API_KEY"]

# if "vector" not in st.session_state:
embeddings=OllamaEmbeddings(model="llama3")
# file_path=("./clean_FAQ.csv")

loader = CSVLoader(file_path='./clean_FAQ.csv',encoding="utf8",
    csv_args={
    'delimiter': ',',
    'quotechar': '"',
    'fieldnames': ['question', 'answer']
})

# loader=WebBaseLoader(["https://www.conestogac.on.ca/career-centre/faq","https://www.conestogac.on.ca/admissions/registrar-office/grading-transcripts","https://orientation.conestogac.on.ca/questions/faq","https://www.conestogac.on.ca/student-rights/faq","https://www.conestogac.on.ca/employment/applicant-faq","https://www.conestogac.on.ca/convocation/frequently-asked-questions","https://www.conestogac.on.ca/onecard/faq"])
# loader = CSVLoader(file_path=file_path)
docs=loader.load()



In [3]:

text_splitter=RecursiveCharacterTextSplitter(chunk_size=700,chunk_overlap=200)
final_documents=text_splitter.split_documents(docs)
vectors=FAISS.from_documents(final_documents,embeddings)

In [21]:
query="How many guests can I bring?"
result=vectors.similarity_search(query,k=10)
result

[Document(page_content='question: Is condor cash the same thing as print balance?\nanswer: Balance will be reset if it’s not used. Once your non-refundable print balance reaches zero, print charges will begin deducting from your condor cash funds. Learn more aboutprinting and photocopying on campus.', metadata={'source': './clean_FAQ.csv', 'row': 12}),
 Document(page_content='question: Freedom of information\nanswer: Personal, academic and non-academic information (for example, identification photographs).', metadata={'source': './clean_FAQ.csv', 'row': 121}),
 Document(page_content='question: What if i have a visa/debit card or master card/debit card?\nanswer: Use the interac option on the cash loading portal and if you have difficulty, use the credit card option. The one card office is available to answer questions via phone at 519 748-5220, ext. 3530.', metadata={'source': './clean_FAQ.csv', 'row': 10}),
 Document(page_content='question: Can i report concerns anonymously?\nanswer: C

In [4]:
final_documents

[Document(page_content='question: question\nanswer: answer', metadata={'source': './clean_FAQ.csv', 'row': 0}),
 Document(page_content='question: How do i upload my one card student photo?\nanswer: After your deposit is paid, you can upload your one card photo. Complete the steps outlined onget your card.', metadata={'source': './clean_FAQ.csv', 'row': 1}),
 Document(page_content="question: Why do i need to upload a one card student photo to access my timetable?\nanswer: A student's one card photo completes their student record and confirms that they are a conestoga college student.your one card student photo is also required to print and mail your one card to you. Your one card provides you access to a variety of student services. Learn more about how touse your card.", metadata={'source': './clean_FAQ.csv', 'row': 2}),
 Document(page_content='question: How do i confirm if i have an approved one card student photo?\nanswer: Log into theone card portalusing your conestoga credentials.s

In [47]:

# st.title("ChatGroq Demo")
llm = ChatGroq(groq_api_key=groq_api_key,model_name="mixtral-8x7b-32768")

prompt = ChatPromptTemplate.from_template(
    """
    Please provide a good response based on the question and context. Sometimes question is similar or same to the context's question. Answer according to the context only.
    <context>
    {context}
    </context>
    Questions:{input}

    """
)

document_chain = create_stuff_documents_chain(llm,prompt)
retriever = vectors.as_retriever(search_kwargs={"k": 15})
retriever_chain = create_retrieval_chain(retriever,document_chain)

prompt =input("Input your prompt here")

if prompt:
# start= time.process_time()
    response = retriever_chain.invoke({"input":prompt})
    # print("Response Time: ",time.process_time()-start)
    print(response['answer'])

    # #With a streamlit expander
    # with st.expander("Document Similarity Search"):
    #     #Find the relevant chunks
    # print("Below are to to be ignored. only for displaying similar context")
    print("-"*50,end="\n\n\n")
    for i, doc in enumerate(response["context"]):
        print(doc.page_content)
        print("---------------------------------------------------------")

One cards do not have expiry dates. Our cards are intended to follow students to convocation and beyond. We consider you part of the Conestoga community even when an alumni.
--------------------------------------------------


question: Is condor cash the same thing as print balance?
answer: Balance will be reset if it’s not used. Once your non-refundable print balance reaches zero, print charges will begin deducting from your condor cash funds. Learn more aboutprinting and photocopying on campus.
---------------------------------------------------------
question: What do i do if my card is lost or stolen?
answer: Stolen and lost cards should be suspended immediately. For information about how to suspend your card and request a replacement, refer toreplace your card.
---------------------------------------------------------
question: How do i see the transactions i have made?
answer: Login to theone card portalto view your transactions.
-------------------------------------------------

In [32]:
import pickle
with open('embeddings_Websites.pickle', 'wb') as f:
  pickle.dump(vectors, f)

In [53]:
with open('embeddings.pickle', 'rb') as f:
    loaded_embeddings = pickle.load(f)


In [54]:
# st.title("ChatGroq Demo")
llm = ChatGroq(groq_api_key=groq_api_key,model_name="mixtral-8x7b-32768")

prompt = ChatPromptTemplate.from_template(
    """
    Please provide a good response based on the question and context. Sometimes question is similar or same to the context's question. Answer according to the context only.
    <context>
    {context}
    </context>
    Questions:{input}

    """
)

document_chain = create_stuff_documents_chain(llm,prompt)
retriever = loaded_embeddings.as_retriever(search_kwargs={"k": 15})
retriever_chain = create_retrieval_chain(retriever,document_chain)

prompt =input("Input your prompt here")

if prompt:
# start= time.process_time()
    response = retriever_chain.invoke({"input":prompt})
    # print("Response Time: ",time.process_time()-start)
    print(response['answer'])

    # #With a streamlit expander
    # with st.expander("Document Similarity Search"):
    #     #Find the relevant chunks
    # print("Below are to to be ignored. only for displaying similar context")
    print("-"*50,end="\n\n\n")
    for i, doc in enumerate(response["context"]):
        print(doc.page_content)
        print("---------------------------------------------------------")

To get your One Card, your one card is available to you in both digital and physical form. You can refer to the detailed instructions on 'get your card' to review how you will get your One Card.
--------------------------------------------------


question: Is condor cash the same thing as print balance?
answer: Balance will be reset if it’s not used. Once your non-refundable print balance reaches zero, print charges will begin deducting from your condor cash funds. Learn more aboutprinting and photocopying on campus.
---------------------------------------------------------
question: How do i see the transactions i have made?
answer: Login to theone card portalto view your transactions.
---------------------------------------------------------
question: What if i have a visa/debit card or master card/debit card?
answer: Use the interac option on the cash loading portal and if you have difficulty, use the credit card option. The one card office is available to answer questions via phon

In [49]:
!pip install pickle5


Collecting pickle5
  Downloading pickle5-0.0.11.tar.gz (132 kB)
     ---------------------------------------- 0.0/132.1 kB ? eta -:--:--
     --- ------------------------------------ 10.2/132.1 kB ? eta -:--:--
     -------- ---------------------------- 30.7/132.1 kB 325.1 kB/s eta 0:00:01
     ------------------------------ ----- 112.6/132.1 kB 930.9 kB/s eta 0:00:01
     ------------------------------------ 132.1/132.1 kB 975.1 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pickle5
  Building wheel for pickle5 (setup.py): started
  Building wheel for pickle5 (setup.py): finished with status 'error'
  Running setup.py clean for pickle5
Failed to build pickle5


  error: subprocess-exited-with-error
  
  × python setup.py bdist_wheel did not run successfully.
  │ exit code: 1
  ╰─> [58 lines of output]
      running bdist_wheel
      running build
      running build_py
      creating build
      creating build\lib.win-amd64-cpython-312
      creating build\lib.win-amd64-cpython-312\pickle5
      copying pickle5\pickle.py -> build\lib.win-amd64-cpython-312\pickle5
      copying pickle5\pickletools.py -> build\lib.win-amd64-cpython-312\pickle5
      copying pickle5\__init__.py -> build\lib.win-amd64-cpython-312\pickle5
      creating build\lib.win-amd64-cpython-312\pickle5\test
      copying pickle5\test\pickletester.py -> build\lib.win-amd64-cpython-312\pickle5\test
      copying pickle5\test\test_pickle.py -> build\lib.win-amd64-cpython-312\pickle5\test
      copying pickle5\test\test_picklebuffer.py -> build\lib.win-amd64-cpython-312\pickle5\test
      copying pickle5\test\__init__.py -> build\lib.win-amd64-cpython-312\pickle5\test
      run

In [32]:
len(str(response))

3022

---

In [6]:
# Define the metadata extraction function.
def metadata_func(record: dict, metadata: dict) -> dict:

    metadata["category"] = record.get("category")
    metadata["question"] = record.get("question")

    if "source" in metadata:
        source = metadata["source"].split("/")
        source = source[source.index("langchain"):]
        metadata["source"] = "/".join(source)

    return metadata


loader = JSONLoader(
    file_path='.faqs2.json',
    jq_schema='',
    content_key="answer",
    metadata_func=metadata_func
)

data = loader.load()

ImportError: jq package not found, please install it with `pip install jq`

In [7]:
!pip install jq

Collecting jq
  Using cached jq-1.7.0.tar.gz (2.0 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: jq
  Building wheel for jq (pyproject.toml): started
  Building wheel for jq (pyproject.toml): finished with status 'error'
Failed to build jq


  error: subprocess-exited-with-error
  
  × Building wheel for jq (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [6 lines of output]
      running bdist_wheel
      running build
      running build_ext
      Executing: ./configure CFLAGS=-fPIC -pthread --disable-maintainer-mode --with-oniguruma=builtin
      error: [WinError 2] The system cannot find the file specified
      [end of output]
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
  ERROR: Failed building wheel for jq
ERROR: Could not build wheels for jq, which is required to install pyproject.toml-based projects

[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
!pip install pyproject.toml




[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import jq

ModuleNotFoundError: No module named 'jq'

In [1]:
jq

NameError: name 'jq' is not defined

In [5]:
from langchain_community.document_loaders import JSONLoader