In [29]:
# %%
from docx import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from pymongo import MongoClient
from bson import ObjectId
import numpy as np
from groq import Groq


In [30]:
def create_vector_store(text, embedding_function, qdrant_client, collection_name="documents"):
    """
    Creates a vector store in Qdrant from the given text.
    
    Args:
        text (str): Text to be processed and stored
        embedding_function: Function to create embeddings
        qdrant_client: Initialized Qdrant client
        collection_name (str): Name of the collection to create
    
    Returns:
        QdrantClient: The configured client
    """
    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    documents = text_splitter.split_text(text)
    
    # Create embeddings
    embeddings = embedding_function.embed_documents(documents)
    
    # Create vector params
    vector_size = len(embeddings[0])
    
    # Recreate collection
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
    )
    
    # Generate IDs
    ids = list(range(len(documents)))
    
    # Upload vectors and documents
    qdrant_client.upload_collection(
        collection_name=collection_name,
        vectors=embeddings,
        payload=[{"text": doc} for doc in documents],
        ids=ids
    )
    
    return qdrant_client

In [31]:
def get_bot_response(user_input, qdrant_client, embedding_function, groq_client, collection_name="documents"):
    """Get the bot response based on user input."""
    # Generate embedding for the user query
    query_embedding = embedding_function.embed_query(user_input)

    # Perform similarity search using Qdrant
    search_result = qdrant_client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        limit=5  # Adjust the limit to get more results if needed
    )

    # Gather context from similar chunks
    context = " ".join(hit.payload["text"] for hit in search_result)

    # Construct the detailed prompt
    detailed_prompt = f"You are a question-answering chatbot. Answer the following question: {user_input} \nContext: {context}\n if the context is about marks, just print the marks of the student, their name, their roll number and the subject code and name. If the context is about events and club, give the name of club hosting it, where it is hosted, venue, date and time, and a description of the event in not more than 100 words."

    # Make a call to Groq API for chat completions (groq-related part remains untouched)
    chat_completion = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": detailed_prompt}
        ],
        model="llama3-8b-8192",
        max_tokens=1000
    )

    return chat_completion.choices[0].message.content

In [32]:

# %%
def getEventText(event_info):
    """Extract title and description from events and return as a single text string."""
    if not event_info.get('Info'):
        print("No 'eventsInfo' field found in the document.")
        return ""

    events = event_info['Info'].get('events', [])
    if not events:
        print("No 'events' found in 'eventsInfo'.")
        return ""

    text = ""
    for i, event in enumerate(events):
        title = event.get('title', '')
        description = event.get('description', '')
        if title or description:
            text += f"Event {i + 1}:\n"
            text += f"Title: {title}\nDescription: {description}\n\n"
        else:
            print(f"Event {i + 1} is missing title and description.")
    
    return text.strip()

# %%
def getMarksText(event_info):
    """Extract title and description from events and return as a single text string."""
    if not event_info.get('Info'):
        print("No 'marksInfo' field found in the document.")
        return ""

    events = event_info['Info'].get('marks', [])
    if not events:
        print("No 'marks' found in 'marksInfo'.")
        return ""

    text = ""
    for i, event in enumerate(events):
        title = event.get('subject', '')
        description = event.get('marks', '')
        if title or description:
            text += f"Subject {i + 1}:\n"
            text += f"Subject Code and Name: {title}\nmarks of all Students: {description}\n\n"
        else:
            print(f"Subject {i + 1} is missing title and description.")
    
    return text.strip()

# %%
def getGeneralText(event_info):
    """Extract title and description from events and return as a single text string."""
    if not event_info.get('Info'):
        print("No 'generalInfo' field found in the document.")
        return ""

    events = event_info['Info'].get('general', [])
    if not events:
        print("No 'general' found in 'generalInfo'.")
        return ""

    text = ""
    for i, event in enumerate(events):
        title = event.get('subject', '')
        description = event.get('description', '')
        if title or description:
            text += f"Info {i + 1}:\n"
            text += f"The starting title:  {title}\ngeneral of all College: {description}\n\n"
        else:
            print(f"Subject {i + 1} is missing title and description.")
    
    return text.strip()


In [33]:

from fuzzywuzzy import fuzz

def classify_query(query):
    query = query.lower()
    
    marks_keywords = ["marks", "exam", "quiz", "midsem", "endsem", "practical", "score", "result"]
    events_keywords = ["event", "club", "workshop", "fest", "competition", "seminar"]
    general_keywords = ["information", "college", "timing", "library", "administration", "contact"]
    
    def fuzzy_match(keywords, query):
        for word in keywords:
            if any(fuzz.partial_ratio(word, query_word) > 80 for query_word in query.split()):
                return True
        return False
    
    if fuzzy_match(marks_keywords, query):
        return "marks"
    elif fuzzy_match(events_keywords, query):
        return "events"
    elif fuzzy_match(general_keywords, query):
        return "general"
    else:
        return "unknown"

# %%


In [35]:
def main():
    # MongoDB connection
    mongo_db_uri = "mongodb+srv://mrinalgaur:mrinalgaur22@compus.s1fob.mongodb.net/?retryWrites=true&w=majority&appName=compus"
    client = MongoClient(mongo_db_uri)
    db = client['test']
    collection = db['aiChatBot']

    # User input and query classification
    user_input = "Give me breif summary about what events are happening in the college."
    classified_query = classify_query(user_input)
    
    # Get appropriate text based on classification
    if classified_query == "events":
        info = collection.find_one({'_id': ObjectId('676d8bf49e48cdfb0b216f3f')})
        text = getEventText(info)
    elif classified_query == "marks":
        info = collection.find_one({'_id': ObjectId('676da65f9e48cdfb0b216f48')})
        text = getMarksText(info)
    elif classified_query == "general":
        info = collection.find_one({'_id': ObjectId('676da9b09e48cdfb0b216f49')})
        text = getGeneralText(info)
    else:
        text = ""  # Handle unknown query types
    
    # Initialize clients
    groq_api = "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"
    groq_client = Groq(api_key=groq_api)

    API_KEY = "OoTdlGcLGvcwqYRhjnA8pvdV2eMnBjgp_ck-9sscciweJUA_cS7zBA"
    QDRANT_URL = "https://1a7ae735-6bd1-4254-ac73-0f55870ec6ae.europe-west3-0.gcp.cloud.qdrant.io"
    qdrant_client = QdrantClient(url=QDRANT_URL, api_key=API_KEY)

    embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    # Create vector store and get response
    if text:  # Only proceed if we have text to process
        qdrant_client = create_vector_store(text, embedding_function, qdrant_client)
        response = get_bot_response(user_input, qdrant_client, embedding_function, groq_client)
        print(response)
    else:
        print("No relevant information found for the query type:", classified_query)

if __name__ == "__main__":
    main()

  qdrant_client.recreate_collection(


Based on the provided information, here's a brief summary of the events happening in the college on December 27th and 28th:

**Event 3:** Python Workshop

* Date: 27th and 28th December
* Time: 10:00 PM to 12:00 AM
* Mode: Online
* Club: Not specified
* Description: A 2-hour online workshop to teach the basics of Python programming, with hands-on practice sessions and beginner-friendly content.

This is the only event scheduled on these dates, so you can expect to attend the Python workshop online.
