## Import

In [9]:
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd
import ast
from sklearn.metrics.pairwise import cosine_similarity




## Hostel Rules Data

In [2]:
# Colab-specific upload step: `files.upload()` comes from the google.colab package,
# so this cell only works inside a Colab runtime. The recorded output shows the
# file being saved as 'miit_hostel_qa.csv', the name the loading cell reads.
# `uploaded` holds whatever `files.upload()` returns; it is not used again in the
# cells visible here.
from google.colab import files
uploaded = files.upload()

Saving miit_hostel_qa.csv to miit_hostel_qa.csv


In [12]:
# 1. Load the flat CSV
df = pd.read_csv('miit_hostel_qa.csv')


def _parse_keywords(raw_keywords):
    """Turn one CSV `keywords` cell into a clean list of keyword strings.

    The cell is first tried as a Python literal (e.g. "['wifi', 'internet']").
    If it is not a valid literal it is treated as a comma-separated string
    ("wifi, internet"). Empty cells (NaN) yield an empty list, and blank
    entries are dropped so that an empty keyword can never match every
    question later on.
    """
    if not isinstance(raw_keywords, str):
        # pandas represents an empty cell as NaN (a float), not as a string.
        return [] if pd.isna(raw_keywords) else [str(raw_keywords)]

    try:
        parsed = ast.literal_eval(raw_keywords)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal -> fall back to plain comma-separated text.
        parsed = raw_keywords.split(',')

    # A literal such as "123" or "'wifi'" parses to a scalar, not a sequence.
    if not isinstance(parsed, (list, tuple, set)):
        parsed = [parsed]

    cleaned = (str(item).strip() for item in parsed)
    return [keyword for keyword in cleaned if keyword]


# 2. Reconstruct the qa_pairs list
qa_pairs = []

# Each (main_question, answer) combination is one Q&A pair; its rows are the
# individual question variations. groupby sorts the keys and drops rows whose
# key columns are NaN (pandas defaults).
grouped = df.groupby(['main_question', 'answer'])

for (main_q, answer), group in grouped:
    # All distinct phrasings for this QA pair; missing cells are skipped so that
    # downstream code can safely call str methods on every variation.
    variations = group['question_variation'].dropna().unique().tolist()

    # Keywords are stored as strings in the CSV; only the first row of the group
    # is consulted, so they are expected to be repeated on every row.
    keywords = _parse_keywords(group['keywords'].iloc[0])

    qa_pairs.append({
        "main_question": main_q,
        "answer": answer,
        "variations": variations,
        "keywords": keywords
    })

print(f"Reconstructed {len(qa_pairs)} unique QA pairs.")

Reconstructed 20 unique QA pairs.


## Main Functions

In [10]:
class MIITHostelBotQASystem:
    """Embedding-based FAQ matcher over a list of Q&A pairs.

    Every question variation of every Q&A pair is lower-cased and encoded once
    with the 'all-MiniLM-L6-v2' SentenceTransformer model. A user question is
    encoded the same way, compared to all variations by cosine similarity, and
    optionally re-scored with a per-keyword boost.

    Each entry of `qa_pairs` is a dict with the keys "main_question",
    "answer", "variations" (list of question strings) and "keywords"
    (list of keyword strings).

    Note: scores returned as "confidence"/"similarity" are *boosted* cosine
    similarities, so they can exceed 1.0 when keywords match.
    """

    # Relative score change contributed by each matching keyword (10%).
    KEYWORD_BOOST_STEP = 0.1

    # Reply used when no variation scores above the confidence threshold.
    # \u2709 is the envelope symbol.
    FALLBACK_ANSWER = (
        "I'm sorry, I couldn't find a relevant answer. Please contact Student "
        "Affairs for assistance. \u2709 studentaffair@miit.edu.mm"
    )

    def __init__(self, qa_pairs):
        """Load the embedding model and encode every question variation.

        Args:
            qa_pairs: list of Q&A dicts (see class docstring).
        """
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.qa_pairs = qa_pairs

        # Flat list of all variations plus, at the same position, the index of
        # the Q&A pair each variation belongs to.
        self.all_questions = []
        self.question_to_qa_index = []

        for qa_idx, qa_pair in enumerate(qa_pairs):
            for question in qa_pair["variations"]:
                self.all_questions.append(question.lower())
                self.question_to_qa_index.append(qa_idx)

        # Encode all questions
        self.question_embeddings = self.model.encode(self.all_questions)

        print(f"Loaded {len(self.all_questions)} question variations for {len(qa_pairs)} Q&A pairs")

    @staticmethod
    def _count_keyword_matches(keywords, question_lower):
        """Count keywords that occur as a substring of the lower-cased question.

        Blank keywords are ignored: an empty string is a substring of every
        question and would otherwise boost the pair unconditionally.
        """
        matches = 0
        for keyword in keywords:
            keyword_text = str(keyword)
            if keyword_text.strip() and keyword_text.lower() in question_lower:
                matches += 1
        return matches

    def keyword_boost(self, user_question, similarities):
        """Boost similarity scores for keyword matches.

        Args:
            user_question: the raw user question (case-insensitive matching).
            similarities: one score per entry of `self.all_questions`.

        Returns:
            A copy of `similarities` in which every variation whose Q&A pair has
            k >= 1 matching keywords is scaled by (1 + 0.1 * k). Non-negative
            scores are multiplied by the factor; negative scores are divided by
            it, so a keyword match always moves a score upwards and never
            lowers its rank. The input is not modified.
        """
        user_question_lower = user_question.lower()
        boosted_similarities = similarities.copy()

        # The match count depends only on the Q&A pair, so compute it once per
        # pair instead of once per variation.
        matches_by_pair = {}

        for i, qa_idx in enumerate(self.question_to_qa_index):
            if qa_idx not in matches_by_pair:
                matches_by_pair[qa_idx] = self._count_keyword_matches(
                    self.qa_pairs[qa_idx]["keywords"], user_question_lower
                )
            keyword_matches = matches_by_pair[qa_idx]

            if keyword_matches > 0:
                boost_factor = 1 + (keyword_matches * self.KEYWORD_BOOST_STEP)
                score = boosted_similarities[i]
                if score >= 0:
                    boosted_similarities[i] = score * boost_factor
                else:
                    boosted_similarities[i] = score / boost_factor

        return boosted_similarities

    def _score_questions(self, user_question, use_keyword_boost=True):
        """Return one (optionally keyword-boosted) score per stored variation.

        Returns an empty array when no variations are loaded, without touching
        the model.
        """
        if len(self.all_questions) == 0:
            return np.zeros(0)

        user_embedding = self.model.encode([user_question.lower()])
        similarities = cosine_similarity(user_embedding, self.question_embeddings)[0]

        if use_keyword_boost:
            similarities = self.keyword_boost(user_question, similarities)

        return similarities

    def get_answer(self, user_question, confidence_threshold=0.4, use_keyword_boost=True):
        """Get answer with enhanced matching.

        Args:
            user_question: the question to answer.
            confidence_threshold: the best score must be strictly greater than
                this value for its answer to be returned.
            use_keyword_boost: whether to apply `keyword_boost` to the scores.

        Returns:
            dict with keys "answer", "confidence" (Python float, may exceed 1.0
            after boosting), "matched_question" and "main_question". When
            nothing clears the threshold (or no questions are loaded) the
            answer is the fallback message and both question fields are None.
        """
        similarities = self._score_questions(user_question, use_keyword_boost)

        if len(similarities) == 0:
            # Nothing to match against: report the fallback with zero confidence.
            return {
                "answer": self.FALLBACK_ANSWER,
                "confidence": 0.0,
                "matched_question": None,
                "main_question": None
            }

        # Get the best match
        best_match_idx = int(np.argmax(similarities))
        # Plain float keeps the result JSON-serialisable.
        confidence = float(similarities[best_match_idx])

        if confidence > confidence_threshold:
            qa_pair = self.qa_pairs[self.question_to_qa_index[best_match_idx]]
            return {
                "answer": qa_pair["answer"],
                "confidence": confidence,
                "matched_question": self.all_questions[best_match_idx],
                "main_question": qa_pair["main_question"]
            }

        return {
            "answer": self.FALLBACK_ANSWER,
            "confidence": confidence,
            "matched_question": None,
            "main_question": None
        }

    def get_top_matches(self, user_question, top_k=3):
        """Get top-k matches for debugging.

        Scores always include the keyword boost. Returns at most `top_k` dicts
        ("question", "similarity", "answer", "main_question"), best first; a
        non-positive `top_k` or an empty question list yields an empty list.
        """
        if top_k <= 0:
            # Guard: slicing with [-0:] would select every match, not none.
            return []

        similarities = self._score_questions(user_question, use_keyword_boost=True)

        # Descending order, then keep the first top_k entries.
        top_indices = np.argsort(similarities)[::-1][:top_k]

        results = []
        for idx in top_indices:
            qa_pair = self.qa_pairs[self.question_to_qa_index[idx]]
            results.append({
                "question": self.all_questions[idx],
                "similarity": float(similarities[idx]),
                "answer": qa_pair["answer"],
                "main_question": qa_pair["main_question"]
            })

        return results

## Testing

In [14]:
# Build the matcher from the reconstructed Q&A pairs (loads the embedding model)
qa_system = MIITHostelBotQASystem(qa_pairs)

# Sample queries: the first three probe the academic-performance rule with
# different phrasings, the last two cover application and meals.
test_questions = [
    "what if I fail the exam?",
    "What happens if I get bad grades?",
    "Can I be kicked out for poor performance?",
    "How to apply for hostel?",
    "How is the meals?"
]

print("Testing MIIT Hostel Bot Q&A System:")
print("=" * 50)

for question in test_questions:
    result = qa_system.get_answer(question)

    # Answers are truncated to 100 characters to keep the report compact.
    if result['answer']:
        answer_line = f"Answer: {result['answer'][:100]}..."
    else:
        answer_line = "No answer found"

    report_lines = (
        f"\nQuestion: {question}",
        f"Confidence: {result['confidence']:.3f}",
        f"Matched Question: {result['matched_question']}",
        answer_line,
        "-" * 30,
    )
    for line in report_lines:
        print(line)

Loaded 136 question variations for 20 Q&A pairs
Testing MIIT Hostel Bot Q&A System:

Question: what if I fail the exam?
Confidence: 1.200
Matched Question: what if i fail the exam?
Answer: Yes, if a student receives two F grades in their CGPA grade sheet, they will be disqualified from st...
------------------------------

Question: What happens if I get bad grades?
Confidence: 0.867
Matched Question: what happens if i get f grades?
Answer: Yes, if a student receives two F grades in their CGPA grade sheet, they will be disqualified from st...
------------------------------

Question: Can I be kicked out for poor performance?
Confidence: 0.470
Matched Question: what can get me kicked out of the hostel?
Answer: Yes, if a student receives two F grades in their CGPA grade sheet, they will be disqualified from st...
------------------------------

Question: How to apply for hostel?
Confidence: 1.120
Matched Question: what is the process to apply for a hostel?
Answer: You can apply for a hos

## Debugging

In [None]:
# Show top matches for debugging
# The query and the number of matches are defined once so that the printed
# header always describes the call that is actually made.
debug_question = "Any rules"
debug_top_k = 3

print(f"\n\nDebugging - Top {debug_top_k} matches for '{debug_question}':")
top_matches = qa_system.get_top_matches(debug_question, top_k=debug_top_k)
for rank, match in enumerate(top_matches, start=1):
    print(f"{rank}. Question: {match['question']}")
    print(f"   Similarity: {match['similarity']:.3f}")
    print(f"   Main Q: {match['main_question']}")
    print()