# AI CONSULTANT 2.0

In [1]:
# So i wann connect to the knowledge base
import sqlite3
conn = sqlite3.connect("faculty_evaluation.db")
cursor = conn.cursor()

# then i just wanna confirm that its the right db that ive connected to
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print(cursor.fetchall())

[('sqlite_sequence',), ('Deans',), ('Faculty',), ('Courses',), ('Sections',), ('TeachingAssignments',), ('Evaluations',), ('FeedbackCorpus',)]


In [2]:
# then let me verify the schema of the FeedbackCorpus table
cursor.execute("PRAGMA table_info(FeedbackCorpus)")
print(cursor.fetchall())

[(0, 'feedback_id', 'INTEGER', 0, None, 1), (1, 'faculty_id', 'INTEGER', 0, None, 0), (2, 'instructor', 'TEXT', 0, None, 0), (3, 'course_code', 'TEXT', 0, None, 0), (4, 'section_code', 'TEXT', 0, None, 0), (5, 'semester', 'TEXT', 0, None, 0), (6, 'comment_type', 'TEXT', 0, None, 0), (7, 'comment_text', 'TEXT', 0, None, 0)]


Ayt im connected to the right db. Lets now move forward

In [3]:
# Just gotta perform a quick test
import sqlite3
conn = sqlite3.connect("faculty_evaluation.db")
cursor = conn.cursor()

# Preview Faculty table
cursor.execute("SELECT * FROM Faculty LIMIT 5")
print(cursor.fetchall())

[(600716, 'Mohamed, Mwanashehe S.', 'MS', 'Master of Business Admin', 'Y', 2, 75), (620040, 'Owuor, John David_Ouma', 'MR', 'Master of Business Admin', 'Y', 3, 132), (620489, 'Gitahi, Jesse Elikanah_Machirah', 'MR', 'Master of Business Admin', 'Y', 4, 123), (657923, 'Odoyo, Fredrick', 'DR', 'Doctor of Business Admin', 'N', 1, 38), (611121, 'Gatumo, Francis Mambo', 'MR', 'Master of Business Admin', 'N', 4, 94)]


ok thats good lets move forward

In [4]:
import sqlite3

# Connect to the correct DB
conn = sqlite3.connect("faculty_evaluation.db")
cursor = conn.cursor()

# Quick sanity check
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print("Tables:", cursor.fetchall())

Tables: [('sqlite_sequence',), ('Deans',), ('Faculty',), ('Courses',), ('Sections',), ('TeachingAssignments',), ('Evaluations',), ('FeedbackCorpus',)]


thats good we got all tables present

In [5]:
cursor.execute("PRAGMA table_info(Evaluations)")
print(cursor.fetchall())

[(0, 'faculty_id', 'INTEGER', 0, None, 0), (1, 'full_name', 'TEXT', 0, None, 0), (2, 'course_code', 'TEXT', 0, None, 0), (3, 'section_code', 'TEXT', 0, None, 0), (4, 'total_students', 'INTEGER', 0, None, 0), (5, 'total_respondents', 'INTEGER', 0, None, 0), (6, 'response_rate', 'REAL', 0, None, 0), (7, 'mean_score', 'REAL', 0, None, 0), (8, 'percent_score', 'REAL', 0, None, 0), (9, 'letter_grade', 'TEXT', 0, None, 0), (10, 'section_id', 'TEXT', 0, None, 0)]


In [6]:
cursor.execute("PRAGMA table_info(Faculty)")
print(cursor.fetchall())

[(0, 'faculty_id', 'INTEGER', 0, None, 0), (1, 'full_name', 'TEXT', 0, None, 0), (2, 'title', 'TEXT', 0, None, 0), (3, 'highest_degree', 'TEXT', 0, None, 0), (4, 'adjunct_flag', 'TEXT', 0, None, 0), (5, 'total_classes_taught', 'INTEGER', 0, None, 0), (6, 'total_students_taught', 'INTEGER', 0, None, 0)]


In [7]:
import re

def extract_name(user_input):
    # crude heuristic: take last word if it's capitalized or looks like a surname
    tokens = user_input.replace("?", "").split()
    # look for capitalized tokens
    candidates = [t.strip(",.") for t in tokens if t[0].isupper()]
    if candidates:
        return candidates[-1]  # last capitalized token
    return user_input  # fallback

def get_instructor_evaluation(user_input):
    name = extract_name(user_input)

    query = """
    SELECT f.full_name, e.mean_score, e.letter_grade
    FROM Faculty f
    JOIN TeachingAssignments ta ON f.faculty_id = ta.faculty_id
    JOIN Sections s ON ta.section_id = s.section_id
    JOIN Evaluations e ON s.section_id = e.section_id
    WHERE f.full_name LIKE ?
    """
    cursor.execute(query, (f"%{name}%",))
    rows = cursor.fetchall()

    if rows:
        return "\n".join(
            f"Based on evaluations, {full_name} received a grade of {grade} with a mean score of {score:.2f}."
            for full_name, score, grade in rows
        )
    return f"No evaluation data found for {name}."

In [8]:
def get_top_performers(n=5):
    query = """
    SELECT f.full_name, AVG(e.mean_score) as avg_score, e.letter_grade
    FROM Faculty f
    JOIN TeachingAssignments ta ON f.faculty_id = ta.faculty_id
    JOIN Sections s ON ta.section_id = s.section_id
    JOIN Evaluations e ON s.section_id = e.section_id
    GROUP BY f.full_name
    ORDER BY avg_score DESC
    LIMIT ?
    """
    cursor.execute(query, (n,))
    rows = cursor.fetchall()

    if rows:
        response = [f"Top {n} performers:"]
        for full_name, score, grade in rows:
            response.append(f"- {full_name}: {score:.2f} ({grade})")
        return "\n".join(response)
    return f"No data available for top {n} performers."

In [9]:
def get_instructors_above_score(threshold):
    query = """
    SELECT f.full_name, AVG(e.mean_score) as avg_score, e.letter_grade
    FROM Faculty f
    JOIN TeachingAssignments ta ON f.faculty_id = ta.faculty_id
    JOIN Sections s ON ta.section_id = s.section_id
    JOIN Evaluations e ON s.section_id = e.section_id
    GROUP BY f.full_name
    HAVING avg_score > ?
    ORDER BY avg_score DESC
    """
    cursor.execute(query, (threshold,))
    rows = cursor.fetchall()

    if rows:
        response = [f"Instructors with scores above {threshold}:"]
        for full_name, score, grade in rows:
            response.append(f"- {full_name}: {score:.2f} ({grade})")
        return "\n".join(response)
    return f"No instructors scored above {threshold}."

In [10]:
def get_instructors_by_grade(grade):
    query = """
    SELECT f.full_name, AVG(e.mean_score) as avg_score
    FROM Faculty f
    JOIN TeachingAssignments ta ON f.faculty_id = ta.faculty_id
    JOIN Sections s ON ta.section_id = s.section_id
    JOIN Evaluations e ON s.section_id = e.section_id
    WHERE e.letter_grade = ?
    GROUP BY f.full_name
    ORDER BY avg_score DESC
    """
    cursor.execute(query, (grade,))
    rows = cursor.fetchall()

    if rows:
        response = [f"Instructors with grade {grade}:"]
        for full_name, score in rows:
            response.append(f"- {full_name}: {score:.2f}")
        return "\n".join(response)
    return f"No instructors found with grade {grade}."

In [11]:
# then an intent parser
import re

def parse_intent(user_input):
    text = user_input.lower()

    if "top" in text and re.search(r"\d+", text):
        count = int(re.search(r"\d+", text).group())
        return {"intent": "top_ranked", "count": count}

    if "score" in text and "above" in text:
        threshold = float(re.search(r"\d+(\.\d+)?", text).group())
        return {"intent": "score_filter", "threshold": threshold}

    if "grade" in text or "got an" in text:
        grade = re.search(r"[abc]", text).group().upper()
        return {"intent": "grade_filter", "grade": grade}

    if "how did" in text or "tell me about" in text or "evaluation for" in text:
        return {"intent": "lookup", "name": user_input}

    return {"intent": "unknown"}

In [12]:
# a response engine is next
def get_consultant_response(user_query):
    intent = parse_intent(user_query)

    if intent["intent"] == "top_ranked":
        return get_top_performers(intent["count"])

    elif intent["intent"] == "score_filter":
        return get_instructors_above_score(intent["threshold"])

    elif intent["intent"] == "grade_filter":
        return get_instructors_by_grade(intent["grade"])

    elif intent["intent"] == "lookup":
        return get_instructor_evaluation(intent["name"])

    return "I couldn’t interpret that question. Try asking about an instructor, scores, grades, or top performers."

In [13]:
# then we finally test each one
print(get_consultant_response("How did Afundi perform this semester?"))
print(get_consultant_response("Who scored above 4.5?"))
print(get_consultant_response("Top 5 performers"))
print(get_consultant_response("Which instructors got an A?"))

Based on evaluations, Afundi, Patrick Omuhinda received a grade of B with a mean score of 4.04.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B with a mean score of 4.15.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of A- with a mean score of 4.35.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B with a mean score of 4.12.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B+ with a mean score of 4.18.
Instructors with scores above 4.5:
- Nguru, Faith Wariara: 4.70 (A)
- Adan, Ali: 4.60 (A)
- Mwilu, Asha Ahmed: 4.59 (A)
- Kimani, Nathan Muya: 4.55 (A)
- Mbugua, Paul Mungai: 4.54 (A)
- Achoki, George: 4.54 (A-)
- Koshal, Jeremiah Ntaloi_Ole: 4.54 (A)
- Kabongah, George Odhiambo: 4.54 (A)
- Brown, Dana Basnight: 4.53 (A)
- Were, Jamen H.: 4.53 (A)
- Ali, Fatuma Ahmed: 4.53 (A-)
- Kimotho, Stephen Gichuhi: 4.53 (A)
- Namuye, Maria: 4.52 (A)
- Ndungu, Francis Gitonga: 4.51 (A)
- Nakamura, Katsuji: 4.51 (A-)
- F   I

bet so thats good. Let me add in fuzzy matching so that it can recongize ppl buy their names 

In [14]:
# Well be using rapid fuzz
from rapidfuzz import fuzz

def lookup_instructor_evaluation(user_input, threshold=70):
    # Normalize input
    q = user_input.lower()

    # Get all faculty names
    cursor.execute("SELECT faculty_id, full_name FROM Faculty")
    faculty_rows = cursor.fetchall()

    best = None
    best_score = -1

    for fid, name in faculty_rows:
        sim = max(fuzz.partial_ratio(q, name.lower()), fuzz.token_set_ratio(q, name.lower()))
        if sim > best_score:
            best_score = sim
            best = (fid, name)

    if best and best_score >= threshold:
        fid, full_name = best

        # Now join to evaluations
        query = """
        SELECT AVG(e.mean_score), e.letter_grade
        FROM TeachingAssignments ta
        JOIN Sections s ON ta.section_id = s.section_id
        JOIN Evaluations e ON s.section_id = e.section_id
        WHERE ta.faculty_id = ?
        """
        cursor.execute(query, (fid,))
        row = cursor.fetchone()

        if row and row[0]:
            score, grade = row
            return f"Based on evaluations, {full_name} received a grade of {grade} with a mean score of {score:.2f}."

    return f"No evaluation data found for {user_input}."

In [15]:
def get_feedback_summary(fid, limit=3):
    try:
        query = """
        SELECT comment_text
        FROM FeedbackCorpus
        WHERE faculty_id = ?
        LIMIT ?
        """
        cursor.execute(query, (fid, limit))
        rows = cursor.fetchall()
        if rows:
            return "Sample feedback:\n" + "\n".join(f"- {r[0]}" for r in rows)
        return "No qualitative feedback available."
    except sqlite3.OperationalError as e:
        return f"Database error: {e}"

In [16]:
from rapidfuzz import fuzz

DEBUG = True

def log(*args):
    if DEBUG:
        print("[DEBUG]", *args)

def lookup_instructor_evaluation(user_input, threshold=70):
    q = user_input.lower()
    cursor.execute("SELECT faculty_id, full_name FROM Faculty")
    faculty_rows = cursor.fetchall()

    best = None
    best_score = -1

    for fid, name in faculty_rows:
        sim = max(fuzz.partial_ratio(q, name.lower()), fuzz.token_set_ratio(q, name.lower()))
        log("Comparing:", q, "vs", name, "-> similarity:", sim)
        if sim > best_score:
            best_score = sim
            best = (fid, name)

    if best and best_score >= threshold:
        fid, full_name = best
        log("Best match:", full_name, "with score:", best_score)

        # Numeric evaluation
        query = """
        SELECT AVG(e.mean_score), e.letter_grade
        FROM TeachingAssignments ta
        JOIN Sections s ON ta.section_id = s.section_id
        JOIN Evaluations e ON s.section_id = e.section_id
        WHERE ta.faculty_id = ?
        """
        cursor.execute(query, (fid,))
        row = cursor.fetchone()

        if row and row[0]:
            score, grade = row
            response = f"Based on evaluations, {full_name} received a grade of {grade} with a mean score of {score:.2f}."

            # Add feedback
            feedback = get_feedback_summary(fid)
            return response + "\n" + feedback

    return f"No evaluation data found for {user_input}."

# Stress testing

In [17]:
# Stress test queries
test_queries = [
    "Tell me about Afundi",
    "What’s the evaluation for Mohamed, Mwanashehe S.?",
    "How did Gitahi perform?",
    "Results for Odoyo",
    "Evaluation of Gatumo, Francis Mambo",
    "Who scored above 4.8?",
    "List instructors with scores below 3.5",
    "Which instructors scored exactly 4.0?",
    "Which instructors got an A?",
    "Show me all with grade B",
    "Who received a C?",
    "Top 10 performers",
    "Top 3 instructors this semester",
    "Top 20 performers",
    "What feedback did students give Afundi?",
    "Summarize feedback for Mohamed",
    "Show sample feedback for Odoyo",
    "How did Alo perform?",   # typo test
    "Tell me about Basweti",  # partial match
    "Evaluation for a nonexistent instructor"
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: Tell me about Afundi
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B with a mean score of 4.04.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B with a mean score of 4.15.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of A- with a mean score of 4.35.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B with a mean score of 4.12.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B+ with a mean score of 4.18.

Query: What’s the evaluation for Mohamed, Mwanashehe S.?
Based on evaluations, Mohamed, Mwanashehe S. received a grade of B+ with a mean score of 4.28.
Based on evaluations, Mohamed, Mwanashehe S. received a grade of B with a mean score of 4.17.
Based on evaluations, Gitahi, Jesse Elikanah_Machirah received a grade of B with a mean score of 4.04.
Based on evaluations, Gitahi, Jesse Elikanah_Machirah received a grade of A- with a mean score of 4.43.
Based on evaluations, Gitahi, Jesse

Where It Failed
- “Results for Odoyo” / “Evaluation of Gatumo, Francis Mambo”
- Issue: The intent parser didn’t recognize “Results for” or “Evaluation of” as lookup triggers.
- Fix: Expand parser keywords to catch synonyms like results, evaluation of, performance of, review of.
- “List instructors with scores below 3.5” / “Which instructors scored exactly 4.0”
- Issue: Current parser only handles “above” thresholds. Equality and “below” weren’t implemented.
- Fix: Add regex for “below”, “less than”, “equal to”, “exactly”.
- “Which instructors got an A?” / “Show me all with grade B”
- Issue: Parser mis‑mapped grade intent. It returned grade C for “A” and grade A for “B”.
- Fix: Regex extraction needs to be more precise — right now it’s grabbing the wrong letter from the query string.


In [18]:
# oK so lets revamp that intent parser a bit
def parse_intent(user_input):
    text = user_input.lower()

    # Top performers
    m_top = re.search(r"top\s+(\d+)", text)
    if m_top:
        return {"intent": "top_ranked", "count": int(m_top.group(1))}

    # Score filters
    m_above = re.search(r"(above|greater than|over)\s*(\d+(\.\d+)?)", text)
    if m_above:
        return {"intent": "score_filter", "op": ">", "threshold": float(m_above.group(2))}

    m_below = re.search(r"(below|less than|under)\s*(\d+(\.\d+)?)", text)
    if m_below:
        return {"intent": "score_filter", "op": "<", "threshold": float(m_below.group(2))}

    m_equal = re.search(r"(equal|exactly|=|is)\s*(\d+(\.\d+)?)", text)
    if m_equal:
        return {"intent": "score_filter", "op": "=", "threshold": float(m_equal.group(2))}

    # Grade filters
    m_grade = re.search(r"(grade|got an|received an)\s*([abc])", text)
    if m_grade:
        return {"intent": "grade_filter", "grade": m_grade.group(2).upper()}

    # Instructor lookup
    if any(kw in text for kw in ["how did", "tell me about", "results for", "evaluation of", "performance of", "review of"]):
        return {"intent": "lookup", "name": user_input}

    return {"intent": "unknown"}

In [19]:
# Stress test queries
test_queries = [
    "Tell me about Afundi",
    "What’s the evaluation for Mohamed, Mwanashehe S.?",
    "How did Gitahi perform?",
    "Results for Odoyo",
    "Evaluation of Gatumo, Francis Mambo",
    "Who scored above 4.8?",
    "List instructors with scores below 3.5",
    "Which instructors scored exactly 4.0?",
    "Which instructors got an A?",
    "Show me all with grade B",
    "Who received a C?",
    "Top 10 performers",
    "Top 3 instructors this semester",
    "Top 20 performers",
    "What feedback did students give Afundi?",
    "Summarize feedback for Mohamed",
    "Show sample feedback for Odoyo",
    "How did Alo perform?",   # typo test
    "Tell me about Basweti",  # partial match
    "Evaluation for a nonexistent instructor"
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: Tell me about Afundi
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B with a mean score of 4.04.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B with a mean score of 4.15.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of A- with a mean score of 4.35.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B with a mean score of 4.12.
Based on evaluations, Afundi, Patrick Omuhinda received a grade of B+ with a mean score of 4.18.

Query: What’s the evaluation for Mohamed, Mwanashehe S.?
I couldn’t interpret that question. Try asking about an instructor, scores, grades, or top performers.

Query: How did Gitahi perform?
Based on evaluations, Gitahi, Jesse Elikanah_Machirah received a grade of B with a mean score of 4.04.
Based on evaluations, Gitahi, Jesse Elikanah_Machirah received a grade of A- with a mean score of 4.43.
Based on evaluations, Gitahi, Jesse Elikanah_Machirah received a grade of A with a mean s

ok so it worked. It just broke down on knowing who got a C and on the feedback questions on students and what feedback they gave. So its not yet capable of going over the comment and giving a summary on it based on positive, neutral and negative. So that means we have to move to sentiment analysis, topic analysis and indetification possibly with bertopic and other NLTK NLP libraries and work with hugging face so that the response is in a consultant manner(as a side one, once we have something basic, I would like to do research on how a consultant would speak and give out feedback in sth that is closely related to this field). But atleast it knows when theres no instructor present, we just have to adjust the output it gives when its presented with such a question. This is the new output.


In [20]:
# So lets patch that letter intent section
import re

def parse_intent(user_input):
    text = user_input.lower()

    # Top performers
    m_top = re.search(r"top\s+(\d+)", text)
    if m_top:
        return {"intent": "top_ranked", "count": int(m_top.group(1))}

    # Score filters
    m_above = re.search(r"(above|greater than|over)\s*(\d+(\.\d+)?)", text)
    if m_above:
        return {"intent": "score_filter", "op": ">", "threshold": float(m_above.group(2))}

    m_below = re.search(r"(below|less than|under)\s*(\d+(\.\d+)?)", text)
    if m_below:
        return {"intent": "score_filter", "op": "<", "threshold": float(m_below.group(2))}

    m_equal = re.search(r"(equal|exactly|=|is)\s*(\d+(\.\d+)?)", text)
    if m_equal:
        return {"intent": "score_filter", "op": "=", "threshold": float(m_equal.group(2))}

    # Grade filters (A/B/C/D/F with optional + or -)
    m_grade = re.search(r"(grade|got\s+an|received\s+an|with\s+grade|who\s+received)\s*([abcdf])([+-]?)\b", text)
    if m_grade:
        grade = (m_grade.group(2) + m_grade.group(3)).upper()
        return {"intent": "grade_filter", "grade": grade}

    # Feedback intents
    feedback_kws = [
        "what feedback did", "summarize feedback", "sample feedback", "student comments",
        "qualitative feedback", "how did students feel", "what did students say"
    ]
    if any(kw in text for kw in feedback_kws):
        return {"intent": "feedback", "name": user_input}

    # Instructor lookup
    lookup_kws = ["how did", "tell me about", "results for", "evaluation of", "performance of", "review of", "what’s the evaluation for"]
    if any(kw in text for kw in lookup_kws):
        return {"intent": "lookup", "name": user_input}

    return {"intent": "unknown"}

In [21]:
# Then we adjust the grade filtering
def get_instructors_by_grade(grade):
    query = """
    SELECT f.full_name, ROUND(AVG(e.mean_score), 2) AS avg_score
    FROM Faculty f
    JOIN TeachingAssignments ta ON f.faculty_id = ta.faculty_id
    JOIN Sections s ON ta.section_id = s.section_id
    JOIN Evaluations e ON s.section_id = e.section_id
    WHERE e.letter_grade = ?
    GROUP BY f.full_name
    ORDER BY avg_score DESC
    """
    cursor.execute(query, (grade,))
    rows = cursor.fetchall()
    if not rows:
        return f"No instructors found with grade {grade}."
    lines = [f"- {name}: {score:.2f}" for name, score in rows]
    return "Instructors with grade " + grade + ":\n" + "\n".join(lines)


In [22]:
# Lets adjust the fuzzy matchgin with a feedback loop
from rapidfuzz import fuzz

def find_best_faculty(user_input, threshold=70):
    q = user_input.lower()
    cursor.execute("SELECT faculty_id, full_name FROM Faculty")
    rows = cursor.fetchall()
    best, best_score = None, -1
    for fid, name in rows:
        sim = max(fuzz.partial_ratio(q, name.lower()), fuzz.token_set_ratio(q, name.lower()))
        if sim > best_score:
            best_score = sim
            best = (fid, name)
    return best if best_score >= threshold else None

def get_feedback_rows(fid, limit=1000):
    cursor.execute("""
        SELECT comment_text
        FROM FeedbackCorpus
        WHERE faculty_id = ?
    """, (fid,))
    return [r[0] for r in cursor.fetchall()]

# Sentiment Analysis

In [23]:
# Lets start off with some basic sentiment analysis
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()

def sentiment_summary(comments):
    scores = [sia.polarity_scores(c)["compound"] for c in comments if c and c.strip()]
    if not scores:
        return {"avg": 0, "pos_pct": 0, "neg_pct": 0, "neu_pct": 0}
    pos = sum(1 for s in scores if s > 0.05)
    neg = sum(1 for s in scores if s < -0.05)
    neu = len(scores) - pos - neg
    total = len(scores)
    return {
        "avg": sum(scores) / total,
        "pos_pct": pos / total,
        "neg_pct": neg / total,
        "neu_pct": neu / total
    }

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [24]:
# Some topic hints
import re
from collections import Counter

def keyword_hints(comments, top_k=5):
    # naive keyword extraction: split, lowercase, filter short/common words
    stop = set(["the","and","to","of","in","for","on","is","was","are","with","that","this","it","as","at","by","from","or","an","be","but","not"])
    tokens = []
    for c in comments:
        words = re.findall(r"[a-zA-Z][a-zA-Z'-]+", c.lower())
        tokens.extend(w for w in words if len(w) > 3 and w not in stop)
    common = Counter(tokens).most_common(top_k)
    return [w for w, _ in common]

In [25]:
# Then lets work on the feedback response in a consultant style manner
def feedback_response(user_input, sample_n=3):
    match = find_best_faculty(user_input)
    if not match:
        return "I couldn’t find that instructor. Try a full or closer name."
    fid, full_name = match
    comments = get_feedback_rows(fid)
    if not comments:
        return f"No qualitative feedback available for {full_name}."

    sent = sentiment_summary(comments)
    keys = keyword_hints(comments)

    # pick sample quotes: one positive, one neutral, one negative if available
    pos_q = next((c for c in comments if sia.polarity_scores(c)["compound"] > 0.3), None)
    neg_q = next((c for c in comments if sia.polarity_scores(c)["compound"] < -0.3), None)
    neu_q = next((c for c in comments if -0.05 <= sia.polarity_scores(c)["compound"] <= 0.05), None)

    lines = [
        f"{full_name}: feedback is mostly "
        + ("positive" if sent["avg"] > 0.1 else "mixed" if abs(sent['avg']) <= 0.1 else "negative")
        + f" (pos {sent['pos_pct']:.0%}, neu {sent['neu_pct']:.0%}, neg {sent['neg_pct']:.0%}).",
        f"Common themes: {', '.join(keys) or 'none detected'}."
    ]

    samples = []
    if pos_q: samples.append(f"- Strength: “{pos_q}”")
    if neu_q: samples.append(f"- Neutral: “{neu_q}”")
    if neg_q: samples.append(f"- Improvement: “{neg_q}”")

    if samples:
        lines.append("Sample comments:")
        lines.extend(samples)

    return "\n".join(lines)

In [26]:
# Then we integrate the function in
def get_consultant_response(user_input):
    intent = parse_intent(user_input)

    if intent["intent"] == "lookup":
        return lookup_instructor_evaluation(intent["name"])
    if intent["intent"] == "top_ranked":
        return get_top_performers(intent["count"])
    if intent["intent"] == "score_filter":
        return get_instructors_by_score(intent["op"], intent["threshold"])
    if intent["intent"] == "grade_filter":
        return get_instructors_by_grade(intent["grade"])
    if intent["intent"] == "feedback":
        return feedback_response(intent["name"])

    return "I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback."

In [27]:
def get_instructors_by_score(op, threshold):
    """
    Return instructors filtered by score threshold with operator >, <, or =.
    """
    # Base query with aggregation
    base_query = """
    SELECT f.full_name, ROUND(AVG(e.mean_score), 2) AS avg_score, e.letter_grade
    FROM Faculty f
    JOIN TeachingAssignments ta ON f.faculty_id = ta.faculty_id
    JOIN Sections s ON ta.section_id = s.section_id
    JOIN Evaluations e ON s.section_id = e.section_id
    GROUP BY f.full_name
    HAVING avg_score {op} ?
    ORDER BY avg_score DESC
    """.format(op=op)

    cursor.execute(base_query, (threshold,))
    rows = cursor.fetchall()

    if not rows:
        return f"No instructors scored {op} {threshold}."

    header = f"Instructors with scores {op} {threshold}:"
    lines = [f"- {name}: {score:.2f} ({grade})" for name, score, grade in rows]
    return header + "\n" + "\n".join(lines)

ayt so weve made new changes and now i wanna stress test the system with the following questions

Instructor Lookup
- “Tell me about Afundi” → direct surname lookup.
- “Evaluation of Mohamed, Mwanashehe S.” → full name with comma.
- “Results for Odoyo” → surname only.
- “How did Gitahi perform this semester?” → natural phrasing.
- “Review of Gatumo, Francis Mambo” → synonym trigger.

🔹 Score Filters
- “Who scored above 4.5?” → should list top scorers.
- “List instructors with scores below 3.5” → test the new < operator.
- “Which instructors scored exactly 4.0?” → equality case.
- “Show me instructors with scores greater than 4.2” → alternate phrasing.

🔹 Grade Filters
- “Which instructors got an A?” → should return grade A.
- “Who received a C?” → test grade C.
- “Show me all with grade B+” → plus/minus handling.
- “List instructors with grade D” → lower performers.

🔹 Feedback Queries
- “What feedback did students give Afundi?” → fuzzy match + sentiment summary.
- “Summarize feedback for Mohamed” → sentiment + themes.
- “Show sample feedback for Odoyo” → sample quotes.
- “How did students feel about Gitahi?” → synonym trigger.
- “Qualitative feedback for Gatumo” → alternate phrasing.

🔹 Ranking
- “Top 5 performers” → small list.
- “Top 10 performers” → larger list.
- “Top 20 instructors this semester” → stress test with big limit.

🔹 Edge Cases
- “Tell me about Alo” → fuzzy match to Aloo.
- “Evaluation for Basweti” → partial surname.
- “Who received a Z?” → invalid grade, should gracefully handle.
- “Feedback for a nonexistent instructor” → should return “No instructor found.”


In [28]:
# Now just gotta stress test the system
# Stress test queries
test_queries = [
    "Tell me about Afundi",
    "What’s the evaluation for Mohamed, Mwanashehe S.?",
    "How did Gitahi perform?",
    "Results for Odoyo",
    "Evaluation of Gatumo, Francis Mambo",
    "Who scored above 4.8?",
    "List instructors with scores below 3.5",
    "Which instructors scored exactly 4.0?",
    "Which instructors got an A?",
    "Show me all with grade B",
    "Who received a C?",
    "Top 10 performers",
    "Top 3 instructors this semester",
    "Top 20 performers",
    "What feedback did students give Afundi?",
    "Summarize feedback for Mohamed",
    "Show sample feedback for Odoyo",
    "How did Alo perform?",   # typo test
    "Tell me about Basweti",  # partial match
    "Evaluation for a nonexistent instructor"
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: Tell me about Afundi
[DEBUG] Comparing: tell me about afundi vs Mohamed, Mwanashehe S. -> similarity: 33.33333333333333
[DEBUG] Comparing: tell me about afundi vs Owuor, John David_Ouma -> similarity: 33.333333333333336
[DEBUG] Comparing: tell me about afundi vs Gitahi, Jesse Elikanah_Machirah -> similarity: 30.76923076923077
[DEBUG] Comparing: tell me about afundi vs Odoyo, Fredrick -> similarity: 40.0
[DEBUG] Comparing: tell me about afundi vs Gatumo, Francis Mambo -> similarity: 41.17647058823529
[DEBUG] Comparing: tell me about afundi vs Oyaro, Kepha N._Makori -> similarity: 35.29411764705882
[DEBUG] Comparing: tell me about afundi vs Ogada, Agnes Owuor -> similarity: 33.333333333333336
[DEBUG] Comparing: tell me about afundi vs Kuria,, John -> similarity: 28.57142857142857
[DEBUG] Comparing: tell me about afundi vs Achoki, George -> similarity: 35.294117647058826
[DEBUG] Comparing: tell me about afundi vs Njuguna, Amos Gitau -> similarity: 37.5
[DEBUG] Comparing: tell me a

In [29]:
# Ok that operational error is not good so lets look at the db again
cursor.execute("PRAGMA table_info(FeedbackCorpus)")
print(cursor.fetchall())

[(0, 'feedback_id', 'INTEGER', 0, None, 1), (1, 'faculty_id', 'INTEGER', 0, None, 0), (2, 'instructor', 'TEXT', 0, None, 0), (3, 'course_code', 'TEXT', 0, None, 0), (4, 'section_code', 'TEXT', 0, None, 0), (5, 'semester', 'TEXT', 0, None, 0), (6, 'comment_type', 'TEXT', 0, None, 0), (7, 'comment_text', 'TEXT', 0, None, 0)]


In [30]:
def get_feedback_summary(fid, limit=3):
    query = """
    SELECT comment_text
    FROM FeedbackCorpus
    WHERE faculty_id = ?
    LIMIT ?
    """
    cursor.execute(query, (fid, limit))
    rows = cursor.fetchall()

    if rows:
        comments = [r[0] for r in rows]
        return "Sample feedback:\n" + "\n".join(f"- {c}" for c in comments)
    return "No qualitative feedback available."

In [31]:
# Now just gotta stress test the system
# Stress test queries
test_queries = [
    "Tell me about Afundi",
    "What’s the evaluation for Mohamed, Mwanashehe S.?",
    "How did Gitahi perform?",
    "Results for Odoyo",
    "Evaluation of Gatumo, Francis Mambo",
    "Who scored above 4.8?",
    "List instructors with scores below 3.5",
    "Which instructors scored exactly 4.0?",
    "Which instructors got an A?",
    "Show me all with grade B",
    "Who received a C?",
    "Top 10 performers",
    "Top 3 instructors this semester",
    "Top 20 performers",
    "What feedback did students give Afundi?",
    "Summarize feedback for Mohamed",
    "Show sample feedback for Odoyo",
    "How did Alo perform?",   # typo test
    "Tell me about Basweti",  # partial match
    "Evaluation for a nonexistent instructor"
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: Tell me about Afundi
[DEBUG] Comparing: tell me about afundi vs Mohamed, Mwanashehe S. -> similarity: 33.33333333333333
[DEBUG] Comparing: tell me about afundi vs Owuor, John David_Ouma -> similarity: 33.333333333333336
[DEBUG] Comparing: tell me about afundi vs Gitahi, Jesse Elikanah_Machirah -> similarity: 30.76923076923077
[DEBUG] Comparing: tell me about afundi vs Odoyo, Fredrick -> similarity: 40.0
[DEBUG] Comparing: tell me about afundi vs Gatumo, Francis Mambo -> similarity: 41.17647058823529
[DEBUG] Comparing: tell me about afundi vs Oyaro, Kepha N._Makori -> similarity: 35.29411764705882
[DEBUG] Comparing: tell me about afundi vs Ogada, Agnes Owuor -> similarity: 33.333333333333336
[DEBUG] Comparing: tell me about afundi vs Kuria,, John -> similarity: 28.57142857142857
[DEBUG] Comparing: tell me about afundi vs Achoki, George -> similarity: 35.294117647058826
[DEBUG] Comparing: tell me about afundi vs Njuguna, Amos Gitau -> similarity: 37.5
[DEBUG] Comparing: tell me a

alright so that worked, and I was reviewing the answers and there are some where it came out short.like here 'Query: What feedback did students give Afundi?
I couldn’t find that instructor. Try a full or closer name.

Query: Summarize feedback for Mohamed
I couldn’t find that instructor. Try a full or closer name.

Query: Show sample feedback for Odoyo
I couldn’t find that instructor. Try a full or closer name.' the AI is got at listing, ranking and just straight pulling of information. But is now breaking down when it comes to forming sentences and answering qualitative questions. Its not forming sentences. 'est match: Mohamed, Mwanashehe S. with score: 100.0
Based on evaluations, Mohamed, Mwanashehe S. received a grade of B+ with a mean score of 4.22.
No qualitative feedback available.' that's an output from the AI itself. So that means we need to start to train it on forming sentences that are coherent,understand the context of the question and can draw the neccassary information and also form summaries down the line. 

ok so lets begin on sentintment analysis

# Starter Sentiment + Summary (NLTK VADER)

In [32]:
from nltk.sentiment import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()

def feedback_response(user_input, sample_n=3):
    # Step 1: Fuzzy match instructor
    match = find_best_faculty(user_input)
    if not match:
        return "I couldn’t find that instructor. Try a full or closer name."
    fid, full_name = match

    # Step 2: Fetch comments
    cursor.execute("""
        SELECT comment_text
        FROM FeedbackCorpus
        WHERE faculty_id = ?
    """, (fid,))
    comments = [r[0] for r in cursor.fetchall()]
    if not comments:
        return f"No qualitative feedback available for {full_name}."

    # Step 3: Sentiment analysis
    scores = [sia.polarity_scores(c)["compound"] for c in comments if c.strip()]
    pos = sum(1 for s in scores if s > 0.05)
    neg = sum(1 for s in scores if s < -0.05)
    neu = len(scores) - pos - neg
    total = len(scores)
    avg = sum(scores) / total if total else 0

    sentiment_label = "positive" if avg > 0.1 else "mixed" if abs(avg) <= 0.1 else "negative"

    # Step 4: Build consultant-style response
    response = [
        f"Feedback for {full_name} is mostly {sentiment_label}.",
        f"Breakdown: {pos/total:.0%} positive, {neu/total:.0%} neutral, {neg/total:.0%} negative."
    ]

    # Step 5: Sample comments
    sample_comments = comments[:sample_n]
    if sample_comments:
        response.append("Representative comments:")
        for c in sample_comments:
            response.append(f"- \"{c}\"")

    return "\n".join(response)

In [33]:
# Some basic stress testing
# Evaluation test queries
test_queries = [
    "Tell me about Afundi",                          # Instructor lookup
    "Summarize feedback for Mohamed",                # Feedback + sentiment summary
    "Results for Odoyo",                             # Lookup by surname
    "Which instructors got an A?",                   # Grade filter
    "Who received a C?",                             # Grade filter lower performers
    "List instructors with scores below 3.5",        # Score filter (below)
    "Which instructors scored exactly 4.0?",         # Score filter (equality)
    "Top 5 performers",                              # Ranking
    "What feedback did students give Gitahi?",       # Feedback query
    "Evaluation for a nonexistent instructor"        # Edge case handling
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: Tell me about Afundi
[DEBUG] Comparing: tell me about afundi vs Mohamed, Mwanashehe S. -> similarity: 33.33333333333333
[DEBUG] Comparing: tell me about afundi vs Owuor, John David_Ouma -> similarity: 33.333333333333336
[DEBUG] Comparing: tell me about afundi vs Gitahi, Jesse Elikanah_Machirah -> similarity: 30.76923076923077
[DEBUG] Comparing: tell me about afundi vs Odoyo, Fredrick -> similarity: 40.0
[DEBUG] Comparing: tell me about afundi vs Gatumo, Francis Mambo -> similarity: 41.17647058823529
[DEBUG] Comparing: tell me about afundi vs Oyaro, Kepha N._Makori -> similarity: 35.29411764705882
[DEBUG] Comparing: tell me about afundi vs Ogada, Agnes Owuor -> similarity: 33.333333333333336
[DEBUG] Comparing: tell me about afundi vs Kuria,, John -> similarity: 28.57142857142857
[DEBUG] Comparing: tell me about afundi vs Achoki, George -> similarity: 35.294117647058826
[DEBUG] Comparing: tell me about afundi vs Njuguna, Amos Gitau -> similarity: 37.5
[DEBUG] Comparing: tell me a

Also ive noticed its still struggling with matching Afundi. Because in the debugging stress testing, '[DEBUG] Comparing: tell me about afundi vs Afundi, Patrick Omuhinda -> similarity: 46.15384615384615' it completely mismatches him, with this '[DEBUG] Comparing: tell me about afundi vs Muindi, Peter M. -> similarity: 50.0' being the highest matching it found, but giving this response 'No evaluation data found for Tell me about Afundi. ' even with odoyo '[DEBUG] Comparing: results for odoyo vs Odoyo, Fredrick -> similarity: 50.0' with its output being this 'No evaluation data found for Results for Odoyo.'
there must be sth up with the fuzzy matching

In [34]:
# Turn off debug output
DEBUG = False

# Qualitative stress test queries
test_queries = [
    "What feedback did students give Afundi?",          # direct feedback query
    "Summarize feedback for Mohamed",                   # summarization
    "Show sample feedback for Odoyo",                   # sample quotes
    "How did students feel about Gitahi?",              # synonym phrasing
    "Qualitative feedback for Gatumo",                  # alternate phrasing
    "Summarize student comments for Aloo",              # fuzzy match test
    "What did learners say about Basweti?",             # partial surname
    "Give me feedback themes for Ondiek",               # theme extraction
    "Summarize qualitative feedback for Njuguna",       # consultant-style summary
    "Feedback for a nonexistent instructor"             # edge case handling
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: What feedback did students give Afundi?
I couldn’t find that instructor. Try a full or closer name.

Query: Summarize feedback for Mohamed
I couldn’t find that instructor. Try a full or closer name.

Query: Show sample feedback for Odoyo
I couldn’t find that instructor. Try a full or closer name.

Query: How did students feel about Gitahi?
I couldn’t find that instructor. Try a full or closer name.

Query: Qualitative feedback for Gatumo
I couldn’t find that instructor. Try a full or closer name.

Query: Summarize student comments for Aloo
I couldn’t find that instructor. Try a full or closer name.

Query: What did learners say about Basweti?
I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback.

Query: Give me feedback themes for Ondiek
I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback.

Query: Summarize qualitative feedback for Njuguna
I couldn’t find that 

ok gotta fix that fuzzy matching now. Cause its like its reading the entire string of the wuestion so the matching drops by alot.

In [35]:
# Revised fuzzy matching
import re

STOPWORDS = {
    "tell","me","about","results","for","evaluation","of","how","did",
    "performance","review","summarize","feedback","comments","student",
    "learners","say","give","themes","qualitative","what","did","show"
}

def extract_name_tokens(user_input):
    # Split into words, remove punctuation
    words = re.findall(r"[A-Za-z]+", user_input)
    # Keep words not in stopwords
    filtered = [w for w in words if w.lower() not in STOPWORDS]
    # Join back into a string
    return " ".join(filtered)

def find_best_faculty(user_input, threshold=70):
    # Extract likely name tokens
    q = extract_name_tokens(user_input).lower()
    if not q:
        return None

    cursor.execute("SELECT faculty_id, full_name FROM Faculty")
    rows = cursor.fetchall()
    best, best_score = None, -1

    for fid, name in rows:
        sim = max(
            fuzz.partial_ratio(q, name.lower()),
            fuzz.token_set_ratio(q, name.lower())
        )
        if sim > best_score:
            best_score = sim
            best = (fid, name)

    return best if best_score >= threshold else None

In [36]:
# Ok lets test things out now
# Turn off debug output
DEBUG = True

# Qualitative stress test queries
test_queries = [
    "What feedback did students give Afundi Patrick?",          # direct feedback query
    "Summarize feedback for Mohamed",                   # summarization
    "Show sample feedback for Odoyo Fred",                   # sample quotes
    "How did students feel about Gitahi?",              # synonym phrasing
    "Qualitative feedback for Gatumo",                  # alternate phrasing
    "Summarize student comments for Aloo",              # fuzzy match test
    "What did learners say about Basweti?",             # partial surname
    "Give me feedback themes for Ondiek",               # theme extraction
    "Summarize qualitative feedback for Njuguna",       # consultant-style summary
    "Feedback for a nonexistent instructor"             # edge case handling
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: What feedback did students give Afundi Patrick?
Feedback for Mulindi, Patrick M. is mostly positive.
Breakdown: 42% positive, 55% neutral, 2% negative.
Representative comments:
- "good"
- "sweet He had arranged the slides properly which made it easier to learn .."
- "-- his explanation."

Query: Summarize feedback for Mohamed
No qualitative feedback available for Mohamed, Mwanashehe S..

Query: Show sample feedback for Odoyo Fred
No qualitative feedback available for Odoyo, Fredrick.

Query: How did students feel about Gitahi?
I couldn’t find that instructor. Try a full or closer name.

Query: Qualitative feedback for Gatumo
Feedback for Gatumo, Francis Mambo is mostly positive.
Breakdown: 44% positive, 56% neutral, 0% negative.
Representative comments:
- "Good"
- "N/A"
- "N/A"

Query: Summarize student comments for Aloo
Feedback for Aloo, Linus Alwal is mostly positive.
Breakdown: 37% positive, 63% neutral, 0% negative.
Representative comments:
- "good"
- "communicator always 

ok so slight change in the response. Ok ive realised if you add in the second name and re-ask the question it accuretly matches the lecturer, even if the second name is a partial like with Odoyo, hes full name is Odoyo Fredrick, but if you place in odoyo fred, it matches him accuretly. So if that's the case we let the fuzzy matching rest, as long as you do actually provide full name or closer name it can find the lecturer that's fine with me. Lets move to the next level of complexity for the qualitative section

In [37]:
# Right now the system is just saying “No qualitative feedback available” because it’s pulling raw comments but not analyzing or summarizing them.
# so lets do so sentinment theme extraction
from nltk.sentiment import SentimentIntensityAnalyzer
import re
from collections import Counter

sia = SentimentIntensityAnalyzer()

def analyze_feedback(comments, top_k=5):
    # Sentiment scores
    scores = [sia.polarity_scores(c)["compound"] for c in comments if c.strip()]
    pos = sum(1 for s in scores if s > 0.05)
    neg = sum(1 for s in scores if s < -0.05)
    neu = len(scores) - pos - neg
    total = len(scores)
    avg = sum(scores) / total if total else 0

    sentiment_label = "positive" if avg > 0.1 else "mixed" if abs(avg) <= 0.1 else "negative"

    # Keyword extraction (naive)
    stop = {"the","and","to","of","in","for","on","is","was","are","with","that","this","it","as","at","by","from","or","an","be","but","not"}
    tokens = []
    for c in comments:
        words = re.findall(r"[a-zA-Z][a-zA-Z'-]+", c.lower())
        tokens.extend(w for w in words if len(w) > 3 and w not in stop)
    common = Counter(tokens).most_common(top_k)
    themes = [w for w, _ in common]

    return {
        "sentiment": sentiment_label,
        "pos_pct": pos/total if total else 0,
        "neu_pct": neu/total if total else 0,
        "neg_pct": neg/total if total else 0,
        "themes": themes
    }


In [38]:
# then lets add in the responses to give out
def feedback_response(user_input, sample_n=3):
    match = find_best_faculty(user_input)
    if not match:
        return "I couldn’t find that instructor. Try a full or closer name."
    fid, full_name = match

    cursor.execute("SELECT comment_text FROM FeedbackCorpus WHERE faculty_id = ?", (fid,))
    comments = [r[0] for r in cursor.fetchall()]
    if not comments:
     return f"Students provided numeric evaluations for {full_name}, but no written comments were submitted."

    analysis = analyze_feedback(comments)
    response = [
        f"Feedback for {full_name} is mostly {analysis['sentiment']}.",
        f"Breakdown: {analysis['pos_pct']:.0%} positive, {analysis['neu_pct']:.0%} neutral, {analysis['neg_pct']:.0%} negative.",
        f"Common themes: {', '.join(analysis['themes']) or 'none detected'}."
    ]

    # Add sample comments
    sample_comments = comments[:sample_n]
    if sample_comments:
        response.append("Representative comments:")
        for c in sample_comments:
            response.append(f"- \"{c}\"")

    return "\n".join(response)

In [39]:
# now to do some stress testing
# Ok lets test things out now
# Turn off debug output
DEBUG = False

# Qualitative stress test queries
test_queries = [
    "What feedback did students give Afundi Patrick?",             # direct feedback query
    "Summarize feedback for Mohamed",                              # summarization
    "Show sample feedback for Odoyo Fred",                         # sample quotes
    "How did students feel about Gitahi Jesse Elikanah_Machirah?", # synonym phrasing
    "Qualitative feedback for Gatumo",                             # alternate phrasing
    "Summarize student comments for Aloo",                         # fuzzy match test
    "What did learners say about Basweti?",                        # partial surname
    "Give me feedback themes for Ondiek",                          # theme extraction
    "Summarize qualitative feedback for Njuguna",                  # consultant-style summary
    "Feedback for a nonexistent instructor"                        # edge case handling
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: What feedback did students give Afundi Patrick?
Feedback for Mulindi, Patrick M. is mostly positive.
Breakdown: 42% positive, 55% neutral, 2% negative.
Common themes: good, none, excellent, great, which.
Representative comments:
- "good"
- "sweet He had arranged the slides properly which made it easier to learn .."
- "-- his explanation."

Query: Summarize feedback for Mohamed
Students provided numeric evaluations for Mohamed, Mwanashehe S., but no written comments were submitted.

Query: Show sample feedback for Odoyo Fred
Students provided numeric evaluations for Odoyo, Fredrick, but no written comments were submitted.

Query: How did students feel about Gitahi Jesse Elikanah_Machirah?
Students provided numeric evaluations for Gitahi, Jesse Elikanah_Machirah, but no written comments were submitted.

Query: Qualitative feedback for Gatumo
Feedback for Gatumo, Francis Mambo is mostly positive.
Breakdown: 44% positive, 56% neutral, 0% negative.
Common themes: good, none, excelle

ok so had to go back and revise the DB, For the 5th time, so the mark downs no longer make sense. So just ignore those that dont make any sense at all. 

## Patched intent logic

In [40]:
def parse_intent(user_input: str):
    text = user_input.lower()

    # Qualitative feedback intent
    qualitative_keywords = [
        "feedback", "comments", "say", "feel", "themes",
        "qualitative", "students", "learners", "opinions"
    ]
    if any(k in text for k in qualitative_keywords):
        return {"intent": "qualitative", "name": extract_name_tokens(user_input)}

    # Lookup intent
    if "tell me about" in text or "evaluation" in text or "results" in text or "performance" in text:
        return {"intent": "lookup", "name": extract_name_tokens(user_input)}

    # Ranking intent
    if "top" in text or "best" in text or "performers" in text:
        count = 5
        for word in text.split():
            if word.isdigit():
                count = int(word)
        return {"intent": "top_ranked", "count": count}

    # Score filters
    if "above" in text or "below" in text or "exactly" in text:
        return {"intent": "score_filter", "condition": text}

    # Grade filters
    if "grade" in text or "received" in text or "got" in text:
        return {"intent": "grade_filter", "condition": text}

    return {"intent": "unknown"}

In [41]:
# then gotta improve the theme extraction a bit by accounting for na, trivial words like good, excellent etc. They dont provide much value
import re
from collections import Counter

def extract_themes(comments, top_k=5):
    stop = {
        "the","and","to","of","in","for","on","is","was","are","with","that","this","it","as","at","by","from","or","an","be","but","not",
        "none","n/a","na","ok","good","excellent","great"
    }
    tokens = []
    for c in comments:
        words = re.findall(r"[a-zA-Z][a-zA-Z'-]+", c.lower())
        tokens.extend(w for w in words if len(w) > 3 and w not in stop)
    common = Counter(tokens).most_common(top_k)
    return [w for w, _ in common]

In [42]:
# then lets intergrate the changes back into the feedback response
def get_feedback_summary(fid, full_name, limit=3):
    query = """
    SELECT comment_text
    FROM FeedbackCorpus
    WHERE faculty_id = ?
    LIMIT ?
    """
    cursor.execute(query, (fid, limit))
    rows = cursor.fetchall()
    comments = [r[0] for r in rows if r[0]]

    if not comments:
        return f"Students provided numeric evaluations for {full_name}, but no written comments were submitted."

    analysis = analyze_feedback(comments)  # your sentiment logic
    themes = extract_themes(comments)

    response = [
        f"Feedback is mostly {analysis['sentiment']}.",
        f"Breakdown: {analysis['pos_pct']:.0%} positive, {analysis['neu_pct']:.0%} neutral, {analysis['neg_pct']:.0%} negative.",
        f"Common themes: {', '.join(themes) or 'none detected'}."
    ]

    # Filter sample comments: select comments with at least min_words words
    def filter_sample_comments(comments, min_words=3, limit=3):
        filtered = [c for c in comments if len(c.split()) >= min_words]
        return filtered[:limit]

    sample_comments = filter_sample_comments(comments, min_words=3, limit=3)
    if sample_comments:
        response.append("Representative comments:")
        for c in sample_comments:
            response.append(f"- \"{c}\"")

    return "\n".join(response)

In [43]:
# now to do some stress testing
# Ok lets test things out now
# Turn off debug output
DEBUG = False

# Qualitative stress test queries
test_queries = [
    "What feedback did students give Afundi Patrick?",             # direct feedback query
    "Summarize feedback for Mohamed",                              # summarization
    "Show sample feedback for Odoyo Fred",                         # sample quotes
    "How did students feel about Gitahi Jesse Elikanah_Machirah?", # synonym phrasing
    "Qualitative feedback for Gatumo",                             # alternate phrasing
    "Summarize student comments for Aloo",                         # fuzzy match test
    "What did learners say about Basweti?",                        # partial surname
    "Give me feedback themes for Ondiek",                          # theme extraction
    "Summarize qualitative feedback for Njuguna",                  # consultant-style summary
    "Feedback for a nonexistent instructor"                        # edge case handling
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: What feedback did students give Afundi Patrick?
I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback.

Query: Summarize feedback for Mohamed
I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback.

Query: Show sample feedback for Odoyo Fred
I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback.

Query: How did students feel about Gitahi Jesse Elikanah_Machirah?
I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback.

Query: Qualitative feedback for Gatumo
I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback.

Query: Summarize student comments for Aloo
I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback.

Query: What did learners say a

ok this si bad.
the patch regressed because the parser now returns “qualitative”, but your router doesn’t handle that intent. Plus, name extraction is brittle, and grade parsing is misclassifying “C” queries. Let’s fix all three: routing, name resolution, and grade parsing.


In [44]:
import re
from rapidfuzz import fuzz, process

def extract_name_tokens(text: str):
    # Strip leading verbs/phrases commonly used in queries
    cleaned = re.sub(r"(?i)\b(tell me about|what did|what’s|whats|how did|results for|show|give me|summarize|qualitative feedback for|feedback for|student comments for|themes for|evaluation of|evaluation for)\b", "", text)
    # Remove filler words
    cleaned = re.sub(r"(?i)\b(students|learners|feedback|comments|themes|evaluation|results|sample|qualitative)\b", "", cleaned)
    # Keep letters, commas, spaces, periods, underscores
    cleaned = re.sub(r"[^a-zA-Z ,._-]", " ", cleaned)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    return cleaned

def resolve_instructor_name(name_query: str, cursor, top_n=1, threshold=70):
    # Get all faculty names
    cursor.execute("SELECT faculty_id, full_name FROM Faculty")
    rows = cursor.fetchall()
    # Use RapidFuzz for better matching
    choices = {full_name: fid for (fid, full_name) in rows}
    matches = process.extract(name_query, choices.keys(), scorer=fuzz.token_set_ratio, limit=top_n)
    if matches and matches[0][1] >= threshold:
        best_name, score, _ = matches[0]
        return choices[best_name], best_name, score
    return None, None, 0

In [45]:
def get_consultant_response(user_input: str):
    intent = parse_intent(user_input)

    if intent["intent"] == "qualitative":
        name_query = extract_name_tokens(user_input)
        result = resolve_instructor_name(name_query, cursor)

        if result == (None, None, 0):
            return "I couldn’t find that instructor in the records."

        if isinstance(result[0], str) and result[0] == "AMBIGUOUS":
            candidates = result[1]
            return (
                "There are multiple instructors with that name. "
                "Please specify the full name, e.g.: " + ", ".join(candidates)
            )

        fid, full_name, score = result
        feedback = get_feedback_summary(fid, full_name, limit=3)

        cursor.execute(
            "SELECT mean_score, letter_grade FROM Evaluations "
            "WHERE faculty_id = ? ORDER BY mean_score DESC LIMIT 1",
            (fid,)
        )
        row = cursor.fetchone()
        if row:
            ms, lg = row
            head = f"Based on evaluations, {full_name} has a mean score of {ms:.2f} ({lg})."
            return head + "\n" + feedback
        return feedback


    if intent["intent"] == "lookup":
        name_query = extract_name_tokens(user_input)
        fid, full_name, score = resolve_instructor_name(name_query, cursor)
        if not fid or score < 70:
            return "I couldn’t find that instructor in the records."
        return lookup_instructor_evaluation(full_name)  # or use fid in your implementation

    if intent["intent"] == "top_ranked":
        return get_top_performers(intent["count"])

    if intent["intent"] == "score_filter":
        return filter_by_score_condition(intent["condition"])

    if intent["intent"] == "grade_filter":
        return filter_by_grade_condition(intent["condition"])

    return "I couldn’t interpret that question. Try asking about an instructor, scores, grades, top performers, or feedback."

this function has been revised like 3 times now.
What Changed
- Two‑stage resolver: looks in FeedbackCorpus.instructor first, then Faculty.full_name.
- Confidence cutoff: if fuzzy match score < 70, returns “I couldn’t find that instructor.”
- Fixed call: get_feedback_summary(fid, full_name, limit=3) so no missing argument error.
- Consistent fallback: both qualitative and lookup intents use the same cutoff logic.

This version should restore matches for lecturers like Afundi, Gatumo, Aloo, Basweti, while still protecting against false positives.



In [46]:
def parse_intent(user_input: str):
    text = user_input.lower()

    qualitative_keywords = [
        "feedback", "comments", "say", "feel", "themes",
        "qualitative", "students", "learners", "opinions", "sample feedback"
    ]
    if any(k in text for k in qualitative_keywords):
        return {"intent": "qualitative", "name": extract_name_tokens(user_input)}

    # Grade filters: capture exact letter if provided
    m = re.search(r"(?i)\b(grade|got|received)\s+([abcdf][+-]?)\b", text)
    if m:
        letter = m.group(2).upper()
        return {"intent": "grade_filter", "condition": {"type": "letter", "value": letter}}

    if re.search(r"(?i)\bwho.*grade\s+([abcdf][+-]?)\b", text):
        letter = re.search(r"(?i)\b([abcdf][+-]?)\b", text).group(1).upper()
        return {"intent": "grade_filter", "condition": {"type": "letter", "value": letter}}

    # Numeric score filters
    if "above" in text or "below" in text or "exactly" in text:
        # Extract numeric target if present
        mnum = re.search(r"(?i)\b(above|below|exactly)\s+(\d+(?:\.\d+)?)", text)
        cond = {"type": "score", "op": mnum.group(1).lower(), "value": float(mnum.group(2))} if mnum else {"raw": text}
        return {"intent": "score_filter", "condition": cond}

    # Ranking
    if "top" in text or "best" in text or "performers" in text:
        count = 5
        mcount = re.search(r"\b(\d+)\b", text)
        if mcount:
            count = int(mcount.group(1))
        return {"intent": "top_ranked", "count": count}

    # Lookup
    if "tell me about" in text or "evaluation" in text or "results" in text or "performance" in text:
        return {"intent": "lookup", "name": extract_name_tokens(user_input)}

    return {"intent": "unknown"}

In [47]:
# lets fix the grade and score filtering functions
def filter_by_grade_condition(condition):
    if isinstance(condition, dict) and condition.get("type") == "letter":
        letter = condition["value"]
        q = "SELECT full_name, mean_score, letter_grade FROM Evaluations WHERE letter_grade = ? ORDER BY mean_score DESC"
        cursor.execute(q, (letter,))
        rows = cursor.fetchall()
        if not rows:
            return f"No instructors found with grade {letter}."
        lines = [f"Instructors with grade {letter}:"]
        for name, score, lg in rows:
            lines.append(f"- {name}: {score:.2f}")
        return "\n".join(lines)
    return "I couldn’t interpret that grade request."

In [48]:
def filter_by_score_condition(condition):
    if isinstance(condition, dict) and condition.get("type") == "score":
        op = condition["op"]; val = condition["value"]
        if op == "above":
            q = "SELECT full_name, mean_score, letter_grade FROM Evaluations WHERE mean_score > ? ORDER BY mean_score DESC"
        elif op == "below":
            q = "SELECT full_name, mean_score, letter_grade FROM Evaluations WHERE mean_score < ? ORDER BY mean_score ASC"
        elif op == "exactly":
            q = "SELECT full_name, mean_score, letter_grade FROM Evaluations WHERE ABS(mean_score - ?) < 1e-9 ORDER BY full_name"
        else:
            return "I couldn’t interpret that score condition."
        cursor.execute(q, (val,))
        rows = cursor.fetchall()
        if not rows:
            return f"No instructors found with scores {op} {val:.2f}."
        hdr = f"Instructors with scores {op} {val:.2f}:" if op != "exactly" else f"Instructors with scores = {val:.2f}:"
        lines = [hdr]
        for name, score, lg in rows:
            grade = lg if lg else ""
            lines.append(f"- {name}: {score:.2f} ({grade})" if grade else f"- {name}: {score:.2f}")
        return "\n".join(lines)
    return "I couldn’t interpret that score request."

In [49]:
# lets imporve the theme extraction a bit more
import re
from collections import Counter

def extract_themes(comments, top_k=5):
    stop = {
        "the","and","to","of","in","for","on","is","was","are","with","that","this","it","as","at","by","from","or","an","be","but","not",
        "none","n/a","na","ok","good","excellent","great","fine","nice","fair"
    }
    tokens = []
    for c in comments:
        # normalize punctuation like double hyphens, underscores
        c = c.replace("_", " ")
        words = re.findall(r"[a-zA-Z][a-zA-Z'-]+", c.lower())
        tokens.extend(w for w in words if len(w) > 3 and w not in stop)
    common = Counter(tokens).most_common(top_k)
    return [w for w, _ in common]

In [50]:
def extract_name_tokens(text: str):
    cleaned = re.sub(r"(?i)\b(tell me about|what did|what’s|whats|how did|results for|show|give me|summarize|qualitative feedback for|feedback for|student comments for|themes for|evaluation of|evaluation for|say about|learners|students)\b", "", text)
    cleaned = re.sub(r"[^a-zA-Z ,._-]", " ", cleaned)
    return cleaned.strip()

In [51]:
# now to do some stress testing
# Ok lets test things out now
# Turn off debug output
DEBUG = False

# Qualitative stress test queries
test_queries = [
    "What feedback did students give Afundi Patrick?",             # direct feedback query
    "Summarize feedback for Mohamed",                              # summarization
    "Show sample feedback for Odoyo Fred",                         # sample quotes
    "How did students feel about Gitahi Jesse Elikanah_Machirah?", # synonym phrasing
    "Qualitative feedback for Gatumo",                             # alternate phrasing
    "Summarize student comments for Aloo",                         # fuzzy match test
    "What did learners say about Basweti?",                        # partial surname
    "Give me feedback themes for Ondiek",                          # theme extraction
    "Summarize qualitative feedback for Njuguna",                  # consultant-style summary
    "Feedback for a nonexistent instructor"                        # edge case handling
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: What feedback did students give Afundi Patrick?
I couldn’t find that instructor in the records.

Query: Summarize feedback for Mohamed
Based on evaluations, Mohamed, Abdullahi Mohamed has a mean score of 4.50 (A).
Students provided numeric evaluations for Mohamed, Abdullahi Mohamed, but no written comments were submitted.

Query: Show sample feedback for Odoyo Fred
I couldn’t find that instructor in the records.

Query: How did students feel about Gitahi Jesse Elikanah_Machirah?
Based on evaluations, Gitahi, Jesse Elikanah_Machirah has a mean score of 4.49 (A).
Students provided numeric evaluations for Gitahi, Jesse Elikanah_Machirah, but no written comments were submitted.

Query: Qualitative feedback for Gatumo
I couldn’t find that instructor in the records.

Query: Summarize student comments for Aloo
I couldn’t find that instructor in the records.

Query: What did learners say about Basweti?
I couldn’t find that instructor in the records.

Query: Give me feedback themes for 

In [52]:
# gotta run a quick pragma check on the db
cursor.execute("PRAGMA table_info(Evaluations)")
print(cursor.fetchall())

[(0, 'faculty_id', 'INTEGER', 0, None, 0), (1, 'full_name', 'TEXT', 0, None, 0), (2, 'course_code', 'TEXT', 0, None, 0), (3, 'section_code', 'TEXT', 0, None, 0), (4, 'total_students', 'INTEGER', 0, None, 0), (5, 'total_respondents', 'INTEGER', 0, None, 0), (6, 'response_rate', 'REAL', 0, None, 0), (7, 'mean_score', 'REAL', 0, None, 0), (8, 'percent_score', 'REAL', 0, None, 0), (9, 'letter_grade', 'TEXT', 0, None, 0), (10, 'section_id', 'TEXT', 0, None, 0)]


In [53]:
# gotta tun a pragma check on the db
cursor.execute("PRAGMA table_info(FeedbackCorpus)")
print(cursor.fetchall())

[(0, 'feedback_id', 'INTEGER', 0, None, 1), (1, 'faculty_id', 'INTEGER', 0, None, 0), (2, 'instructor', 'TEXT', 0, None, 0), (3, 'course_code', 'TEXT', 0, None, 0), (4, 'section_code', 'TEXT', 0, None, 0), (5, 'semester', 'TEXT', 0, None, 0), (6, 'comment_type', 'TEXT', 0, None, 0), (7, 'comment_text', 'TEXT', 0, None, 0)]


In [54]:
print(parse_intent("What did learners say about Basweti?"))

{'intent': 'qualitative', 'name': 'Basweti'}


In [55]:
# now to do some stress testing
# Ok lets test things out now
# Turn off debug output
DEBUG = False

# Qualitative stress test queries
test_queries = [
    "What feedback did students give Afundi Patrick?",             # direct feedback query
    "Summarize feedback for Mohamed",                              # summarization
    "Show sample feedback for Odoyo Fred",                         # sample quotes
    "How did students feel about Gitahi Jesse Elikanah_Machirah?", # synonym phrasing
    "Qualitative feedback for Gatumo",                             # alternate phrasing
    "Summarize student comments for Aloo",                         # fuzzy match test
    "What did learners say about Basweti?",                        # partial surname
    "Give me feedback themes for Ondiek",                          # theme extraction
    "Summarize qualitative feedback for Njuguna",                  # consultant-style summary
    "Feedback for a nonexistent instructor"                        # edge case handling
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: What feedback did students give Afundi Patrick?
I couldn’t find that instructor in the records.

Query: Summarize feedback for Mohamed
Based on evaluations, Mohamed, Abdullahi Mohamed has a mean score of 4.50 (A).
Students provided numeric evaluations for Mohamed, Abdullahi Mohamed, but no written comments were submitted.

Query: Show sample feedback for Odoyo Fred
I couldn’t find that instructor in the records.

Query: How did students feel about Gitahi Jesse Elikanah_Machirah?
Based on evaluations, Gitahi, Jesse Elikanah_Machirah has a mean score of 4.49 (A).
Students provided numeric evaluations for Gitahi, Jesse Elikanah_Machirah, but no written comments were submitted.

Query: Qualitative feedback for Gatumo
I couldn’t find that instructor in the records.

Query: Summarize student comments for Aloo
I couldn’t find that instructor in the records.

Query: What did learners say about Basweti?
I couldn’t find that instructor in the records.

Query: Give me feedback themes for 

ok let me try lowering the matching thresholds so that we can get some matches

In [56]:
def resolve_instructor_name(name_query: str, cursor, threshold=70):
    cursor.execute("SELECT faculty_id, full_name FROM Faculty")
    rows = cursor.fetchall()
    choices = {full_name: fid for (fid, full_name) in rows}
    matches = process.extract(name_query, choices.keys(), scorer=fuzz.token_set_ratio, limit=1)
    if matches:
        best_name, score, _ = matches[0]
        fid = choices[best_name]
        if score >= threshold:
            return fid, best_name, score
        else:
            # fallback: return anyway, but note low confidence
            return fid, best_name, score
    return None, None, 0

In [57]:
def normalize_name(name: str):
    return re.sub(r"[^a-zA-Z ]", "", name).lower().strip()

In [58]:
def resolve_instructor_name(name_query: str, cursor, threshold=70):
    # Try FeedbackCorpus first
    cursor.execute("SELECT DISTINCT faculty_id, instructor FROM FeedbackCorpus WHERE instructor IS NOT NULL")
    rows = cursor.fetchall()
    for fid, instr in rows:
        if fuzz.token_set_ratio(name_query.lower(), instr.lower()) >= threshold:
            return fid, instr, 100
    # Fallback to Faculty
    cursor.execute("SELECT faculty_id, full_name FROM Faculty")
    rows = cursor.fetchall()
    choices = {full_name: fid for (fid, full_name) in rows}
    matches = process.extract(name_query, choices.keys(), scorer=fuzz.token_set_ratio, limit=1)
    if matches:
        best_name, score, _ = matches[0]
        return choices[best_name], best_name, score
    return None, None, 0

In [59]:
# now to do some stress testing
# Ok lets test things out now
# Turn off debug output
DEBUG = False

# Qualitative stress test queries
test_queries = [
    "What feedback did students give Afundi Patrick?",             # direct feedback query
    "Summarize feedback for Mohamed",                              # summarization
    "Show sample feedback for Odoyo Fred",                         # sample quotes
    "How did students feel about Gitahi Jesse Elikanah_Machirah?", # synonym phrasing
    "Qualitative feedback for Gatumo",                             # alternate phrasing
    "Summarize student comments for Aloo",                         # fuzzy match test
    "What did learners say about Basweti?",                        # partial surname
    "Give me feedback themes for Ondiek",                          # theme extraction
    "Summarize qualitative feedback for Njuguna",                  # consultant-style summary
    "Feedback for a nonexistent instructor"                        # edge case handling
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: What feedback did students give Afundi Patrick?
Based on evaluations, Afundi, Patrick Omuhinda has a mean score of 4.35 (A-).
Feedback is mostly mixed.
Breakdown: 33% positive, 67% neutral, 0% negative.
Common themes: none detected.

Query: Summarize feedback for Mohamed
Based on evaluations, Mohamed, Abdullahi Mohamed has a mean score of 4.50 (A).
Students provided numeric evaluations for Mohamed, Abdullahi Mohamed, but no written comments were submitted.

Query: Show sample feedback for Odoyo Fred
Based on evaluations, Odoyo, Fredrick has a mean score of 4.10 (B).
Students provided numeric evaluations for Odoyo, Fredrick, but no written comments were submitted.

Query: How did students feel about Gitahi Jesse Elikanah_Machirah?
Based on evaluations, Gitahi, Jesse Elikanah_Machirah has a mean score of 4.49 (A).
Students provided numeric evaluations for Gitahi, Jesse Elikanah_Machirah, but no written comments were submitted.

Query: Qualitative feedback for Gatumo
Based on eval

ok some red flags with the no qualitative comments.
mohammed,odoyo, gitahi, Njuguna are fine they don't have any comments fro the parsing or in the complete pdf. So those results are perfectly fine and are expected.
but with ondiek collins its not, he does have comments that appeared in the parsing and in the pdf, so that's a false negative. That's something of concern. But ill deal with it later but ive left a flag for it. 
Anyways ive read over the answers and they are fantastic but the na, nils and one word response aren't exactly useful. So we need to filter those out.
As well as improving the fallback for no qualitative feedback and the nonexistent instructor. Sample comments also need a new threshold of lets say currently 3 words

new update had to make some changes to functions up above so that marked down isnt fully right.  im thinking we just redo the function for no qualitative and the instructor match, based off of what we learnt from previous output because rn Afundi our lecturer 0 is no longer being found again.

In [60]:
# revised theme extraction
import re
from collections import Counter

def extract_themes(comments, top_k=5):
    stop = {
        "the","and","to","of","in","for","on","is","was","are","with","that","this","it","as","at","by","from","or","an","be","but","not",
        "none","n/a","na","nil","ok","good","excellent","great","fine","nice","fair",".","-","--"
    } # why are students so stubborn about punctuation
    tokens = []
    for c in comments:
        # normalize punctuation like underscores, double hyphens
        c = c.replace("_", " ").replace("--", " ")
        words = re.findall(r"[a-zA-Z][a-zA-Z'-]+", c.lower())
        # only keep words longer than 3 chars and not in stoplist
        tokens.extend(w for w in words if len(w) > 3 and w not in stop)
    common = Counter(tokens).most_common(top_k)
    return [w for w, _ in common]

In [61]:
# comments threshold i want the displayed comment to have atleast 3 words for now
def filter_sample_comments(comments, min_words=3, limit=3):
    filtered = []
    for c in comments:
        if not c: 
            continue
        # count words
        word_count = len(c.split())
        if word_count >= min_words and c.lower() not in {"n/a","na","nil","none","ok","good","."}:
            filtered.append(c)
    return filtered[:limit]

In [62]:
from rapidfuzz import fuzz, process
import re

def normalize_name(name: str):
    return re.sub(r"[^a-zA-Z ]", "", name).lower().strip()

def resolve_instructor_name(name_query: str, cursor, threshold=70):
    nq = normalize_name(name_query)

    # Collect candidates from Faculty
    cursor.execute("SELECT faculty_id, full_name FROM Faculty")
    faculty_rows = cursor.fetchall()
    matches = []
    for fid, full_name in faculty_rows:
        score = fuzz.token_set_ratio(nq, normalize_name(full_name))
        if score >= threshold:
            matches.append((fid, full_name, score))

    if not matches:
        return None, None, 0

    if len(matches) == 1:
        return matches[0]  # fid, full_name, score

    # Multiple matches → ambiguous
    return "AMBIGUOUS", [m[1] for m in matches], max(m[2] for m in matches)

In [63]:
# now to do some stress testing
# Ok lets test things out now
# Turn off debug output
DEBUG = False

# Qualitative stress test queries
test_queries = [
    "What feedback did students give Afundi Patrick?",             # direct feedback query
    "Summarize feedback for Mohamed",                              # summarization
    "Show sample feedback for Odoyo Fred",                         # sample quotes
    "How did students feel about Gitahi Jesse Elikanah_Machirah?", # synonym phrasing
    "Qualitative feedback for Gatumo",                             # alternate phrasing
    "Summarize student comments for Aloo",                         # fuzzy match test
    "What did learners say about Basweti?",                        # partial surname
    "Give me feedback themes for Ondiek Collins",                  # theme extraction
    "Summarize qualitative feedback for Njuguna",                  # consultant-style summary
    "Feedback for a nonexistent instructor"                        # edge case handling
]

for q in test_queries:
    print("\nQuery:", q)
    print(get_consultant_response(q))


Query: What feedback did students give Afundi Patrick?
Based on evaluations, Afundi, Patrick Omuhinda has a mean score of 4.35 (A-).
Feedback is mostly mixed.
Breakdown: 33% positive, 67% neutral, 0% negative.
Common themes: none detected.

Query: Summarize feedback for Mohamed
There are multiple instructors with that name. Please specify the full name, e.g.: Mohamed, Mwanashehe S., Mohamed, Abdullahi Mohamed, Mohamed, Hussein Abdi

Query: Show sample feedback for Odoyo Fred
I couldn’t find that instructor in the records.

Query: How did students feel about Gitahi Jesse Elikanah_Machirah?
Based on evaluations, Gitahi, Jesse Elikanah_Machirah has a mean score of 4.49 (A).
Students provided numeric evaluations for Gitahi, Jesse Elikanah_Machirah, but no written comments were submitted.

Query: Qualitative feedback for Gatumo
Based on evaluations, Gatumo, Francis Mambo has a mean score of 4.53 (A).
Feedback is mostly positive.
Breakdown: 33% positive, 67% neutral, 0% negative.
Common the

alright seems like things are good here i can start adding another level of complexity. ive decided i just need to  change the fallback message when only name is given for a lec. Cause with the ondiek case we have two ondieks one with a second name of collins and another one with a second name of benard and the response changes when you add in the second name. So the code was fine the AI did its best and choose not to answer. Because we never specified which one. So there should be a fallback for such cases. a message should be placed to ask for a second name because there are more than one in the system. Or i could be lazy with it now and just recommend to place in two names for now then comeback and ran an update on the response later.

decided not to be lazy let me update the resolver code.

ok so now you can see that it asks you to specify so thats good. But ill stay say to use both names

Anyways we are done with the micro level AI consulant and have a greate base foundation lets move onto the meso level

# Meso level

At this level, we’re aggregating across multiple lecturers/courses within a department or school, so the outputs shift from individual consultant summaries to patterns, outliers, and trends.


- Department Sentiment Overview
- Aggregate closed‑ended ratings (mean scores, grade distributions).
- Aggregate open‑ended sentiment (positive/neutral/negative percentages).
- Top Strengths (Consensus Themes)
- Themes mentioned positively in ≥ X% of courses.
- Example student voice.
- Common Areas for Improvement
- Themes mentioned constructively in ≥ Y% of courses.
- Example student voice.
- Notable Outliers
- High performers (lecturers/courses with standout positive scores).
- Needs attention (lowest scores or recurring negative themes).
- Emerging Trends vs. Last Semester
- Positive themes increasing.
- Negative themes rising.
- Recommended Department‑Level Actions
- Practical, role‑specific recommendations (e.g., pacing workshops, feedback turnaround policy).


In [64]:
# ayt lets beign with a baseline report that agregrates numeric data and qualitative sentiment
def generate_meso_sentiment_report(department_id, cursor):
    """
    Baseline meso-level report: aggregate numeric and qualitative sentiment
    for a given department.
    """

    # 1. Aggregate numeric evaluations
    cursor.execute("""
        SELECT AVG(mean_score), COUNT(*)
        FROM Evaluations
        WHERE department_id = ?
    """, (department_id,))
    avg_score, course_count = cursor.fetchone()

    cursor.execute("""
        SELECT letter_grade, COUNT(*)
        FROM Evaluations
        WHERE department_id = ?
        GROUP BY letter_grade
    """, (department_id,))
    grade_distribution = {lg: cnt for lg, cnt in cursor.fetchall()}

    # 2. Collect qualitative comments
    cursor.execute("""
        SELECT comment_text
        FROM FeedbackCorpus
        WHERE department_id = ?
    """, (department_id,))
    comments = [r[0] for r in cursor.fetchall() if r[0]]

    # 3. Sentiment analysis (reuse your analyze_feedback function)
    sentiment_summary = analyze_feedback(comments) if comments else None

    # 4. Build baseline narrative
    report_lines = []
    report_lines.append(f"Department ID {department_id} — Baseline Sentiment Report")
    report_lines.append(f"Average score: {avg_score:.2f} across {course_count} courses.")
    report_lines.append("Grade distribution:")
    for lg, cnt in grade_distribution.items():
        report_lines.append(f"  - {lg}: {cnt} courses")

    if sentiment_summary:
        report_lines.append(
            f"Open-ended sentiment: {sentiment_summary['pos_pct']:.0%} positive, "
            f"{sentiment_summary['neu_pct']:.0%} neutral, "
            f"{sentiment_summary['neg_pct']:.0%} negative."
        )
    else:
        report_lines.append("No qualitative comments available for this department.")

    return "\n".join(report_lines)

In [66]:
# gotta tun a pragma check on the db
cursor.execute("PRAGMA table_info(FeedbackCorpus)")
print(cursor.fetchall())

[(0, 'feedback_id', 'INTEGER', 0, None, 1), (1, 'faculty_id', 'INTEGER', 0, None, 0), (2, 'instructor', 'TEXT', 0, None, 0), (3, 'course_code', 'TEXT', 0, None, 0), (4, 'section_code', 'TEXT', 0, None, 0), (5, 'semester', 'TEXT', 0, None, 0), (6, 'comment_type', 'TEXT', 0, None, 0), (7, 'comment_text', 'TEXT', 0, None, 0)]


In [65]:
# ayt lets now test out the function by testing out chandaria as our baseline
# Example baseline test for meso-level reporting

# Pick a department_id that exists in your Evaluations/FeedbackCorpus tables
test_department_id = "School of Scie and Tech"   # replace with a real ID from your DB

# The baseline meso sentiment report
print("=== Baseline Meso-Level Sentiment Report ===")
print(generate_meso_sentiment_report(test_department_id, cursor))

=== Baseline Meso-Level Sentiment Report ===


OperationalError: no such column: department_id