In [23]:
import pandas as pd
import os

from inference import Inference
from rouge_score import rouge_scorer

initial_message = {"role": "assistant", "content": "How can I help you?"}

inf = Inference()
test_inputs = [
    [initial_message, {"role": "user", "content": "We will talk about the MEng in AI at Duke"}],
    [initial_message, {"role": "user", "content": "How much is the tuition?"}],
    [initial_message, {"role": "user", "content": "What's the duration of the program?"}],
    [initial_message, {"role": "user", "content": "What are the courses offered in the program?"}],
    [initial_message, {"role": "user", "content": "What are some of the goals of the program?"}],
]

expected_outputs = [
    "Ok, the Master of Engineering in Artificial Intelligence program at Duke University offers opportunities for students to pursue advanced studies in AI. You can ask me about the courses, the tuition and admission requirements for example.",
    "Tuition for campus-based Duke Master of Engineering programs for the 2023-2024 academic year is $31,760 per semester taken at the university. Tuition for the Master of Engineering in AI over two semesters and a summer session would result in a total tuition cost of $73,048.",
    "The normal duration of the Master of Engineering in AI program is one year of study (2 semesters and a summer session); however, the program can be extended for an additional Fall semester to complete in 16 months.",
    "AIPI 510: Sourcing Data for Analytics, AIPI 520: Modeling Process & Algorithms, AIPI 540: Deep Learning Applications,AIPI 561: Operationalizing AI (MLOps), AIPI 549: Industry Capstone Project, MENG 570: Business Fundamentals for Engineers, AIPI 501: Industry Seminar Series, EGR 590: Career Strategy & Design and there are also other technical electives to choose from.",
    "This degree's core curriculum was developed in collaboration with the industry. Build a personal portfolio of real-world, hands-on AI and machine-learning projects. Receive individual advising, academic and career, from outstanding, world-class faculty. Be engaged with peers from around the world as part of a small, intimate, and immersive cohort. Our curriculum covers the theory and application of AI and machine learning, heavily emphasizing hands-on learning via real-world problems and projects in each course. Students also have two opportunities to work directly with industry leaders during the program: through the semester-long industry project and their summer internship."
]

scorer = rouge_scorer.RougeScorer(['rouge1'], use_stemmer=True)

scores_rag = []
scores_norag = []

for q in range(len(test_inputs)):
    print(test_inputs[q])
    print(expected_outputs[q])

for q in range(len(test_inputs)):
    pred_rag = inf.generate_response(True, test_inputs[q])
    pred_norag = inf.generate_response(False, test_inputs[q])
    scores_rag.append(scorer.score(pred_rag, expected_outputs[q]))
    scores_norag.append(scorer.score(pred_norag, expected_outputs[q]))
    # wait 1 minute to avoid openai rate limit
    os.system("sleep 60")


[{'role': 'assistant', 'content': 'How can I help you?'}, {'role': 'user', 'content': 'We will talk about the MEng in AI at Duke'}]
Ok, the Master of Engineering in Artificial Intelligence program at Duke University offers opportunities for students to pursue advanced studies in AI. You can ask me about the courses, the tuition and admission requirements for example.
[{'role': 'assistant', 'content': 'How can I help you?'}, {'role': 'user', 'content': 'How much is the tuition?'}]
Tuition for campus-based Duke Master of Engineering programs for the 2023-2024 academic year is $31,760 per semester taken at the university. Tuition for the Master of Engineering in AI over two semesters and a summer session would result in a total tuition cost of $73,048.
[{'role': 'assistant', 'content': 'How can I help you?'}, {'role': 'user', 'content': "What's the duration of the program?"}]
The normal duration of the Master of Engineering in AI program is one year of study (2 semesters and a summer sess

In [27]:
rag_scores_df = pd.DataFrame({'precision': [score["rouge1"].precision for score in scores_rag], 'recall': [score["rouge1"].recall for score in scores_rag], 'fmeasure': [score["rouge1"].fmeasure for score in scores_rag]})
no_rag_scores_df = pd.DataFrame({'precision': [score["rouge1"].precision for score in scores_norag], 'recall': [score["rouge1"].recall for score in scores_norag], 'fmeasure': [score["rouge1"].fmeasure for score in scores_norag]})

print("RAG Scores")
display(rag_scores_df)

print("No RAG Scores")
display(no_rag_scores_df)

RAG Scores


Unnamed: 0,precision,recall,fmeasure
0,0.25,0.428571,0.315789
1,0.632653,0.563636,0.596154
2,0.263158,0.3125,0.285714
3,0.019231,0.071429,0.030303
4,0.346154,0.288,0.31441


No RAG Scores


Unnamed: 0,precision,recall,fmeasure
0,0.75,0.077586,0.140625
1,0.244898,0.164384,0.196721
2,0.052632,0.2,0.083333
3,0.019231,0.037037,0.025316
4,0.278846,0.149485,0.194631


In [29]:
# create new data frame with aggregated scores one row for each model (RAG and No RAG)
aggregated_scores = pd.DataFrame({
    "RAG": [rag_scores_df.precision.mean(), rag_scores_df.recall.mean(), rag_scores_df.fmeasure.mean()],
    "No RAG": [no_rag_scores_df.precision.mean(), no_rag_scores_df.recall.mean(), no_rag_scores_df.fmeasure.mean()]
}, index=["precision", "recall", "fmeasure"])

display(aggregated_scores)

Unnamed: 0,RAG,No RAG
precision,0.302239,0.269121
recall,0.332827,0.125698
fmeasure,0.308474,0.128125
