In [1]:
# Jupyter Notebook for rapid prototyping and debugging individual modules (e.g., testing PDF ingestion, RAG hint generation)
import pandas as pd

In [2]:
!pwd

/Users/pedram/Projects/GitMyself/AITutorApp/docs/notebooks


In [3]:
# load questions.csv
pd.read_csv('../../data/questions.csv')

Unnamed: 0,id,question,answer_1,answer_2,answer_3,answer_4,correct_answer,skill
0,1,We want to distinguish between three different...,"scipy, numpy, matplotlib, pandas, sklearn","numpy, matplotlib, pandas, seaborn","matplotlib, pandas, sklearn, numpy","scikit-learn, pandas, pandas, seaborn",3,[Data Loading]
1,2,"load the dataset\nurl = ""https://raw.githubuse...","dataset = read_csv(url, names=names)","dataset = read_file(url, names=names)","dataset = read(url, names)","dataset = read_file(url, title=names)",1,[Data Loading]
2,3,Which one is not true about the data?,"150 samples, with 4 attributes (same units, al...",Balanced class distribution (50 samples for ea...,The first 10 data are Iris-setosa,The mean of sepal width is 5.843,4,[Exploratory Data Analysis (EDA)]
3,4,Which of the following methods are not suitabl...,"Logistic Regression (LR), Linear Discriminant ...","K-Nearest Neighbors (KNN), Classification and ...","Gaussian Naive Bayes (NB), Support Vector Mach...","Linear regression (LR), Decision tree (DT)",4,[Supervised Learning]
4,5,Which of the following is not required in IRIS...,Set-up test harness,Handling missing data,Separate validation/test/train,Build prediction models and accuracy measures,2,"[Data Cleaning, Evaluation Metrics]"


In [4]:
# stage 2
import requests
import json

url = "http://127.0.0.1:8000/hints/"
payload = {
    "user_id": "test_user_1",
    "question_number": 5,
    "user_answer": "I think the answer is 1 we donot need to set-up test harness"
}
headers = {
    "Content-Type": "application/json"
}

response = requests.post(url, headers=headers, data=json.dumps(payload))

print(response.status_code)
print(response.json())

200
{'question_number': 5, 'hint': 'Think about what a "test harness" does in the context of machine learning.  Is it essential for evaluating a model\'s performance on unseen data', 'user_id': 'test_user_1', 'hint_style': 'Conceptual'}


In [5]:
# stage 3
import requests
import json
url = "http://127.0.0.1:8000/answer/"
payload = {
    "user_id": "test_user_1",
    "question_number": 1,
    "user_answer": "2", 
    "time_taken_ms": 15000
}
headers = {
    "Content-Type": "application/json"
}
response = requests.post(url, headers=headers, data=json.dumps(payload))

print(response.status_code)
print(response.json())

200
{'correct': False, 'correct_answer': '3', 'skill': '[Data Loading]', 'intervention_needed': True, 'current_mastery': 0.17575757575757575}


In [7]:
#stage 3
# check users endpoint

base_url = "http://127.0.0.1:8000/users"
user_id = "test_user_1"

# Check /users/{user_id}/bkt
bkt_url = f"{base_url}/{user_id}/bkt"
bkt_response = requests.get(bkt_url)
print(f"Status Code for /bkt: {bkt_response.status_code}")
print("Response for /bkt:", bkt_response.json())

# Check /users/{user_id}/state
state_url = f"{base_url}/{user_id}/profile"
state_response = requests.get(state_url)
print(f"Status Code for /state: {state_response.status_code}")
print("Response for /state:", state_response.json())

Status Code for /bkt: 200
Response for /bkt: {'[Exploratory Data Analysis (EDA)]': 0.2, '[Data Loading]': 0.17575757575757575, '[Data Cleaning, Evaluation Metrics]': 0.2, '[Supervised Learning]': 0.2}
Status Code for /state: 200
Response for /state: {'user_id': 'test_user_1', 'created_at': '2025-09-06T18:46:18.148805', 'preferences': {'preferred_hint_style': 'Automatic', 'feedback_preference': 'immediate'}, 'feedback_scores': {}, 'skill_mastery': [{'skill_id': '[Data Loading]', 'mastery_level': 0.17575757575757575, 'consecutive_errors': 1}], 'interaction_history': [{'timestamp': '2025-09-06T18:46:22.144521', 'question_id': 1, 'skill': '[Data Loading]', 'action': 'answered', 'user_answer': '2', 'is_correct': False, 'time_taken_ms': 15000}, {'timestamp': '2025-09-06T18:46:19.489693', 'question_id': 5, 'skill': '[Data Cleaning, Evaluation Metrics]', 'action': 'requested_hint', 'user_answer': None, 'is_correct': None, 'time_taken_ms': None}]}


In [12]:
# Stage 3: Test Answer Submission and BKT Update
print("--- Testing Stage 3: Answer Submission ---")

base_url = "http://127.0.0.1:8000"
user_id = "stage3_test_user"

# 1. Submit an answer to a question
answer_payload = {
   "user_id": user_id,
   "question_number": 1,
   "user_answer": "4" # Assuming '4' is the correct answer for question 1
    }
answer_url = f"{base_url}/answer/"
answer_response = requests.post(answer_url, json=answer_payload)
 
print(f"Status Code for /answer: {answer_response.status_code}")
print("Response for /answer:", json.dumps(answer_response.json(), indent=2))
print("-" * 20)
# 2. Check the user's BKT mastery state after answering
bkt_url = f"{base_url}/users/{user_id}/bkt"
bkt_response = requests.get(bkt_url)
print(f"Status Code for /users/{{user_id}}/bkt: {bkt_response.status_code}")
print("Response for /users/{user_id}/bkt:", json.dumps(bkt_response.json(), indent=2))

--- Testing Stage 3: Answer Submission ---
Status Code for /answer: 200
Response for /answer: {
  "correct": false,
  "correct_answer": "3",
  "skill": "[Data Loading]",
  "intervention_needed": true,
  "current_mastery": 0.1720680393912265
}
--------------------
Status Code for /users/{user_id}/bkt: 200
Response for /users/{user_id}/bkt: {
  "[Exploratory Data Analysis (EDA)]": 0.2,
  "[Data Loading]": 0.1720680393912265,
  "[Data Cleaning, Evaluation Metrics]": 0.2,
  "[Supervised Learning]": 0.2
}


In [9]:
base_url = "http://127.0.0.1:8000"
user_id = "stage3_test_user"

preferences_payload = {
"preferred_hint_style": "Analogy",
 "feedback_preference": "on_demand"
    }
preferences_url = f"{base_url}/users/{user_id}/preferences"
preferences_response = requests.put(preferences_url, json=preferences_payload)
 
print(f"Status Code for PUT /preferences: {preferences_response.status_code}")
print("Response for PUT /preferences:", json.dumps(preferences_response.json(), indent=2))
print("-" * 20)

# 2. Request a hint to see if the new preference is used
hint_payload = {
 "user_id": user_id,
  "question_number": 2
}
hint_url = f"{base_url}/hints/"
hint_response = requests.post(hint_url, json=hint_payload)

print(f"Status Code for /hints: {hint_response.status_code}")
hint_data = hint_response.json()
print("Response for /hints:", json.dumps(hint_data, indent=2))
print("-" * 20)

# 3. Submit feedback on the hint that was just received
feedback_payload = {
   "user_id": user_id,
   "question_id": 2,
   "hint_style": hint_data.get("hint_style"), # Use the style from the hint response
   "rating": 5,
   "comment": "This analogy was very helpful!"
   }
feedback_url = f"{base_url}/feedback/"
feedback_response = requests.post(feedback_url, json=feedback_payload) 
print(f"Status Code for /feedback: {feedback_response.status_code}")
print("Response for /feedback:", json.dumps(feedback_response.json(), indent=2))

Status Code for PUT /preferences: 200
Response for PUT /preferences: {
  "preferred_hint_style": "Analogy",
  "feedback_preference": "on_demand"
}
--------------------
Status Code for /hints: 200
Response for /hints: {
  "question_number": 2,
  "hint": "Imagine you're ordering takeout.  You have a list of restaurants (URLs) and want to get the menu (the dataset).  You need",
  "user_id": "stage3_test_user",
  "hint_style": "Analogy"
}
--------------------
Status Code for /feedback: 200
Response for /feedback: {
  "message": "Feedback received and recorded"
}


In [10]:
# Stage 4.5: Test Adaptive Hint Selection and Hybrid Feedback
import requests
import json
import time

print("--- Testing Stage 4.5: Adaptive Logic ---")

base_url = "http://127.0.0.1:8000"
user_id = "stage4_5_test_user"

# --- Setup: Give feedback to establish a "best" style ---
print("\n1. Seeding feedback to train the model...")
# Give "Analogy" a high rating
feedback_payload_good = {
    "user_id": user_id,
    "question_id": 1,
    "hint_style": "Analogy",
    "rating": 5,
    "comment": "This was great!"
}
requests.post(f"{base_url}/feedback/", json=feedback_payload_good)

# Give "Socratic Question" a low rating
feedback_payload_bad = {
    "user_id": user_id,
    "question_id": 1,
    "hint_style": "Socratic Question",
    "rating": 1,
    "comment": "This was confusing."
}
requests.post(f"{base_url}/feedback/", json=feedback_payload_bad)
print("Feedback seeded.")
print("-" * 20)


# --- Test 1: Adaptive Selection (Exploitation) ---
# Set user preference to "Automatic" to enable the adaptive logic
print("\n2. Testing Adaptive Hint Selection (Exploitation)...")
preferences_payload = {
    "preferred_hint_style": "Automatic",
    "feedback_preference": "on_demand"
}
requests.put(f"{base_url}/users/{user_id}/preferences", json=preferences_payload)

# Request a hint. With a low exploration rate, it should pick "Analogy".
hint_payload = {"user_id": user_id, "question_number": 2}
hint_response = requests.post(f"{base_url}/hints/", json=hint_payload)

print(f"Status Code for /hints: {hint_response.status_code}")
hint_data = hint_response.json()
print("Response for /hints:", json.dumps(hint_data, indent=2))
print("-" * 20)


# --- Test 2: Post-Hint BKT Performance Tracking ---
print("\n3. Testing Post-Hint BKT Performance Tracking...")
# First, get a hint for a new question to set the pre-hint mastery
hint_payload_2 = {"user_id": user_id, "question_number": 3}
hint_response_2 = requests.post(f"{base_url}/hints/", json=hint_payload_2)
hint_style_given = hint_response_2.json()["hint_style"]
print(f"Hint given for Question 3 was style: '{hint_style_given}'")

# Wait a moment to simulate user thinking time
time.sleep(1)

# Now, submit a correct answer, flagging that a hint was shown
answer_payload = {
    "user_id": user_id,
    "question_number": 3,
    "user_answer": "1",  # Assuming '1' is correct for question 3
    "time_taken_ms": 1000,
    "hint_shown": True
}
answer_response = requests.post(f"{base_url}/answer/", json=answer_payload)
print(f"Status Code for /answer: {answer_response.status_code}")
print("Response for /answer:", json.dumps(answer_response.json(), indent=2))
print("Check server logs to confirm implicit feedback was recorded.")
print("-" * 20)

--- Testing Stage 4.5: Adaptive Logic ---

1. Seeding feedback to train the model...
Feedback seeded.
--------------------

2. Testing Adaptive Hint Selection (Exploitation)...
Status Code for /hints: 200
Response for /hints: {
  "question_number": 2,
  "hint": "Considering the provided URL and the `names` list, what data type do you expect the loaded dataset to have?\n",
  "user_id": "stage4_5_test_user",
  "hint_style": "Socratic Question"
}
--------------------

3. Testing Post-Hint BKT Performance Tracking...
Hint given for Question 3 was style: 'Analogy'
Status Code for /answer: 200
Response for /answer: {
  "correct": false,
  "correct_answer": "4",
  "skill": "[Exploratory Data Analysis (EDA)]",
  "intervention_needed": true,
  "current_mastery": 0.17575757575757575
}
Check server logs to confirm implicit feedback was recorded.
--------------------


In [None]:
import requests
import json
import time
BASE_URL = "http://127.0.0.1:8000"
USER_ID = f"stage5_5_notebook_user_{int(time.time())}"

def print_response(name, response):
    """Helper function to print formatted API responses."""
    print(f"--- {name} ---")
    print(f"Status Code: {response.status_code}")
    try:
            print(json.dumps(response.json(), indent=2))
    except json.JSONDecodeError:
            print(response.text)
    print("-" * (len(name) + 8))
    print()
def run_stage5_5_test():
    """Runs the test scenario for Stage 5.5."""
    print(f"--- Running Test Scenario for Stage 5.5 with User ID: {USER_ID} ---")
    # 1. Create a new user
    response = requests.post(f"{BASE_URL}/users/", json={"user_id": USER_ID})
    if response.status_code not in [200, 201, 409]:
        print("Failed to create user. Aborting test.")
        print_response("Create User", response)
        return
    print_response("1. Create User", response)
    # 2. Answer a multiple_choice question incorrectly
    mc_payload = {
    "user_id": USER_ID,
    "question_number": 1,
    "user_answer": "1",  # Correct is 3
    }
    response = requests.post(f"{BASE_URL}/answer/", json=mc_payload)
    print_response("2. Answer Multiple Choice (Incorrect)", response)
    
    # 3. Answer a fill_in_the_blank question incorrectly
    fitb_payload = {
            "user_id": USER_ID,
            "question_number": 6,
            "user_answer": "Berlin",  # Correct is "Paris"
        }
    print_response("3. Answer Fill-in-the-Blank (Incorrect)", response)
    
    # 4. Request a hint for the fill-in-the-blank question
    # The RAG agent should now have the context of the two previous incorrect answers.
    print(">>> NOTE: Check the server logs to see the 'user_history' block sent to the LLM. <<<")
    hint_payload = {
            "user_id": USER_ID,
            "question_number": 6,
   }
    response = requests.post(f"{BASE_URL}/hints/", json=hint_payload)
    print_response("4. Request Hint (with history)", response) 
    # 5. Retrieve the user's profile to verify the interaction history
    response = requests.get(f"{BASE_URL}/users/{USER_ID}/profile")
    print_response("5. Get User Profile (with history)", response)
    print("--- Test Scenario for Stage 5.5 Complete ---")

# To run this test, ensure the FastAPI server is running, then execute this function.
run_stage5_5_test()

### Expected Output Behavior:
#
# 1.  **Create User**: Should return a 200 or 201 status code with a success message.
# 2.  **Answer Multiple Choice (Incorrect)**: Should return `{"correct": false, ...}`.
# 3.  **Answer Fill-in-the-Blank (Incorrect)**: Should return `{"correct": false, ...}`.
# 4.  **Request Hint (with history)**: Should return a hint. The key thing to check is the **server logs**, where you should see a prompt sent to the LLM that includes a `user_history` section summarizing the two incorrect  answers.
# 5.  **Get User Profile (with history)**: The `interaction_history` array in the JSON response should contain three entries corresponding to the two incorrect answers and the hint request, showing the system is correctly logging  the expanded user interactions.

--- Running Test Scenario for Stage 5.5 with User ID: stage5_5_notebook_user_1757193583 ---
--- 1. Create User ---
Status Code: 200
{
  "user_id": "stage5_5_notebook_user_1757193583",
  "created_at": "2025-09-06T21:19:55.252806",
  "preferences": {
    "preferred_hint_style": "Automatic",
    "feedback_preference": "immediate"
  },
  "feedback_scores": {},
  "skill_mastery": [],
  "interaction_history": []
}
----------------------

--- 2. Answer Multiple Choice (Incorrect) ---
Status Code: 200
{
  "correct": false,
  "correct_answer": "3",
  "skill": "[Data Loading]",
  "intervention_needed": true,
  "current_mastery": 0.17575757575757575
}
---------------------------------------------

--- 3. Answer Fill-in-the-Blank (Incorrect) ---
Status Code: 200
{
  "correct": false,
  "correct_answer": "3",
  "skill": "[Data Loading]",
  "intervention_needed": true,
  "current_mastery": 0.17575757575757575
}
-----------------------------------------------

>>> NOTE: Check the server logs to see t