In [1]:
import asyncio
from grader import TIERS, grade_tier
from pathlib import Path
from agents import Agent
from main import setup_client

async def run_grader(
    settings,
    tier: TIERS,
    report: Path,
    rubric: Path = Path("rubrics/scc-rubric.md"),
):
    """Grade one report against a rubric for a single tier.

    Args:
        settings: Passed unchanged to ``Agent`` to configure the grading agent.
        tier: Tier to grade; forwarded to ``grade_tier`` as ``tier_name``.
        report: Path of the report file to grade. A plain string path is
            accepted as well as a ``Path``.
        rubric: Path of the rubric file to grade against. Defaults to the
            SCC rubric, so existing three-argument calls behave as before.

    Returns:
        The value produced by ``grade_tier``, returned unchanged.
    """
    client = setup_client()
    agent = Agent(settings)

    # Path(...) is a no-op for Path inputs and lets callers pass str paths too.
    return await grade_tier(
        client,
        agent,
        tier_name=tier,
        report_contents=Path(report).read_text(),
        rubric_contents=Path(rubric).read_text(),
    )


In [2]:

async def run_consistency_test(settings, tier: TIERS, report, num_iterations):
    """Grade the same report repeatedly and record each run's checkbox states.

    Each grader response is printed and reduced to a list of booleans, one per
    line that contains a task-list checkbox, in order of appearance.

    Args:
        settings: Forwarded to ``run_grader``.
        tier: Forwarded to ``run_grader``.
        report: Forwarded to ``run_grader``.
        num_iterations: Number of grading runs to perform.

    Returns:
        A list with one entry per run; each entry is a list of booleans where
        ``True`` means the checkbox line was checked (``[X]`` or ``[x]``) and
        ``False`` means it was unchecked (``[ ]``).
    """
    all_responses = []

    for _ in range(num_iterations):
        # Runs are awaited one after another, so responses print in run order.
        response = await run_grader(settings, tier, report)

        response_summary = []
        for line in response.splitlines():
            # Accept both "[X]" and "[x]" as a checked box.
            is_checked = "[x]" in line.lower()
            # Blank lines and prose carry no checkbox; recording them as False
            # would misalign runs whose responses differ only in such lines.
            if not is_checked and "[ ]" not in line:
                continue
            response_summary.append(is_checked)
        all_responses.append(response_summary)

        print(response)
    return all_responses


In [3]:
# Grader configuration: the prompt text is read from disk once, here.
# NOTE(review): "model" and "reasoning" are presumably interpreted by the
# agent that receives this mapping — confirm against its implementation.
settings = dict(
    prompt=Path("rubrics/tier-grader.md").read_text(),
    model="gpt-5-mini",
    reasoning="low",
)

In [ ]:
import pandas as pd

In [18]:
results_baseline = await run_consistency_test(settings, TIERS.BASELINE, Path("reports/scc-student-report.md"), 3)

print(results_baseline)

df_baseline = pd.DataFrame(results_baseline)
df_baseline


- [X] Design experience
    - [X] State your discussion partner
    - [X] Provide a brief summary of your conversation
- [ ] Implement a pre-order/post-order traversal in `scc.py` 
- [ ] Pass baseline tests
- [ ] Complete an analysis of Pre/Post Order Traversal
    - [ ] Theoretical time
        - [ ] Include annotated code in report
        - [ ] Annotate all non-trivial parts of your code
        - [X] Provide explanation of time complexity
            - [ ] State all assumptions clearly
        - [ ] Bold final time complexity
    - [ ] Theoretical space
        - [ ] Include annotated code in report
        - [ ] Annotate all non-trivial parts of your code
        - [X] Provide explanation of time complexity
            - [ ] State all assumptions clearly
        - [ ] Bold final time complexity
    - [X] Empirical data
        - [X] Fill in empirical runtimes table
    - [X] Comparison of Theoretical and Empirical Results
        - [X] Provide plot comparing theoretical time compl

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,True,True,True,False,False,False,False,False,False,True,...,False,True,True,True,True,True,True,False,False,False
1,True,True,True,False,False,True,True,True,False,True,...,False,True,True,True,True,True,True,False,False,False
2,True,True,True,False,False,False,True,True,True,True,...,False,True,True,True,True,True,True,,,


In [None]:
results_core = await run_consistency_test(settings, TIERS.CORE, Path("reports/scc-student-report.md"), 3)

print(results_core)

df_core = pd.DataFrame(results_core)
df_core

- [X] Design experience
    - [X] State your discussion partner
    - [X] Provide a brief summary of your conversation
- [X] Implement the Strongly Connected Components (SCC) algorithm 
- [X] Pass all core tests
- [ ] Complete an analysis of SCC
    - [ ] Theoretical time
        - [ ] Include annotated code in report
        - [ ] Annotate all non-trivial parts of your code
        - [X] Provide explanation of time complexity
            - [X] State all assumptions clearly
        - [ ] Bold final time complexity
    - [ ] Theoretical space
        - [ ] Include annotated code in report
        - [ ] Annotate all non-trivial parts of your code
        - [X] Provide explanation of time complexity
            - [X] State all assumptions clearly
        - [ ] Bold final time complexity
    - [X] Empirical data
        - [X] Fill in empirical runtimes table
    - [X] Comparison of Theoretical and Empirical Results
        - [X] Provide plot comparing theoretical time complexity to observe

In [17]:
results_stretch_1 = await run_consistency_test(settings, TIERS.STRETCH_1, Path("reports/scc-student-report.md"), 3)

print(results_stretch_1)

df_stretch_1 = pd.DataFrame(results_stretch_1)
df_stretch_1

- [X] Design experience
    - [X] State your discussion partner
    - [X] Provide a brief summary of your conversation
- [ ] Write code to identify all *tree*, *forward*, *back*, and *cross* edges in a graph
- [ ] Pass all stretch 1 tests
- [ ] Comment on why identifying articulation points in a graph is useful.
    - [ ] In your own words, explain what an articulation point is
    - [X] Provide at least two real-world examples of articulation points
        - [ ] Bold the name of your examples
        - [ ] Explain what the nodes and edges represent in your examples
        - [X] Explain what the articulation point represents in your examples
        - [X] Discuss why it would be useful to know the articulation point in your given examples
- [X] Design experience
    - [X] State your discussion partner
    - [X] Provide a brief summary of your conversation
- [ ] Write code to identify all *tree*, *forward*, *back*, and *cross* edges in a graph
- [ ] Pass all stretch 1 tests
- [X] Comm

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,True,True,True,False,False,False,False,True,False,False,True,True
1,True,True,True,False,False,True,False,False,False,False,False,False
2,True,True,True,False,False,False,False,True,False,True,True,True


In [15]:
results_stretch_2 = await run_consistency_test(settings, TIERS.STRETCH_2, Path("reports/scc-student-report.md"), 3)

print(results_stretch_2)

df_stretch_2 = pd.DataFrame(results_stretch_2)
df_stretch_2

- [X] Design experience[ ] State your discussion partner[ ] Provide a brief summary of your conversation
- [X] State your discussion partner
- [X] Provide a brief summary of your conversation
- [X] Pick a real-world dataset[ ] Your dataset should usedirectededges (this is important!)
- [X] Your dataset should usedirectededges (this is important!)
- [X] Describe the dataset in your report[ ] How many nodes/edges?[ ] What do the nodes/edges mean? (users/companies/articles/etc; follows/trades/citations/etc.)
- [X] How many nodes/edges?
- [X] What do the nodes/edges mean? (users/companies/articles/etc; follows/trades/citations/etc.)
- [X] Adapt the data for your algorithm
- [X] Run your SCC algorithm on the graph
- [X] Provide a discussion on your findings
- [X] Design experience[ ] State your discussion partner[ ] Provide a brief summary of your conversation
- [X] State your discussion partner
- [X] Provide a brief summary of your conversation
- [X] Pick a real-world dataset[ ] Your datas

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,True,True,True,True,True,True,True,True,True,True,True
1,True,True,True,True,True,True,True,True,True,True,True
2,True,True,True,True,True,True,True,True,True,True,True
