In [17]:
import polars as pl
import json 

# Load the PCEX item export (CSV) into a polars DataFrame.
muntasir = pl.read_csv('Muntasir -- 2025-12-17 __ PCEX Dataset - pcex_python.csv')

# Load the PAWS catalog v2 dump. The context manager closes the file handle as
# soon as parsing finishes, and the explicit encoding avoids depending on the
# platform default (JSON text is UTF-8).
with open('paws-catalog_v2.json', 'r', encoding='utf-8') as catalog_file:
    paws_catalog_v2 = json.load(catalog_file)

In [18]:
# Index the catalog: identity id -> catalog id. The identity id is the key the
# dataset's set_name / item_name values are looked up under below.
paws_catalog_v2_identity_map = {
    entry['identity']['id']: entry['id'] for entry in paws_catalog_v2
}


def _catalog_id_for(row):
    """Return the catalog id for one dataset row, or '' when nothing matches.

    Rows whose item_type is 'PCEX Set' are looked up by their set_name;
    every other row is looked up by its item_name.
    """
    if row['item_type'] == 'PCEX Set':
        lookup_key = row['set_name']
    else:
        lookup_key = row['item_name']
    return paws_catalog_v2_identity_map.get(lookup_key, '')


# Bundle the three columns the lookup needs into one struct so the helper
# receives them together as a dict-like row.
_lookup_fields = pl.struct(
    pl.col('set_name'),
    pl.col('item_name'),
    pl.col('item_type'),
)

muntasir = muntasir.with_columns(
    _lookup_fields
    .map_elements(_catalog_id_for, return_dtype=pl.Utf8)
    .alias('paws_catalog_v2_id')
)

muntasir.head(5)

item_language,set_name,item_name,item_type,item_statement,item_code,item_url,paws_catalog_v2_id
str,str,str,str,str,str,str,str
"""PYTHON""","""py_work_hours""","""py_work_hours1""","""PCEX Set""","""Suppose we have an input file …","""#Step 1: Define the function d…","""http://pawscomp2.sis.pitt.edu/…","""6954bccff9187b06cf8372d2"""
"""PYTHON""","""py_work_hours""","""py_work_hours2""","""PCEX Challenge""","""Suppose we have an input file …","""#Step 1: Define the function d…","""http://pawscomp2.sis.pitt.edu/…","""6954bccff9187b06cf837149"""
"""PYTHON""","""py_win_percentage""","""py_win_percentage""","""PCEX Set""","""Construct a program that recei…","""#Step 1: Assign initial values…","""http://pawscomp2.sis.pitt.edu/…","""6954bccff9187b06cf837302"""
"""PYTHON""","""py_win_percentage""","""py_win_percentage_input""","""PCEX Challenge""","""Construct a program that recei…","""#Step 1: Read the number of ga…","""http://pawscomp2.sis.pitt.edu/…","""6954bccff9187b06cf837155"""
"""PYTHON""","""py_win_percentage""","""py_win_percentage_won_equal""","""PCEX Challenge""","""Construct a program that recei…","""#Step 1: Assign initial values…","""http://pawscomp2.sis.pitt.edu/…","""6954bccff9187b06cf837156"""


In [19]:
# Debug helper: uncomment to list the rows that did not match any catalog entry.
# muntasir.filter(pl.col('paws_catalog_v2_id') == '')[['set_name', 'item_name', 'item_type']].to_dicts()

# 

import os

KCS_CACHE_PATH = 'kcs-from-paws-catalog-v2.json'
CATALOG_ITEM_URL = 'http://adapt2.sis.pitt.edu/next.course-authoring/api/catalog-v2/{}'

# Fetch the knowledge components (KCs) of every matched catalog item once and
# cache them on disk, so re-running the notebook does not hit the API again.
# Delete the cache file to force a refresh.
if not os.path.exists(KCS_CACHE_PATH):
    import requests

    kcs = dict()
    for catalog_id in muntasir['paws_catalog_v2_id'].unique():
        # Unmatched rows carry an empty id; there is nothing to fetch for them.
        # The truthiness check also skips a null id instead of crashing on len().
        if not catalog_id:
            continue
        print(catalog_id, 'processing...')
        # A timeout keeps a stalled connection from hanging the kernel, and
        # raise_for_status surfaces HTTP errors before the body is parsed.
        response = requests.get(CATALOG_ITEM_URL.format(catalog_id), timeout=30)
        response.raise_for_status()
        classification = response.json()['classification']
        # Items without annotations get an empty mapping.
        kcs[catalog_id] = classification.get('knowledge_components', dict())
        print('--> done! len=', len(kcs[catalog_id]))

    # Write through a context manager so the cache is flushed and closed
    # before it is read back below.
    with open(KCS_CACHE_PATH, 'w', encoding='utf-8') as cache_file:
        json.dump(kcs, cache_file, indent=2)

# Always read from the cache so `kcs` has the same shape whether it was just
# fetched or loaded from an earlier run.
with open(KCS_CACHE_PATH, 'r', encoding='utf-8') as cache_file:
    kcs = json.load(cache_file)

# --- all kc set names ---
# 'alice_edu_py_ontology expert'
# 'Arun added 08.12.2025 Arun Parser v0.1'
# 'Arun added 08.15.2025 Arun Parser v0.1'
# 'Arun parser v0.1'
# 'parser'
# 'Rafaella added 09.03.2025 alice_edu_py_ontology'
# 'unspecified'

In [20]:
# Names of the two KC annotation sets being compared.
EXPERT_KC_SET = 'alice_edu_py_ontology expert'
RAFAELLA_KC_SET = 'Rafaella added 09.03.2025 alice_edu_py_ontology'

# Print every catalog item whose two annotation sets disagree.
# The comparison is on sorted lists, so a concept listed twice in one set and
# once in the other counts as a difference. A set that is absent for an item
# is treated as having no concepts.
for catalog_id, item_kcs in kcs.items():
    expert_concepts = sorted(
        item_kcs[EXPERT_KC_SET]['concepts'] if EXPERT_KC_SET in item_kcs else []
    )
    rafaella_concepts = sorted(
        item_kcs[RAFAELLA_KC_SET]['concepts'] if RAFAELLA_KC_SET in item_kcs else []
    )
    if expert_concepts == rafaella_concepts:
        continue

    # Show which dataset row(s) the mismatching catalog item belongs to.
    matching_rows = muntasir.filter(pl.col('paws_catalog_v2_id') == catalog_id)
    print(matching_rows[['set_name', 'item_name', 'item_type', 'item_url']].to_dicts())
    print(f'{EXPERT_KC_SET}=>\n', expert_concepts)
    print(f'{RAFAELLA_KC_SET}=>\n', rafaella_concepts)
    print('    ')
    

[{'set_name': 'py_list_fill', 'item_name': 'py_list_fill_odds', 'item_type': 'PCEX Set', 'item_url': 'http://pawscomp2.sis.pitt.edu/pcex/index.html?lang=PYTHON&set=py_list_fill'}]
alice_edu_py_ontology expert=>
 ['AddingListElementWithAppend', 'Addition', 'AssigningValueVariable', 'CallingLibraryFunction', 'CreatingList', 'ForLoopWith', 'ForLoopWith', 'Multiplication', 'Printing', 'SingleForLoopIteration']
Rafaella added 09.03.2025 alice_edu_py_ontology=>
 ['AddingListElementWithAppend', 'Addition', 'AssigningValueVariable', 'CallingLibraryFunction', 'CreatingList', 'ForLoopWith', 'Multiplication', 'Printing', 'SingleForLoopIteration']
    
[{'set_name': 'py_win_percentage', 'item_name': 'py_win_percentage', 'item_type': 'PCEX Set', 'item_url': 'http://pawscomp2.sis.pitt.edu/pcex/index.html?lang=PYTHON&set=py_win_percentage'}]
alice_edu_py_ontology expert=>
 ['AssigningValueVariable', 'BooleanEvaluating', 'CallingLibraryFunction', 'Division', 'HandlingInput', 'Printing', 'SingleWhileLo