In [None]:
import json

# Load the content catalog once. The processing cells below iterate it and read
# each item's "id" and "url" keys. The context manager closes the file handle
# deterministically, and the encoding is pinned so the result does not depend
# on the machine's locale.
with open("./catalog.json", encoding="utf-8") as catalog_file:
    catalog = json.load(catalog_file)

In [None]:
import mysql.connector
import configparser

# Database credentials come from mysql.ini, which must provide a [mysql]
# section with "host", "username" and "password" keys.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail here with a clear message instead of a
# KeyError on config["mysql"] further down.
if not config.read("mysql.ini"):
    raise FileNotFoundError("mysql.ini not found or unreadable")

# Connection and cursor shared by every processing cell in this notebook.
agg_db = mysql.connector.connect(
    host=config["mysql"]["host"],
    user=config["mysql"]["username"],
    password=config["mysql"]["password"],
    # NOTE(review): with SSL disabled the credentials travel unencrypted;
    # confirm this is intended for the target host.
    ssl_disabled=True,  # Disable SSL
)
agg_cursor = agg_db.cursor()


def insert_content_attrs(id, description, code, url, metadata):
    """Dry-run stand-in for the ``aggregate.ent_content_attrs`` insert.

    Prints the five values that would make up one row, space-separated on a
    single line, and returns ``None``. Nothing is written to the database: the
    real INSERT is kept below, commented out.

    Args:
        id: value for the ``content_id`` column.
        description: value for the ``description`` column.
        code: value for the ``code`` column.
        url: value for the ``preview_url`` column.
        metadata: value for the ``metadata`` column. NOTE(review): the disabled
            INSERT calls ``metadata.keys()`` (i.e. expects a dict), while some
            callers in this notebook pass an already-serialized JSON string;
            reconcile before re-enabling it.
    """
    row = (id, description, code, url, metadata)
    print(*row)
    # Disabled write path, kept for reference:
    # agg_cursor.execute(
    #     """INSERT INTO aggregate.ent_content_attrs (content_id, description, code, preview_url, metadata) VALUES (%s, %s, %s, %s, %s)""",
    #     (
    #         id,
    #         description,
    #         code,
    #         url,
    #         json.dumps(metadata) if len(metadata.keys()) > 0 else "",
    #     ),
    # )

In [None]:
# ------------- PROCESS WEAT CONTENT -------------

import re
import requests


def process_weat_pcex_items():
    """Extract description, code and distractors for PCEX-authoring items.

    For every catalog item whose URL starts with one of the PCEX-authoring
    preview prefixes, the text after the prefix is split on "?" into the
    activity JSON URL and a query string. The JSON is downloaded, the goal
    selected by the query's ``index=`` parameter is taken from the first
    activity's ``activityGoals``, and its description, its code (``lineList``
    contents ordered by line number) and, for goals that are not fully worked
    out, its distractor lines are passed to ``insert_content_attrs``.
    ``agg_db.commit()`` is called once after the loop.

    Items whose JSON download does not return HTTP 200 are reported and
    skipped.

    Raises:
        IndexError: if a matching URL has no "?" after the load target or no
            ``index=`` parameter in its query.
        requests.RequestException: on connection failure or timeout.
    """
    print("connection to aggregate database established")

    prefixes = (
        "http://pawscomp2.sis.pitt.edu/pcex-authoring/assets/preview/index.html?load=",
        "http://pawscomp2.sis.pitt.edu/pcex-authoring/preview/index.html?load=",
    )
    request_timeout = 30  # seconds; without it requests can wait indefinitely
    dropped_keys = ("id", "indentLevel", "number")

    for item in catalog:
        matched_prefix = next(
            (p for p in prefixes if item["url"].startswith(p)), None
        )
        if matched_prefix is None:
            continue

        # What follows the prefix is "<activity JSON URL>?<query with index=N>".
        iframe_url = item["url"][len(matched_prefix):].split("?")
        index = int(
            [p for p in iframe_url[1].split("&") if p.startswith("index=")][0].split("=")[1]
        )

        response = requests.get(iframe_url[0], timeout=request_timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve {iframe_url[0]}")
            continue
        content = response.json()[0]["activityGoals"][index]

        metadata = dict()

        if content["fullyWorkedOut"] is False:
            metadata["distractors"] = []
            for entry in content["distractorList"]:
                source_line = entry["line"]
                # Build a fresh dict instead of del-ing keys: a distractor
                # without "number" (or "id"/"indentLevel") no longer raises
                # KeyError, and the downloaded payload is left untouched.
                distractor = {
                    key: value
                    for key, value in source_line.items()
                    if key not in dropped_keys
                }
                if "number" in source_line and source_line["number"] > 0:
                    distractor["line_number"] = int(source_line["number"])
                metadata["distractors"].append(distractor)

        ordered_lines = sorted(
            content["lineList"], key=lambda ln: int(ln["number"])
        )
        code = "\n".join(ln["content"] for ln in ordered_lines)

        insert_content_attrs(
            item["id"],
            content["goalDescription"],
            code,
            item["url"],
            # Metadata is serialized here; an empty string stands for "none".
            json.dumps(metadata) if metadata else "",
        )

        print(f"processed content_id: {item['id']}")

    agg_db.commit()



# Run the PCEX-authoring pass over the catalog; this issues one HTTP GET per
# matching catalog item.
process_weat_pcex_items()

In [None]:
# ------------- PROCESS PCEX-v1n2 CONTENT -------------

import re
import requests


def process_pcex_v12_items():
    """Extract description, code and distractors for PCEX v1/v2 items.

    For every catalog item whose URL starts with one of the PCEX ``index.html``
    prefixes, the query string is read positionally (1st parameter: language,
    2nd: problem set, optional 3rd: challenge). The problem-set JSON is
    downloaded from the pcex_v2 data directory, and either the first activity
    goal (no challenge given) or the first goal whose ``fileName`` starts with
    the challenge value is selected. Its description, its code (``lineList``
    contents ordered by line number) and, for goals that are not fully worked
    out, its distractor lines are passed to ``insert_content_attrs``.
    ``agg_db.commit()`` is called once after the loop.

    Items whose JSON download does not return HTTP 200 are reported and
    skipped.

    Raises:
        IndexError: if a matching URL lacks the expected query parameters or
            no goal matches the challenge.
        requests.RequestException: on connection failure or timeout.
    """
    print("connection to aggregate database established")

    prefixes = (
        "http://adapt2.sis.pitt.edu/pcex/index.html",
        "http://pawscomp2.sis.pitt.edu/pcex/index.html",
        "http://pawscomp2.sis.pitt.edu/pcex/pcex_v2/index.html",
    )
    request_timeout = 30  # seconds; without it requests can wait indefinitely
    dropped_keys = ("id", "indentLevel", "number")

    for item in catalog:
        if not item["url"].startswith(prefixes):
            continue

        # Parameters are taken by position, not by name.
        iframe_url = item["url"].split("index.html?")[1].split("&")
        language = iframe_url[0].split("=")[1]
        problem_set = iframe_url[1].split("=")[1]
        challenge = iframe_url[2].split("=")[1] if len(iframe_url) > 2 else None

        data_url = f"http://pawscomp2.sis.pitt.edu/pcex/pcex_v2/data/{language}_{problem_set}.json?v=201801041411"
        response = requests.get(data_url, timeout=request_timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve {data_url}")
            continue
        activity = response.json()

        if challenge is None:
            content = activity["activityGoals"][0]
        else:
            content = [
                goal
                for goal in activity["activityGoals"]
                if goal["fileName"].startswith(challenge)
            ][0]

        metadata = dict()
        if content["fullyWorkedOut"] is False:
            metadata["distractors"] = []
            for entry in content["distractorList"]:
                source_line = entry["line"]
                # Build a fresh dict instead of del-ing keys: a distractor
                # without "number" (or "id"/"indentLevel") no longer raises
                # KeyError, and the downloaded payload is left untouched.
                distractor = {
                    key: value
                    for key, value in source_line.items()
                    if key not in dropped_keys
                }
                if "number" in source_line and source_line["number"] > 0:
                    distractor["line_number"] = int(source_line["number"])
                metadata["distractors"].append(distractor)

        ordered_lines = sorted(
            content["lineList"], key=lambda ln: int(ln["number"])
        )
        code = "\n".join(ln["content"] for ln in ordered_lines)

        insert_content_attrs(
            item["id"],
            content["goalDescription"],
            code,
            item["url"],
            # Metadata is serialized here; an empty string stands for "none".
            json.dumps(metadata) if metadata else "",
        )

        print(f"processed content_id: {item['id']}")

    agg_db.commit()


# Run the PCEX v1/v2 pass over the catalog; this issues one HTTP GET per
# matching catalog item.
process_pcex_v12_items()

In [None]:
# # ------------- PROCESS jsparsons-python CONTENT -------------

# import re
# import requests


# def process_jsparsons_python_items():
#     print("connection to aggregate database established")
#     prefix = "http://adapt2.sis.pitt.edu/acos/pitt/jsparsons/jsparsons-python/"

#     for _, item in enumerate(catalog):
#         if not (item["url"].startswith(prefix)):
#             continue

#         iframe_url = f"{item['url']}&grp=demo&usr=demo&sid=-1&cid=-1"
#         content = requests.get(iframe_url).text

#         code = (
#             content[
#                 content.find("parson.init('") : content.find("parson.shuffleLines();")
#             ]
#             .strip()[13:-3]
#             .replace("\\n", "\n")
#         )

#         description_start = content.find('<div id="instructions">')
#         description = (
#             content[
#                 content.find('<div id="instructions">') : content.find(
#                     "</div>", description_start
#                 )
#                 + 6
#             ]
#             .replace('<div id="instructions">', "")
#             .strip()[5:-6]
#             .strip()
#         )

#         insert_content_attrs(item["id"], description, code, iframe_url, dict())

#         print(f"processed content_id: {item['id']}")

#     agg_db.commit()


# process_jsparsons_python_items()

In [None]:
# # ------------- PROCESS jsvee CONTENT -------------

# import re
# import requests
# import json

# jsvees = json.loads(open("./jsvee.json").read())
# jsvees = {k: "\n".join(v["lines"]) for k, v in jsvees.items()}


# def process_jsvee_items():
#     print("connection to aggregate database established")
#     prefix_pittpy = "http://adapt2.sis.pitt.edu/acos/pitt/jsvee/jsvee-python/"
#     prefix_acospy = "https://acos.cs.vt.edu/pitt/jsvee/jsvee-python/"
#     prefix_acosjava = "https://acos.cs.vt.edu/pitt/jsvee/jsvee-java/"

#     for _, item in enumerate(catalog):
#         if not (
#             item["url"].startswith(prefix_pittpy)
#             or item["url"].startswith(prefix_acospy)
#             or item["url"].startswith(prefix_acosjava)
#         ):
#             continue

#         example_id = item["url"].split("?example-id=")[1]

#         insert_content_attrs(
#             item["id"],
#             "",
#             jsvees[example_id],
#             f"{item['url']}&grp=demo&usr=demo&sid=-1&cid=-1",
#             dict(),
#         )

#         print(f"processed content_id: {item['id']}")

#     agg_db.commit()


# process_jsvee_items()

In [None]:
# # ------------- PROCESS QuizPet CONTENT -------------

# import re
# import requests
# import json


# def process_quizpet_items():
#     print("connection to aggregate database established")
#     prefix = "http://adapt2.sis.pitt.edu/quizpet/displayQuiz.jsp"

#     for _, item in enumerate(catalog):
#         if not (item["url"].startswith(prefix)):
#             continue

#         req = requests.get(item["url"])
#         if req.status_code != 200:
#             print(f"Failed to retrieve {item['url']}")
#             continue

#         content = req.text
#         code = content[
#             content.find("<pre>") + 5 : content.find("<form method=post>")
#         ].strip()

#         insert_content_attrs(
#             item["id"],
#             "",
#             code,
#             item["url"],
#             dict(),
#         )

#         print(f"processed content_id: {item['id']}")

#     agg_db.commit()


# process_quizpet_items()

In [None]:
# # ------------- PROCESS QuizJet CONTENT -------------

# import re
# import requests
# import json


# def process_quizjet_items():
#     print("connection to aggregate database established")
#     prefix = "http://pawscomp2.sis.pitt.edu/quizjet/displayQuiz.jsp"

#     for _, item in enumerate(catalog):
#         if not (item["url"].startswith(prefix)):
#             continue

#         req = requests.get(item["url"])
#         if req.status_code != 200:
#             print(f"Failed to retrieve {item['url']}")
#             continue

#         content = req.text
#         code = content[
#             content.find("<pre>") + 5 : content.find("<form method=post>")
#         ].strip()

#         insert_content_attrs(
#             item["id"],
#             "",
#             code,
#             item["url"],
#             dict(),
#         )

#         print(f"processed content_id: {item['id']}")

#     agg_db.commit()


# process_quizjet_items()

In [None]:
# # ------------- PROCESS WebEx CONTENT -------------

# import re
# import requests
# import json
# from bs4 import BeautifulSoup


# def process_webex_items():
#     print("connection to aggregate database established")
#     prefix = "http://adapt2.sis.pitt.edu/web_ex_NV0FGdaHzy/"

#     for _, item in enumerate(catalog):
#         if not (item["url"].startswith(prefix)):
#             continue

#         req = requests.get(item["url"])
#         if req.status_code != 200:
#             print(f"Failed to retrieve {item['url']}")
#             continue

#         content = BeautifulSoup(req.text, "html.parser")

#         insert_content_attrs(
#             item["id"],
#             "",
#             "\n".join(
#                 [
#                     "\n".join(
#                         [ln for ln in tr.get_text().split("\n") if len(ln.strip()) > 0]
#                     )
#                     for tr in content.find_all("tr")
#                 ]
#             ),
#             item["url"],
#             dict(),
#         )

#         print(f"processed content_id: {item['id']}")

#     agg_db.commit()


# process_webex_items()

In [None]:
# ------------- PROCESS AnnEx CONTENT -------------
import re
import requests
import json
from bs4 import BeautifulSoup


def process_annex_items():
    """Extract the annotated example text for AnnEx (annotated-java) items.

    For every catalog item whose URL starts with the Dissection2 prefix, the
    value after "?act=" is appended to the ACOS html endpoint and that page is
    downloaded. The text of each ``<tr>`` element is reduced to its non-blank
    lines; the rows are joined with newlines and passed to
    ``insert_content_attrs`` as the code, with an empty description and empty
    metadata. ``agg_db.commit()`` is called once after the loop.

    Pages that do not return HTTP 200 are reported and skipped.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    print("connection to aggregate database established")
    prefix = "http://adapt2.sis.pitt.edu/pitt/annotated/annotated-java/Dissection2"
    request_timeout = 30  # seconds; without it requests can wait indefinitely

    for item in catalog:
        if not item["url"].startswith(prefix):
            continue

        # Catalog URLs look like
        #   http://adapt2.sis.pitt.edu/pitt/annotated/annotated-java/Dissection2?act=post_request_example
        # and the page itself is served from the ACOS html endpoint under the
        # name given after "?act=".
        url = (
            "http://adapt2.sis.pitt.edu/acos/html/annotated/annotated-java/"
            + item["url"].split("?act=")[-1]
        )

        req = requests.get(url, timeout=request_timeout)
        if req.status_code != 200:
            print(f"Failed to retrieve {url}")
            continue

        content = BeautifulSoup(req.text, "html.parser")

        # One entry per table row. A row with no non-blank text still
        # contributes an empty entry, i.e. an empty line in the joined result.
        row_texts = []
        for tr in content.find_all("tr"):
            non_blank = [
                ln for ln in tr.get_text().split("\n") if len(ln.strip()) > 0
            ]
            row_texts.append("\n".join(non_blank))

        insert_content_attrs(
            item["id"],
            "",
            "\n".join(row_texts),
            item["url"],
            dict(),
        )

        print(f"processed content_id: {item['id']}")

    agg_db.commit()


# Run the AnnEx pass over the catalog; this issues one HTTP GET per matching
# catalog item.
process_annex_items()

In [None]:
# # ------------- PROCESS Python-PCRS CONTENT -------------

# import re
# import requests
# import json
# from bs4 import BeautifulSoup


# def process_pcrs_python_items():
#     print("connection to aggregate database established")
#     prefix_python = "https://pcrs.utm.utoronto.ca/mgrids/problems/python/"
#     prefix_java = "https://pcrs.utm.utoronto.ca/mgrids/problems/java/"

#     for _, item in enumerate(catalog):
#         if not (
#             item["url"].startswith(prefix_python) or item["url"].startswith(prefix_java)
#         ):
#             continue

#         req = requests.get(item["url"])
#         if req.status_code != 200:
#             print(f"Failed to retrieve {item['url']}")
#             continue

#         content = BeautifulSoup(req.text, "html.parser")
#         description = content.find("h5", {"class": "problem-description"}).get_text()
#         code = content.find("div", {"id": "div_id_submission"}).get_text().strip()

#         insert_content_attrs(
#             item["id"],
#             description,
#             code,
#             item["url"],
#             dict(),
#         )

#         print(f"processed content_id: {item['id']}")

#     agg_db.commit()


# process_pcrs_python_items()

In [None]:
# # possible fixes to make content launch correctly in preview mode

# -- # fix webex preview urls to include sid=demo
# update ent_content_attrs set preview_url = REPLACE(preview_url, '&svc=progvis', '&svc=progvis&sid=demo')
# where preview_url like 'http://adapt2.sis.pitt.edu/web_ex_NV0FGdaHzy/%&svc=progvis%'

# -- # append "&usr=demo" to dbqa
# -- http://adapt2.sis.pitt.edu/lti/launch?tool=dbqa&sub=select-from-3&usr=demo

# -- # append "&sid=demo&usr=demo&grp=demo" to sqlknot
# -- http://adapt2.sis.pitt.edu/sqlknot/index.html?cid=10&tid=4&svc=progvis&sid=demo&usr=demo&grp=demo

# --  