In [8]:
import json
from pathlib import Path
import glob

# JSON exports live in ../data relative to the notebook's working directory.
datapath = Path.cwd().parent / "data"
# glob returns matches in arbitrary order; sort so "the first file" is the same on every run.
filepaths = sorted(glob.glob(str(datapath / "*.json")))
# all() over an empty list is True, so explicitly require at least one match.
assert filepaths, f"no *.json files found in {datapath}"
assert all(Path(filepath).exists() for filepath in filepaths)

def print_keys(d, indent=0):
    """Print the keys of a nested dict as a tab-indented tree.

    Nested dicts are descended into directly. For a list value only its
    first element is inspected (and only if that element is a dict), so the
    first entry is treated as representative of the whole list.

    Args:
        d: Mapping whose keys should be printed.
        indent: Number of tab characters to prefix each key with.
    """
    for k, v in d.items():
        print("\t" * indent + str(k))
        if isinstance(v, dict):
            print_keys(v, indent + 1)
        elif isinstance(v, list) and v and isinstance(v[0], dict):
            # Guard against empty lists (v[0] would raise IndexError) and
            # lists of scalars (which have no keys to print).
            print_keys(v[0], indent + 1)

# Show the key structure of a single record: the first student of the first file.
first_path = next(iter(filepaths), None)
if first_path is not None:
    # The data contains non-ASCII text; decode as UTF-8 explicitly instead of
    # relying on the platform's default encoding.
    term_data = json.loads(Path(first_path).read_text(encoding="utf-8"))
    first_student = next(iter(term_data), None)
    if first_student is not None:
        print_keys(first_student)

id
idnumber
firstaccess
sex
passed
enrolled_courses
	id
	idnumber
	fullname
	enrolledusercount
	enablecompletion
	completionhascriteria
	completionusertracked
	progress
	completed
	startdate
	enddate
	marker
	lastaccess
grades
	quiz
		id
		itemname
		idnumber
		graderaw
		grademin
		grademax
		gradeformatted
	assign
		id
		itemname
		idnumber
		graderaw
		grademin
		grademax
		gradeformatted
	checkmark
		id
		itemname
		idnumber
		graderaw
		grademin
		grademax
		gradeformatted
	organizer
		id
		itemname
		idnumber
		graderaw
		grademin
		grademax
		gradeformatted
	unknown
		id
		itemname
		idnumber
		graderaw
		grademin
		grademax
		gradeformatted


## 4 students are missing a matriculation number

In [2]:
import json
from pathlib import Path
import glob

# JSON exports live in ../data relative to the notebook's working directory.
datapath = Path.cwd().parent / "data"
# glob returns matches in arbitrary order; sort so the files are visited in the same order on every run.
filepaths = sorted(glob.glob(str(datapath / "*.json")))
# all() over an empty list is True, so explicitly require at least one match.
assert filepaths, f"no *.json files found in {datapath}"
assert all(Path(filepath).exists() for filepath in filepaths)


# Keys every student record and every enrolled-course entry is expected to carry.
# Defined once up front instead of being rebuilt for every student.
STUDENT_KEYS = ['id', 'idnumber', 'firstaccess', 'sex', 'passed', 'enrolled_courses', 'grades']
COURSE_KEYS = ['id', 'idnumber', 'fullname', 'enrolledusercount', 'enablecompletion', 'completionhascriteria', 'completionusertracked', 'progress', 'completed', 'startdate', 'enddate', 'marker', 'lastaccess']
# The grade categories ('quiz', 'assign', 'checkmark', 'organizer', 'unknown') are
# deliberately not checked: they are missing in a lot of students.

for fp in filepaths:
    # The file stem is used as the term label in the report below.
    term = Path(fp).stem
    # The data contains non-ASCII text; decode as UTF-8 explicitly instead of
    # relying on the platform's default encoding.
    term_data = json.loads(Path(fp).read_text(encoding="utf-8"))

    for student in term_data:
        # .get() so a record that lacks 'id' is still reported instead of raising KeyError.
        student_id = student.get('id')

        # top layer keys
        missing = set(STUDENT_KEYS) - set(student.keys())
        if missing:
            print(f"student {student_id} @ {term} is missing: {missing}")

        # enrolled_courses keys ('enrolled_courses' may itself be one of the missing keys)
        for course in student.get('enrolled_courses') or []:
            missing = set(COURSE_KEYS) - set(course.keys())
            if missing:
                print(f"student {student_id} @ {term} is missing: {missing}")
        

student 99661 @ 2017W is missing: {'idnumber'} -> dict_values([99661, 1508766302, 'Female', False, [{'id': 11392, 'idnumber': '185A91-2017W', 'fullname': '185.A91 Einführung in die Programmierung 1 (VU 4,0) 2017W', 'enrolledusercount': 776, 'enablecompletion': False, 'completionhascriteria': False, 'completionusertracked': False, 'progress': None, 'completed': None, 'startdate': 1506981600, 'enddate': 1538517600, 'marker': 0, 'lastaccess': 1519901179}], {'quiz': [{'id': 86524, 'itemname': 'Einstufungstest', 'idnumber': '', 'graderaw': None, 'grademin': 0, 'grademax': 20, 'gradeformatted': '-'}, {'id': 89134, 'itemname': 'TUWEL-Test 1', 'idnumber': '', 'graderaw': None, 'grademin': 0, 'grademax': 10, 'gradeformatted': '-'}, {'id': 89137, 'itemname': 'TUWEL-Test 2', 'idnumber': None, 'graderaw': None, 'grademin': 0, 'grademax': 10, 'gradeformatted': '-'}, {'id': 89140, 'itemname': 'TUWEL-Test 3', 'idnumber': None, 'graderaw': None, 'grademin': 0, 'grademax': 10, 'gradeformatted': '-'}, {

## The `firstaccess` column is useless: its minimum is 0 (1970-01-01) or a date from 2006–2008 in every term

In [3]:
import datetime
from preprocess import get_term, TERMS


# For each term, print the earliest and latest `firstaccess` value as local datetimes.
for term in TERMS:
    students = get_term(term)
    # Collect every student's first-access value as an integer Unix timestamp.
    access_stamps = [int(record["firstaccess"]) for record in students]
    earliest = datetime.datetime.fromtimestamp(min(access_stamps))
    latest = datetime.datetime.fromtimestamp(max(access_stamps))
    print(f"{earliest} -> {latest}")

1970-01-01 01:00:00 -> 2019-07-11 20:41:24
1970-01-01 01:00:00 -> 2018-03-07 11:30:02
2007-10-06 22:01:31 -> 2018-10-07 22:19:23
2006-03-20 14:41:49 -> 2019-03-06 21:50:02
2007-05-04 15:44:07 -> 2020-02-27 16:36:38
2006-03-20 14:41:49 -> 2020-03-09 20:20:10
2006-03-20 14:41:49 -> 2020-11-19 12:45:38
1970-01-01 01:00:00 -> 2021-03-18 17:51:09
2006-03-20 14:41:49 -> 2021-11-20 11:09:27
2006-03-20 14:41:49 -> 2022-03-03 12:13:34
1970-01-01 01:00:00 -> 2022-10-17 09:38:16
2006-03-20 14:41:49 -> 2023-03-30 14:55:45
2006-03-02 17:10:23 -> 2023-10-29 17:41:08
2008-10-08 10:40:52 -> 2024-03-18 18:42:19
