Loading the json with the grade data:

In [43]:
import json
# Read the raw bytes of the export and let json decode them in one go.
with open('grades.json', 'rb') as grades_file:
    data = json.loads(grades_file.read())

Extracting the relevant information out of the json for one course:

In [44]:
def build_dict(course):
    """Flatten one course record of the grades JSON into a single-level dict.

    Parameters
    ----------
    course : dict
        One course record. Everything read here lives under
        ``course['content']['achievementDto']``.

    Returns
    -------
    dict
        Keys, in this order: ``id``, ``course_name_de``, ``course_name_en``,
        ``course_number``, ``ects``, ``semester_code``, ``semester_name_de``,
        ``semester_name_en``, ``semester_start_date``, ``semester_end_date``,
        ``grade_date``, ``grade_name_de``, ``grade_name_en``, ``grade``.
        Values are passed through unchanged from the record.

    Raises
    ------
    KeyError
        If one of the expected keys is missing from the record.
    IndexError
        If a translation list has fewer than two entries.
    """
    achievement = course['content']['achievementDto']
    course_lib = achievement['cpCourseLibDto']
    semester = achievement['semesterLibDto']
    grade = achievement['gradeDto']

    def second_translation(field):
        # The entry at index 1 of the translation list feeds the *_en keys.
        # NOTE(review): presumably index 1 is always the English text; confirm against the export.
        return field['translations']['translation'][1]['value']

    return {
        'id': course_lib['id'],
        'course_name_de': course_lib['courseTitle']['value'],
        'course_name_en': second_translation(course_lib['courseTitle']),
        'course_number': course_lib['courseNumber']['courseNumber'],
        'ects': course_lib['ectsCredits'],
        'semester_code': semester['key'],
        'semester_name_de': semester['semesterDesignation']['value'],
        'semester_name_en': second_translation(semester['semesterDesignation']),
        'semester_start_date': semester['startOfAcademicSemester']['value'],
        'semester_end_date': semester['endOfAcademicSemester']['value'],
        'grade_date': achievement['achievementDate']['value'],
        'grade_name_de': grade['name']['value'],
        'grade_name_en': second_translation(grade['name']),
        'grade': grade['value'],
    }

Creating a list of dicts, each dict containing the info for one course.

In [187]:
# One flat dict per entry of data['resource'], in the order the entries appear.
dicts = list(map(build_dict, data['resource']))

For each course, parse the grade distribution out of its HTML file and add it to the course's dict:

In [188]:
from bs4 import BeautifulSoup

# Every grade step that gets its own column, written exactly as it appears in the HTML titles.
possible_grades = ['1.0', '1.3', '1.4', '1.7', '2.0', '2.3', '2.4', '2.7', '3.0', '3.3', '3.4', '3.7', '4.0', '4.3', '4.7', '5.0']
# The same steps plus the extra bucket for students who never showed up.
all_possible_grades = [*possible_grades, 'did_not_show_up']

for d in dicts:
    # University regulation: written exams from the first semester count with half their credits.
    is_first_semester_exam = any(
        name in d['course_name_en']
        for name in ('Discrete Structures', 'Introduction to Informatics', 'Computer Organization')
    )
    # Shift instead of "/ 2" so the weight stays an int (the affected ECTS values are known to be divisible by 2).
    d['grade_weight'] = d['ects'] >> 1 if is_first_semester_exam else d['ects']

    # Read the statistics page stored for this course id.
    with open(f"stats/{d['id']}.html", 'rb') as stats_file:
        html_doc = stats_file.read()
    soup = BeautifulSoup(html_doc, 'html.parser')

    # The numbers are in the title attribute of each <div class="kandcountbox">.
    titles = [box['title'] for box in soup.find_all('div', class_='kandcountbox')]

    # One (<grade>, <number of students>) tuple per title, e.g. ('1.0', 3): the grade is the first
    # word of the last comma-separated field, the count is the first word of the field before it.
    nums = []
    for title in titles:
        fields = title.split(',')
        nums.append((fields[-1].split()[0], int(fields[-2].split()[0])))

    # Every course gets every grade column, even when no student received that grade.
    for grade in all_possible_grades:
        d[grade] = 0

    for title, (_, count) in zip(titles, nums):
        if 'Nicht erschienen' in title:
            # Students who did not show up.
            d['did_not_show_up'] = count
        elif '5.0' in title:
            # Several titles can carry a 5.0 (fails and cheats); add them all up.
            d['5.0'] += count

    # The 5.0 entries were summed above, so only the remaining grades are copied over here.
    for grade, count in nums:
        if grade != '5.0':
            d[grade] = count


Create a pandas dataframe with the data:

In [189]:
import pandas as pd

def _grade_times_ten(grade):
    """Turn a grade string into an int scaled by ten, e.g. '2.7' -> 27 and '2' -> 20."""
    digits = grade.replace('.', '') + '0'
    return int(digits[:2])


# Derived columns are added in order; later entries may read columns created by earlier ones.
df = pd.DataFrame(dicts).assign(**{
    'did_show_up': lambda frame: frame[possible_grades].sum(axis=1),
    'numeric_grade': lambda frame: pd.to_numeric(frame['grade']),
    'int_grade_X10': lambda frame: frame['grade'].apply(_grade_times_ten),
    '5.0_with_noshows': lambda frame: frame['5.0'] + frame['did_not_show_up'],
    'total_students': lambda frame: frame['did_show_up'] + frame['did_not_show_up'],
})

# Same grade columns as possible_grades, except that the 5.0 column is swapped for the one
# that also contains the no-shows (it stays in the last position).
grades_with_noshows = [grade for grade in possible_grades if grade != '5.0'] + ['5.0_with_noshows']

# Lift the row/column display limits just for this one output.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df)

Unnamed: 0,id,course_name_de,course_name_en,course_number,ects,semester_code,semester_name_de,semester_name_en,semester_start_date,semester_end_date,grade_date,grade_name_de,grade_name_en,grade,grade_weight,1.0,1.3,1.4,1.7,2.0,2.3,2.4,2.7,3.0,3.3,3.4,3.7,4.0,4.3,4.7,5.0,did_not_show_up,did_show_up,numeric_grade,int_grade_X10,5.0_with_noshows,total_students
0,950552753,Compilerbau I,Compiler Construction I,IN2227,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,2021-07-26T00:00:00,gut,good,2.0,5,9,10,0,15,18,15,0,22,31,13,0,12,7,8,0,1,40,161,2.0,20,41,201
1,950548250,Statistik für BWL (Einführung mit R),Statistics for Business Administration,MA9712M,6,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,2021-07-22T00:00:00,befriedigend,satisfactory,2.7,6,11,23,0,28,59,103,0,125,114,97,0,70,35,39,14,34,68,752,2.7,27,102,820
2,950549876,Computergestützte Statistik,Computational Statistics,MA3402,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,2021-07-20T00:00:00,ausreichend,sufficient,4.0,5,3,3,0,9,14,10,0,9,14,12,0,10,13,18,19,29,72,163,4.0,40,101,235
3,950511694,Grundlagen der Künstlichen Intelligenz,Techniques in Artificial Intelligence,IN2062,5,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,2021-03-01T00:00:00,befriedigend,satisfactory,3.0,5,20,29,0,44,66,70,0,77,86,69,0,83,65,61,39,107,454,816,3.0,30,561,1270
4,950525218,Höhere Algorithmik,Advanced Algorithms,IN2360,6,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,2021-02-23T00:00:00,befriedigend,satisfactory,2.7,6,8,4,0,4,6,1,0,2,9,6,0,4,2,7,2,11,36,66,2.7,27,47,102
5,950478816,Blockkurs Spanisch A1,Intensive Course Spanish A1,SZ1201-1,3,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,2020-09-09T00:00:00,sehr gut,very good,1.0,3,3,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,16,1.0,10,0,16
6,950487012,Modellbildung und Simulation (Fokus Analysis),Modelling and Simulation (Focus Analysis),IN2366,9,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,2020-08-14T00:00:00,befriedigend,satisfactory,2.7,9,4,3,0,4,3,5,0,7,8,1,0,6,1,1,0,5,14,48,2.7,27,19,62
7,950475386,Einführung in die Theoretische Informatik,Introduction to Theory of Computation,IN0011,8,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,2020-08-13T00:00:00,sehr gut,very good,1.0,8,42,15,0,21,23,35,0,33,35,57,0,40,96,68,76,135,181,676,1.0,10,316,857
8,950474800,Diskrete Wahrscheinlichkeitstheorie,Discrete Probability Theory,IN0018,6,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,2020-08-05T00:00:00,befriedigend,satisfactory,2.7,6,39,15,0,15,24,30,0,41,45,47,0,51,15,61,48,126,148,557,2.7,27,274,705
9,950477777,Seminar Economics and Computation,Advanced Seminar Course - Economics and Comput...,IN4705,5,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,2020-07-31T00:00:00,gut,good,1.7,5,1,0,0,3,0,0,0,2,0,0,0,0,0,0,0,0,0,6,1.7,17,0,6


In [190]:
# Grade steps as ints scaled by ten ('1.3' -> 13), in the same order as possible_grades.
int_grades = [int(grade.replace('.', '')) for grade in possible_grades]

# Sum of (student count * scaled grade) per course; the columns line up with int_grades by position.
scaled_grade_sum = (df[possible_grades] * int_grades).sum(axis=1)
scaled_grade_sum_with_noshows = (df[grades_with_noshows] * int_grades).sum(axis=1)

# Divide by the head count, then by ten to undo the scaling.
df['mean'] = scaled_grade_sum / df['did_show_up'] / 10
df['mean_with_noshows'] = scaled_grade_sum_with_noshows / df['total_students'] / 10
df

Unnamed: 0,id,course_name_de,course_name_en,course_number,ects,semester_code,semester_name_de,semester_name_en,semester_start_date,semester_end_date,...,4.7,5.0,did_not_show_up,did_show_up,numeric_grade,int_grade_X10,5.0_with_noshows,total_students,mean,mean_with_noshows
0,950552753,Compilerbau I,Compiler Construction I,IN2227,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,0,1,40,161,2.0,20,41,201,2.640373,3.10995
1,950548250,Statistik für BWL (Einführung mit R),Statistics for Business Administration,MA9712M,6,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,14,34,68,752,2.7,27,102,820,2.986037,3.153049
2,950549876,Computergestützte Statistik,Computational Statistics,MA3402,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,19,29,72,163,4.0,40,101,235,3.557055,3.999149
3,950511694,Grundlagen der Künstlichen Intelligenz,Techniques in Artificial Intelligence,IN2062,5,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,39,107,454,816,3.0,30,561,1270,3.268137,3.887244
4,950525218,Höhere Algorithmik,Advanced Algorithms,IN2360,6,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,2,11,36,66,2.7,27,47,102,3.087879,3.762745
5,950478816,Blockkurs Spanisch A1,Intensive Course Spanish A1,SZ1201-1,3,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,0,0,0,16,1.0,10,0,16,1.39375,1.39375
6,950487012,Modellbildung und Simulation (Fokus Analysis),Modelling and Simulation (Focus Analysis),IN2366,9,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,0,5,14,48,2.7,27,19,62,2.789583,3.28871
7,950475386,Einführung in die Theoretische Informatik,Introduction to Theory of Computation,IN0011,8,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,76,135,181,676,1.0,10,316,857,3.642751,3.929405
8,950474800,Diskrete Wahrscheinlichkeitstheorie,Discrete Probability Theory,IN0018,6,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,48,126,148,557,2.7,27,274,705,3.533932,3.841702
9,950477777,Seminar Economics and Computation,Advanced Seminar Course - Economics and Comput...,IN4705,5,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,0,0,0,6,1.7,17,0,6,1.916667,1.916667


In [191]:
import numpy as np

In [192]:
# The (x - mu)^2 part of the standard deviation: one row per course, one column per grade step.
# Broadcasting a (1, n_grades) row against an (n_courses, 1) column gives the full grid.
grade_row = (np.array(int_grades) / 10)[np.newaxis, :]
mean_column = df['mean'].to_numpy()[:, np.newaxis]
squared_differences = np.square(grade_row - mean_column)

In [193]:
# Weight each squared difference by the number of students with that grade, average over the
# students who showed up, and take the square root.
weighted_squares = df[possible_grades] * squared_differences
variance = weighted_squares.sum(axis=1) / df['did_show_up']
df['standard_deviation'] = np.sqrt(variance)

In [194]:
# Show the frame as the cell output (it now carries the standard_deviation column).
df

Unnamed: 0,id,course_name_de,course_name_en,course_number,ects,semester_code,semester_name_de,semester_name_en,semester_start_date,semester_end_date,...,5.0,did_not_show_up,did_show_up,numeric_grade,int_grade_X10,5.0_with_noshows,total_students,mean,mean_with_noshows,standard_deviation
0,950552753,Compilerbau I,Compiler Construction I,IN2227,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,1,40,161,2.0,20,41,201,2.640373,3.10995,0.892785
1,950548250,Statistik für BWL (Einführung mit R),Statistics for Business Administration,MA9712M,6,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,34,68,752,2.7,27,102,820,2.986037,3.153049,0.892316
2,950549876,Computergestützte Statistik,Computational Statistics,MA3402,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,29,72,163,4.0,40,101,235,3.557055,3.999149,1.175724
3,950511694,Grundlagen der Künstlichen Intelligenz,Techniques in Artificial Intelligence,IN2062,5,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,107,454,816,3.0,30,561,1270,3.268137,3.887244,1.13025
4,950525218,Höhere Algorithmik,Advanced Algorithms,IN2360,6,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,11,36,66,2.7,27,47,102,3.087879,3.762745,1.362875
5,950478816,Blockkurs Spanisch A1,Intensive Course Spanish A1,SZ1201-1,3,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,0,0,16,1.0,10,0,16,1.39375,1.39375,0.260933
6,950487012,Modellbildung und Simulation (Fokus Analysis),Modelling and Simulation (Focus Analysis),IN2366,9,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,5,14,48,2.7,27,19,62,2.789583,3.28871,1.12828
7,950475386,Einführung in die Theoretische Informatik,Introduction to Theory of Computation,IN0011,8,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,135,181,676,1.0,10,316,857,3.642751,3.929405,1.219888
8,950474800,Diskrete Wahrscheinlichkeitstheorie,Discrete Probability Theory,IN0018,6,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,126,148,557,2.7,27,274,705,3.533932,3.841702,1.26996
9,950477777,Seminar Economics and Computation,Advanced Seminar Course - Economics and Comput...,IN4705,5,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,0,0,6,1.7,17,0,6,1.916667,1.916667,0.606676


In [195]:
# z-score of the grade in the `grade` column: distance from the course mean in standard deviations.
own_grade = pd.to_numeric(df['grade'])
df['z'] = own_grade.sub(df['mean']).div(df['standard_deviation'])
df

Unnamed: 0,id,course_name_de,course_name_en,course_number,ects,semester_code,semester_name_de,semester_name_en,semester_start_date,semester_end_date,...,did_not_show_up,did_show_up,numeric_grade,int_grade_X10,5.0_with_noshows,total_students,mean,mean_with_noshows,standard_deviation,z
0,950552753,Compilerbau I,Compiler Construction I,IN2227,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,40,161,2.0,20,41,201,2.640373,3.10995,0.892785,-0.717275
1,950548250,Statistik für BWL (Einführung mit R),Statistics for Business Administration,MA9712M,6,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,68,752,2.7,27,102,820,2.986037,3.153049,0.892316,-0.320556
2,950549876,Computergestützte Statistik,Computational Statistics,MA3402,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,72,163,4.0,40,101,235,3.557055,3.999149,1.175724,0.376742
3,950511694,Grundlagen der Künstlichen Intelligenz,Techniques in Artificial Intelligence,IN2062,5,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,454,816,3.0,30,561,1270,3.268137,3.887244,1.13025,-0.237237
4,950525218,Höhere Algorithmik,Advanced Algorithms,IN2360,6,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,36,66,2.7,27,47,102,3.087879,3.762745,1.362875,-0.284603
5,950478816,Blockkurs Spanisch A1,Intensive Course Spanish A1,SZ1201-1,3,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,0,16,1.0,10,0,16,1.39375,1.39375,0.260933,-1.509009
6,950487012,Modellbildung und Simulation (Fokus Analysis),Modelling and Simulation (Focus Analysis),IN2366,9,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,14,48,2.7,27,19,62,2.789583,3.28871,1.12828,-0.079398
7,950475386,Einführung in die Theoretische Informatik,Introduction to Theory of Computation,IN0011,8,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,181,676,1.0,10,316,857,3.642751,3.929405,1.219888,-2.166388
8,950474800,Diskrete Wahrscheinlichkeitstheorie,Discrete Probability Theory,IN0018,6,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,148,557,2.7,27,274,705,3.533932,3.841702,1.26996,-0.65666
9,950477777,Seminar Economics and Computation,Advanced Seminar Course - Economics and Comput...,IN4705,5,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,0,6,1.7,17,0,6,1.916667,1.916667,0.606676,-0.357137


In [196]:
# Same z-score computation, this time with the no-shows counted in the 5.0 bucket.
grade_row = (np.array(int_grades) / 10)[np.newaxis, :]
mean_with_noshows_column = df['mean_with_noshows'].to_numpy()[:, np.newaxis]
squared_differences = np.square(grade_row - mean_with_noshows_column)

variance_with_noshows = (df[grades_with_noshows] * squared_differences).sum(axis=1) / df['total_students']
df['standard_deviation_with_noshows'] = np.sqrt(variance_with_noshows)

own_grade = pd.to_numeric(df['grade'])
df['z_with_noshows'] = own_grade.sub(df['mean_with_noshows']).div(df['standard_deviation_with_noshows'])
df

Unnamed: 0,id,course_name_de,course_name_en,course_number,ects,semester_code,semester_name_de,semester_name_en,semester_start_date,semester_end_date,...,numeric_grade,int_grade_X10,5.0_with_noshows,total_students,mean,mean_with_noshows,standard_deviation,z,standard_deviation_with_noshows,z_with_noshows
0,950552753,Compilerbau I,Compiler Construction I,IN2227,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,2.0,20,41,201,2.640373,3.10995,0.892785,-0.717275,1.235302,-0.898526
1,950548250,Statistik für BWL (Einführung mit R),Statistics for Business Administration,MA9712M,6,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,2.7,27,102,820,2.986037,3.153049,0.892316,-0.320556,1.019147,-0.444537
2,950549876,Computergestützte Statistik,Computational Statistics,MA3402,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,4.0,40,101,235,3.557055,3.999149,1.175724,0.376742,1.183755,0.000719
3,950511694,Grundlagen der Künstlichen Intelligenz,Techniques in Artificial Intelligence,IN2062,5,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,3.0,30,561,1270,3.268137,3.887244,1.13025,-0.237237,1.228703,-0.722098
4,950525218,Höhere Algorithmik,Advanced Algorithms,IN2360,6,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,2.7,27,47,102,3.087879,3.762745,1.362875,-0.284603,1.427182,-0.744646
5,950478816,Blockkurs Spanisch A1,Intensive Course Spanish A1,SZ1201-1,3,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,1.0,10,0,16,1.39375,1.39375,0.260933,-1.509009,0.260933,-1.509009
6,950487012,Modellbildung und Simulation (Fokus Analysis),Modelling and Simulation (Focus Analysis),IN2366,9,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,2.7,27,19,62,2.789583,3.28871,1.12828,-0.079398,1.35636,-0.434037
7,950475386,Einführung in die Theoretische Informatik,Introduction to Theory of Computation,IN0011,8,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,1.0,10,316,857,3.642751,3.929405,1.219888,-2.166388,1.216849,-2.407369
8,950474800,Diskrete Wahrscheinlichkeitstheorie,Discrete Probability Theory,IN0018,6,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,2.7,27,274,705,3.533932,3.841702,1.26996,-0.65666,1.276994,-0.894054
9,950477777,Seminar Economics and Computation,Advanced Seminar Course - Economics and Comput...,IN4705,5,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,1.7,17,0,6,1.916667,1.916667,0.606676,-0.357137,0.606676,-0.357137


In [198]:
# Share (in %) of the students who showed up whose grade value is strictly lower than the
# grade in int_grade_X10.
# NOTE(review): this cell used to be labelled "inclusive definition of percentile", but the
# comparison is strict (<), so students with exactly the same grade are not counted; confirm
# which definition is intended.
strictly_lower = np.array(int_grades)[np.newaxis, :] < df['int_grade_X10'].to_numpy()[:, np.newaxis]
df['percentile'] = (df[possible_grades] * strictly_lower).sum(axis=1) / df['did_show_up'] * 100

In [199]:
# Same percentile as above, but relative to all registered students: the no-shows sit in the
# last (5.0) bucket and the denominator is total_students.
strictly_lower = np.array(int_grades)[np.newaxis, :] < df['int_grade_X10'].to_numpy()[:, np.newaxis]
df['percentile_with_noshows'] = (df[grades_with_noshows] * strictly_lower).sum(axis=1) / df['total_students'] * 100
df

Unnamed: 0,id,course_name_de,course_name_en,course_number,ects,semester_code,semester_name_de,semester_name_en,semester_start_date,semester_end_date,...,5.0_with_noshows,total_students,mean,mean_with_noshows,standard_deviation,z,standard_deviation_with_noshows,z_with_noshows,percentile_with_noshows,percentile
0,950552753,Compilerbau I,Compiler Construction I,IN2227,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,41,201,2.640373,3.10995,0.892785,-0.717275,1.235302,-0.898526,16.915423,21.118012
1,950548250,Statistik für BWL (Einführung mit R),Statistics for Business Administration,MA9712M,6,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,102,820,2.986037,3.153049,0.892316,-0.320556,1.019147,-0.444537,27.317073,29.787234
2,950549876,Computergestützte Statistik,Computational Statistics,MA3402,5,21S,Sommersemester 2021,Summer semester 2021,2021-04-01,2021-09-30,...,101,235,3.557055,3.999149,1.175724,0.376742,1.183755,0.000719,35.744681,51.533742
3,950511694,Grundlagen der Künstlichen Intelligenz,Techniques in Artificial Intelligence,IN2062,5,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,561,1270,3.268137,3.887244,1.13025,-0.237237,1.228703,-0.722098,24.094488,37.5
4,950525218,Höhere Algorithmik,Advanced Algorithms,IN2360,6,20W,Wintersemester 2020/21,Winter semester 2020/21,2020-10-01,2021-03-31,...,47,102,3.087879,3.762745,1.362875,-0.284603,1.427182,-0.744646,22.54902,34.848485
5,950478816,Blockkurs Spanisch A1,Intensive Course Spanish A1,SZ1201-1,3,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,0,16,1.39375,1.39375,0.260933,-1.509009,0.260933,-1.509009,0.0,0.0
6,950487012,Modellbildung und Simulation (Fokus Analysis),Modelling and Simulation (Focus Analysis),IN2366,9,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,19,62,2.789583,3.28871,1.12828,-0.079398,1.35636,-0.434037,30.645161,39.583333
7,950475386,Einführung in die Theoretische Informatik,Introduction to Theory of Computation,IN0011,8,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,316,857,3.642751,3.929405,1.219888,-2.166388,1.216849,-2.407369,0.0,0.0
8,950474800,Diskrete Wahrscheinlichkeitstheorie,Discrete Probability Theory,IN0018,6,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,274,705,3.533932,3.841702,1.26996,-0.65666,1.276994,-0.894054,17.446809,22.082585
9,950477777,Seminar Economics and Computation,Advanced Seminar Course - Economics and Comput...,IN4705,5,20S,Sommersemester 2020,Summer semester 2020,2020-04-01,2020-09-30,...,0,6,1.916667,1.916667,0.606676,-0.357137,0.606676,-0.357137,16.666667,16.666667


In [200]:
# Mean z-score over all courses, weighted by grade_weight.
weights = df['grade_weight']
mean_z_score = (df['z'] * weights).sum() / weights.sum()
print(mean_z_score)
# The same weighted mean for the z-scores that include the no-shows.
mean_z_score_with_noshows = (df['z_with_noshows'] * weights).sum() / weights.sum()
print(mean_z_score_with_noshows)

-0.901407363914152
-1.0792597750038568


In [167]:
# Average grade over all courses, weighted by grade_weight.
weighted_grade_sum = (df['numeric_grade'] * df['grade_weight']).sum()
avg_grade = weighted_grade_sum / df['grade_weight'].sum()
print(avg_grade)

1.997183098591549


In [201]:
# Average percentile over all courses, weighted by grade_weight, without and with the no-shows.
weights = df['grade_weight']
avg_percentile = (df['percentile'] * weights).sum() / weights.sum()
print(avg_percentile)
avg_percentile_with_noshows = (df['percentile_with_noshows'] * weights).sum() / weights.sum()
print(avg_percentile_with_noshows)

17.963827268248338
14.51441913098491


In [207]:
# Total credits across all listed courses (unhalved ECTS, not grade_weight).
total_ects = df.loc[:, 'ects'].sum()

Save the DataFrame data to a json:

In [215]:
# One JSON object per course row, written as a single array.
df.to_json('web/js/grade_data.json', orient='records')

In [216]:
# Aggregate figures computed in the cells above, collected under the keys written to the file.
aggregates = {
    'mean_z_score': mean_z_score,
    'mean_z_score_with_noshows': mean_z_score_with_noshows,
    'avg_grade': avg_grade,
    'avg_percentile': avg_percentile,
    'avg_percentile_with_noshows': avg_percentile_with_noshows,
    # Cast to a plain int before dumping (presumably because the pandas sum is a NumPy integer).
    'total_ects': int(total_ects),
}
with open('web/js/agg_data.json', 'w') as agg_f:
    json.dump(aggregates, agg_f)