In [1]:
import numpy as np
import pandas as pd
import os
import random
import json
from functools import reduce
import glob

In [2]:
with open('drop_downs.json', 'r') as json_file:
   grade_create = json.load(json_file)

In [3]:
student_ids = np.linspace(7234, 7657, 423, dtype=int)

In [5]:
def calculate_final_grades(dataframes):
    # Step 1: Drop the 'Feedback' column if it exists in any DataFrame
    clean_dataframes = [df.drop(columns=['Feedback'], errors='ignore') for df in dataframes]

    clean_dataframes = [df.reset_index(drop=True) for df in clean_dataframes]
    
    # Step 2: Rename 'Grades' column in each DataFrame to avoid conflicts during merge
    for i, df in enumerate(clean_dataframes):
        df.rename(columns={'Grades': f'Assignment_{i + 1}'}, inplace=True)
    
    # Step 2: Merge all DataFrames on the 'Student ID' column
    merged_df = reduce(lambda left, right: pd.merge(left, right, on='Student ID', how='outer'), clean_dataframes)
    
    # Step 3: Calculate final grade by averaging all grade columns
    # The columns except 'Student ID' are the assignment grades
    grade_columns = merged_df.columns.drop('Student ID')
    merged_df['Final Grade'] = merged_df[grade_columns].mean(axis=1)
    
    # Step 4: Return the DataFrame with Student ID and Final Grade
    final_grades_df = merged_df[['Student ID', 'Final Grade']]
    return final_grades_df

def generate_section_csv(class_ids):
    # Step 3: Shift mean for grades between 85 and 95 for this section
    mean_grade = random.uniform(85, 95)
    std_dev = 20  # Standard deviation for grade distribution

    # Generate grades using a Gaussian distribution
    grades = np.clip(np.random.normal(mean_grade, std_dev, len(class_ids)), 0, 100)
    grades = np.round(grades, 2)  # Rounds grades to 2 decimal points
    
    # Step 4: Create feedback for each student
    feedback = ["Link to Assignment with Professor Feedback"] * len(class_ids)

    # Step 5: Create a DataFrame to hold the data
    data = {
        "Student ID": class_ids,
        "Grades": grades,
        "Feedback": feedback
    }
    df = pd.DataFrame(data)
    
    return df

for program,courses in grade_create.items(): 
    os.mkdir(f"course_grades/{program}")
    #print(program)
    for course, sections in courses.items():
        os.mkdir(f"course_grades/{program}/{course}")
        # Randomly select between 6 and 20 students for this section
        for section, assignments in sections["Sections"].items(): 
            os.mkdir(f"course_grades/{program}/{course}/{section}")
            selected_student_ids = np.random.choice(student_ids, random.randint(6, 20), replace=False)
            section_grades = []
            for assignment in assignments: 
                df = generate_section_csv(selected_student_ids)
                df.to_csv(f"course_grades/{program}/{course}/{section}/{assignment}.csv")
                section_grades.append(df)
            # create final grade
            if len(section_grades) == 1 or len(section_grades)==0: 
                pass
            else: 
                df = calculate_final_grades(section_grades)
                df.to_csv(f"course_grades/{program}/{course}/{section}/final_grade.csv")
            
            
            
        

In [6]:
def score_to_grade(score):
    if score >= 93:
        return 'A'
    elif score >= 90:
        return 'A-'
    elif score >= 83:
        return 'B'
    elif score >= 80:
        return 'B-'
    elif score >= 78:
        return 'C+'
    elif score >= 70:
        return 'C'
    else:
        return 'F'


In [7]:
programs = glob.glob(f"course_grades/*")

overall = {}
for program in programs:
    courses = glob.glob(f"{program}/*")
    for course in courses: 
        sections = glob.glob(f"{course}/*")
        for section in sections:
            grades = {}
            if "All" in section: 
                try: 
                    all_section = pd.read_csv(f"{section}/final_grade.csv")
                    grade_list = all_section['Final Grade'].apply(score_to_grade)
                    for g in grade_list: 
                        if g not in grades.keys(): 
                            grades[g] = 1
                        else: 
                            grades[g] += 1
                    all_section = pd.DataFrame([grades])
                    all_section.to_csv(f"{course}/overall_grades.csv")
                    overall[section]=all_section
                except: 
                    print(section)
                


course_grades\BSI\BCR411\All
course_grades\MCR\MCR702\All
course_grades\MSSI\CAC611\All
course_grades\MSSI\hececk\All
course_grades\MSSI\INT606\All
course_grades\MSSI\RSI451\All
course_grades\MSSI\RSI613\All
course_grades\MSSI\RSI656\All
course_grades\MSTI\MST653\All
course_grades\MSTI\MST657\All
course_grades\MSTI\MST658\All
course_grades\MSTI\MST659\All
course_grades\MSTI\MST690\All
course_grades\MSTI\MST698\All


In [None]:
overall

In [9]:
new_overall = {"BSI":[], "MSSI":[], "MSTI":[], "MCR":[]}
for path, overalls in overall.items(): 
    if "BSI" in path: 
        new_overall["BSI"].append(overall[path])
    elif "MSSI" in path: 
        new_overall["MSSI"].append(overall[path])
    elif "MSTI" in path:
        new_overall["MSTI"].append(overall[path])
    elif "MCR" in path: 
        new_overall["MCR"].append(overall[path])
    else:
        print(path)
    

In [10]:
overall_u = []
for program, grades in new_overall.items(): 
    overall_program = grades[0]
    for grade in grades[1:]:
        #print(grade)
        overall_program = overall_program.add(grade,fill_value=0)
        overall_u.append(overall_program)
    overall_program.to_csv(f"course_grades/{program}/overall_grades.csv")

In [11]:
over = overall_u[0]
for o in overall_u: 
    over = over.add(o,fill_value=0)
over.to_csv(f"course_grades/overall_grades.csv")

In [12]:
over

Unnamed: 0,A,A-,B,B-,C,C+,F
0,1984.0,2340.0,7658,2228.0,1250.0,736.0,71.0
