In [1]:
import json
import os
import pandas as pd
from py_constants import *
from py_common import *
# %run py_common.py ## Common Methods
%run py_feature_extraction.ipynb ##Feature Extraction

### Extracting Student Features (only features present in the teacher's solution are checked)

In [2]:
def extract_features(root, teacher_solution):
    """Extract the features of one submission that the teacher's solution also lists.

    Args:
        root: Parsed submission tree; passed unchanged to every extractor function.
        teacher_solution: Container supporting ``in``. Only FUNCTION_MAP keys
            found in it are extracted.

    Returns:
        dict mapping each selected feature key to the value returned by its
        extractor, in FUNCTION_MAP iteration order.

    Raises:
        KeyError: If a selected FUNCTION_MAP entry names a function that is not
            defined in this module's global namespace.
    """
    # Extractors are resolved by name from the module globals.
    namespace = globals()
    return {
        feature_key: namespace[extractor_name](root)
        for feature_key, extractor_name in FUNCTION_MAP.items()
        if feature_key in teacher_solution
    }

### Calculate Individual Student Scores

In [3]:
def calculate_individual_scores(teacher_solution, student_solution):
    """Score a student's feature values against the teacher's reference values.

    For every feature in ``teacher_solution`` the score is
    ``1 - |teacher - student| / teacher`` clamped to the range [0, 1]. If the
    student's value exceeds the teacher's, the score is additionally multiplied
    by the module-level PENALTY_FACTOR. Features missing from
    ``student_solution`` score 0.

    Args:
        teacher_solution: dict mapping feature name -> reference numeric value.
        student_solution: dict mapping feature name -> student's numeric value.

    Returns:
        tuple ``(individual_scores, final_score_percentage)`` where
        ``individual_scores`` maps each teacher feature to a dict with the keys
        ``student_count``, ``teacher_count``, ``score`` (a "student/teacher"
        string) and ``feature_score``; ``final_score_percentage`` is the mean
        feature score times 100, or 0 when ``teacher_solution`` is empty.
    """
    individual_scores = {}
    total_score = 0

    for feature, teacher_count in teacher_solution.items():
        if feature not in student_solution:
            # Feature absent from the student's solution: contributes nothing.
            individual_scores[feature] = {
                "student_count": 0,
                "teacher_count": teacher_count,
                "score": f"0/{teacher_count}",
                "feature_score": 0
            }
            continue

        student_count = student_solution[feature]

        if teacher_count == 0:
            # A relative difference is undefined for a zero reference value
            # (it used to raise ZeroDivisionError): full credit only for an
            # exact match, no credit otherwise.
            feature_score = 1 if student_count == 0 else 0
        else:
            # Relative closeness to the teacher's value, clamped to [0, 1].
            feature_score = max(0, min(1, 1 - abs(teacher_count - student_count) / teacher_count))
            # Overshooting the teacher's value is penalized on top of the distance.
            if student_count > teacher_count:
                feature_score *= PENALTY_FACTOR

        total_score += feature_score
        individual_scores[feature] = {
            "student_count": student_count,
            "teacher_count": teacher_count,
            "score": f"{student_count}/{teacher_count}",
            "feature_score": feature_score
        }

    # Average over all teacher features, including those the student lacks.
    num_features = len(teacher_solution)
    final_score_percentage = total_score / num_features * 100 if num_features > 0 else 0
    return individual_scores, final_score_percentage

# Example usage:
# teacher_solution = {'NF': 4, 'NP': 4, 'NPt-s': 1, 'NAdd-s': 1, 'NAdd-B': 1, 'NAdd-A': 1}
# student_solution = {'NF': 4, 'NP': 7, 'NPt-s': 1, 'NAdd-s': 1, 'NAdd-B': 1, 'NAdd-A': 1}

# individual_scores, final_score_percentage = calculate_individual_scores(teacher_solution, student_solution)
# print("Individual scores:", individual_scores)
# print("Final score percentage:", final_score_percentage)

### Formatting an individual result into Excel row data

In [4]:
def create_row_data(student_id, student_solution, final_score_percentage):
    """Flatten one result into a single row dict for the results table.

    Args:
        student_id: Value stored under the 'Student_ID' key.
        student_solution: dict of feature name -> value; each entry becomes a
            key of the row.
        final_score_percentage: Numeric score; rounded to two decimals and
            suffixed with "%" under the 'Final_Score' key.

    Returns:
        dict with 'Student_ID' first, then the feature entries, then 'Final_Score'.
    """
    formatted_score = str(round(final_score_percentage, 2)) + "%"
    return {
        'Student_ID': student_id,
        **student_solution,
        'Final_Score': formatted_score,
    }

## Main Function

In [5]:
if __name__ == "__main__":
    # Grade every student submission of one assignment against the instructor's
    # feature values and write one row per student to an Excel sheet.

    assignment_number = 1  # INPUT ASSIGNMENT NUMBER
    file_name = "submission.cpp"  # INPUT FILE NAME

    student_submissions_path = STUDENT_BASE_PATH.format(assignment_number)

    instructor_json = INSTRUCTOR_JSON_FILE.format(assignment_number)
    with open(instructor_json, 'r') as f:
        teacher_solution = json.load(f)

    # Column order of the sheet: student id, the teacher's features, final score.
    columns = ['Student_ID'] + list(teacher_solution.keys()) + ['Final_Score']

    # The first row shows the instructor's reference values.
    row_data_array = [create_row_data('Instructor Solution', teacher_solution, 100)]

    # os.listdir returns entries in arbitrary order; sort so the row order of
    # the sheet is reproducible between runs.
    for student_folder in sorted(os.listdir(student_submissions_path)):
        # Only sub-folders are student submissions; skip stray files
        # (e.g. OS metadata files) lying next to them.
        if not os.path.isdir(os.path.join(student_submissions_path, student_folder)):
            continue

        student_id = student_folder  # Assuming student folder names are student IDs
        student_submission = STUDENT_SUBMISSION.format(assignment_number, student_folder, file_name)
        xml_output_file = STUDENT_XML_OUTPUT.format(assignment_number, student_folder, file_name)

        # Produce the XML file for the submission, then parse it.
        generate_ast(student_submission, xml_output_file)
        root = parse_xml(xml_output_file)

        student_solution = extract_features(root, teacher_solution)
        individual_scores, final_score_percentage = calculate_individual_scores(teacher_solution, student_solution)

        print(student_id, individual_scores, final_score_percentage)

        row_data_array.append(create_row_data(student_id, student_solution, final_score_percentage))

    # Build the frame once from all collected rows.
    results_df = pd.DataFrame(row_data_array, columns=columns)
    result_excel_file = RESULTS_ASSIGNMENT.format(assignment_number)
    results_df.to_excel(result_excel_file, index=False)

101 {'NF': {'student_count': 3, 'teacher_count': 3, 'score': '3/3', 'feature_score': 1}, 'FLC': {'student_count': 18.25, 'teacher_count': 15.75, 'score': '18.25/15.75', 'feature_score': 0.7992063492063491}, 'NP': {'student_count': 3, 'teacher_count': 3, 'score': '3/3', 'feature_score': 1}, 'NC': {'student_count': 3, 'teacher_count': 4, 'score': '3/4', 'feature_score': 0.75}, 'NR': {'student_count': 6, 'teacher_count': 6, 'score': '6/6', 'feature_score': 1}, 'NFRec': {'student_count': 0, 'teacher_count': 1, 'score': '0/1', 'feature_score': 0}, 'NCRec': {'student_count': 0, 'teacher_count': 1, 'score': '0/1', 'feature_score': 0}, 'NIFPar': {'student_count': 0, 'teacher_count': 1, 'score': '0/1', 'feature_score': 0}, 'NRRec': {'student_count': 0, 'teacher_count': 1, 'score': '0/1', 'feature_score': 0}, 'NRNRec': {'student_count': 0, 'teacher_count': 1, 'score': '0/1', 'feature_score': 0}} 45.49206349206349
102 {'NF': {'student_count': 3, 'teacher_count': 3, 'score': '3/3', 'feature_score'