In [None]:
from dataclasses import dataclass, field
from typing import List, Dict, Optional


@dataclass
class LogParser:
    """
    Parse chronological grading logs for a single student.

    Three kinds of comma-separated lines are recognised:

    * ``Student Info, <id>, <username>, <timestamp>``
    * ``total-points, <max points>, <week tag>, <notebook>, <timestamp>``
      (a notebook header)
    * ``<notebook>, <question tag>, <earned>, <possible>, <timestamp>``
      (a question entry)

    Only notebooks whose header matches ``week_tag`` are kept; when
    ``week_tag`` is ``None`` every notebook that has a header is kept.

    Timestamps are compared as plain strings, so they must sort
    lexicographically in chronological order (e.g. ``2024-12-27 20:55:12``).

    Attributes:
        log_lines: Raw log lines, oldest first.
        week_tag: Week/assignment tag used to select notebooks. It is matched
            by containment against the week field of each header line.
        student_info: Populated by ``parse_logs`` with ``student_id``,
            ``username`` and ``timestamp``.
        assignments: Per-notebook records populated by ``parse_logs``.
        all_questions: Names of the selected notebooks (despite the name),
            rebuilt on every ``parse_logs`` call.
    """

    log_lines: List[str]
    week_tag: Optional[str] = None
    student_info: Dict[str, str] = field(default_factory=dict)
    assignments: Dict[str, Dict] = field(default_factory=dict)
    # Derived state: excluded from __init__, repr and equality so the
    # constructor and comparisons behave as they did before it was declared.
    all_questions: List[str] = field(
        default_factory=list, init=False, repr=False, compare=False
    )

    @staticmethod
    def _split_fields(line: str) -> List[str]:
        """Split a log line on commas and strip whitespace from each field."""
        return [part.strip() for part in line.split(",")]

    def parse_logs(self):
        """
        Parse ``log_lines`` and populate ``student_info`` and ``assignments``.

        Raises:
            ValueError: If the log contains more than one distinct username,
                or a selected question line has a non-numeric score.
        """
        unique_students = set()

        self._find_all_questions()
        selected = set(self.all_questions)

        # Pass 1: student info and notebook headers. Iterating newest-first
        # means the last line in the file wins when timestamps are equal.
        for line in reversed(self.log_lines):
            if self._is_student_info(line):
                self._process_student_info(line, unique_students)
                continue
            parts = self._split_fields(line)
            if (
                len(parts) > 3
                and parts[0].startswith("total-points")
                and parts[3] in selected
            ):
                self._process_assignment_header(line)

        # Pass 2: question entries, once every header record exists. The
        # notebook is matched exactly on the first field, so a notebook whose
        # name merely contains a selected name is not picked up.
        for line in reversed(self.log_lines):
            if self._split_fields(line)[0] in selected:
                self._process_assignment_entry(line)

    def _find_all_questions(self):
        """
        Collect the notebooks selected by ``week_tag`` into ``all_questions``.

        Only ``total-points`` header lines with at least four fields are
        inspected. First-seen order is preserved and duplicates are dropped.
        """
        notebooks = []
        for line in self.log_lines:
            parts = self._split_fields(line)
            if len(parts) < 4 or not parts[0].startswith("total-points"):
                continue
            # No week_tag means "do not filter by week".
            if self.week_tag is not None and self.week_tag not in parts[2]:
                continue
            notebooks.append(parts[3])
        # dict.fromkeys de-duplicates while keeping insertion order.
        self.all_questions = list(dict.fromkeys(notebooks))

    def _is_student_info(self, line: str) -> bool:
        """
        Return True when the line is a ``Student Info`` record.
        """
        return line.startswith("Student Info")

    def _process_student_info(self, line: str, unique_students: set):
        """
        Record the student described by a ``Student Info`` line.

        Example line: ``Student Info, 790, jovyan, 2024-12-27 19:40:10``

        Args:
            line: The ``Student Info`` log line.
            unique_students: Set of usernames seen so far; updated in place.

        Raises:
            ValueError: If more than one distinct username has been seen.
            IndexError: If the line has fewer than four fields.
        """
        # Split on "," and strip, like every other line type, so a line
        # written without spaces after the commas is still understood.
        parts = self._split_fields(line)
        student_name = parts[2]
        unique_students.add(student_name)

        if len(unique_students) > 1:
            raise ValueError(
                f"Error: Multiple unique student names found: {unique_students}"
            )

        # Only the first record processed is stored; parse_logs iterates
        # newest-first, so that is the last Student Info line in the log.
        if not self.student_info:
            self.student_info = {
                "student_id": parts[1],
                "username": student_name,
                "timestamp": parts[3],
            }

    def _process_assignment_header(self, line: str):
        """
        Create or refresh the notebook record described by a header line.

        A new notebook gets a fresh record. An existing record has its
        ``max_points`` and ``latest_timestamp`` replaced only when this header
        is newer. Lines that are not ``total-points`` headers, or that have
        fewer than four fields, are ignored.
        """
        parts = self._split_fields(line)
        if len(parts) < 4 or not parts[0].startswith("total-points"):
            return

        total_points_value = self._extract_total_points(parts)
        timestamp = parts[-1]
        notebook_name = parts[3]

        record = self.assignments.get(notebook_name)
        if record is None:
            self.assignments[notebook_name] = {
                "max_points": total_points_value,
                "notebook": notebook_name,
                # Without a week_tag, fall back to the header's own week field.
                "assignment": (
                    self.week_tag if self.week_tag is not None else parts[2]
                ),
                "total_score": 0.0,
                "latest_timestamp": timestamp,
                "questions": {},  # Ensure 'questions' key is initialized
            }
        elif record["latest_timestamp"] < timestamp:
            record["max_points"] = total_points_value
            record["latest_timestamp"] = timestamp

    def _process_assignment_entry(self, line: str):
        """
        Record a question entry, keeping the newest attempt per question.

        Missing score fields default to ``0.0``. Lines without a question tag
        (fewer than two fields) are ignored.

        Raises:
            ValueError: If a score field is present but not numeric.
        """
        parts = self._split_fields(line)
        if len(parts) < 2:
            return

        assignment_tag = parts[0]
        question_tag = parts[1]
        score_earned = float(parts[2]) if len(parts) > 2 else 0.0
        score_possible = float(parts[3]) if len(parts) > 3 else 0.0
        timestamp = parts[-1]

        # Ensure assignment entry exists (entries may arrive without a header
        # when this method is called directly).
        record = self.assignments.setdefault(
            assignment_tag,
            {
                "questions": {},
                "total_score": 0.0,
                "latest_timestamp": timestamp,
            },
        )

        # Add or update the question with the most recent timestamp
        questions = record["questions"]
        if (
            question_tag not in questions
            or timestamp > questions[question_tag]["timestamp"]
        ):
            questions[question_tag] = {
                "score_earned": score_earned,
                "score_possible": score_possible,
                "timestamp": timestamp,
            }

        # Update the latest timestamp if this one is more recent
        if timestamp > record["latest_timestamp"]:
            record["latest_timestamp"] = timestamp

    def _extract_total_points(self, parts: List[str]) -> Optional[float]:
        """
        Return the max-points value (second field) of a header line.

        Returns ``None`` when the field is missing or not numeric.
        """
        try:
            return float(parts[1].strip())
        except (ValueError, IndexError):
            return None

    def calculate_total_scores(self):
        """
        Set each notebook's ``total_score`` to the sum of the ``score_earned``
        values of its recorded questions.
        """
        for data in self.assignments.values():
            data["total_score"] = sum(
                q["score_earned"] for q in data["questions"].values()
            )

    def get_results(self) -> Dict[str, Dict]:
        """
        Return the parsed results as a dictionary with three sections:

        * ``student_information``: the stored ``student_info`` dict.
        * ``assignment_information``: per notebook, ``latest_timestamp``,
          ``total_score`` and ``max_points`` (``0.0`` when no header set it).
        * ``assignment_scores``: per notebook, the ``questions`` dict and
          ``total_score``.

        ``total_score`` is only meaningful after ``calculate_total_scores``.
        """
        return {
            "student_information": self.student_info,
            "assignment_information": {
                assignment: {
                    "latest_timestamp": data["latest_timestamp"],
                    "total_score": data["total_score"],
                    "max_points": data.get("max_points", 0.0),
                }
                for assignment, data in self.assignments.items()
            },
            "assignment_scores": {
                assignment: {
                    "questions": data["questions"],
                    "total_score": data["total_score"],
                }
                for assignment, data in self.assignments.items()
            },
        }


# ----------------- Example usage -----------------

if __name__ == "__main__":
    # Demo log: one student, three graded runs of 17_operators_q under
    # week1-readings, and a trailing 19_operators_q entry that has no header.
    log_lines = [
        "Student Info, 449, jovyan, 2024-12-27 20:55:12",
        "total-points, 4.0, week1-readings, 17_operators_q, 2024-12-27 20:55:23",
        "17_operators_q, question-operators-mario-dining-1, 0, 0.5, 2024-12-27 20:55:23",
        "17_operators_q, question-operators-mario-dining-2, 0, 0.5, 2024-12-27 20:55:23",
        "17_operators_q, question-operators-mario-dining-3, 0, 0.5, 2024-12-27 20:55:23",
        "17_operators_q, question-operators-mario-dining-4, 0, 0.5, 2024-12-27 20:55:23",
        "17_operators_q, question-operators-mario-dining-5, 0, 1.0, 2024-12-27 20:55:23",
        "total-points, 4.0, week1-readings, 17_operators_q, 2024-12-27 20:55:42",
        "17_operators_q, question-operators-mario-dining-1, 0.5, 0.5, 2024-12-27 20:55:42",
        "17_operators_q, question-operators-mario-dining-2, 0.5, 0.5, 2024-12-27 20:55:42",
        "17_operators_q, question-operators-mario-dining-3, 0.5, 0.5, 2024-12-27 20:55:42",
        "17_operators_q, question-operators-mario-dining-4, 0.5, 0.5, 2024-12-27 20:55:42",
        "17_operators_q, question-operators-mario-dining-5, 1.0, 1.0, 2024-12-27 20:55:42",
        "total-points, 2.0, week1-readings, 17_operators_q, 2024-12-27 20:55:47",
        "17_operators_q, question-operators-mario-dining-1, 0.5, 0.5, 2024-12-27 20:55:47",
        "17_operators_q, question-operators-mario-dining-2, 0, 0.5, 2024-12-27 20:55:47",
        "17_operators_q, question-operators-mario-dining-3, 0.5, 0.5, 2024-12-27 20:55:47",
        "17_operators_q, question-operators-mario-dining-4, 0, 0.5, 2024-12-27 20:55:47",
        "17_operators_q, question-operators-mario-dining-5, 0, 1.0, 2024-12-27 20:55:47",
        "19_operators_q, question-operators-mario-dining-3, 0.5, 0.5, 2024-12-27 20:55:47",
    ]

    # Parse, total up, then fetch the three-section result dictionary.
    parser = LogParser(log_lines=log_lines, week_tag="week1-readings")
    parser.parse_logs()
    parser.calculate_total_scores()
    results = parser.get_results()

    # Each report section is emitted with one print call; sep="\n" yields the
    # same line breaks as separate print statements would.
    print("Student Information:", results["student_information"], sep="\n")

    print("\nAssignment Information:")
    for assignment, info in results["assignment_information"].items():
        print(
            f"\nAssignment Tag: {assignment}",
            f"Latest Timestamp: {info['latest_timestamp']}",
            f"Total Score: {info['total_score']}",
            f"Max Points: {info['max_points']}",
            sep="\n",
        )

    print("\nAssignment Scores:")
    for assignment, score_info in results["assignment_scores"].items():
        print(
            f"\nAssignment Tag: {assignment}",
            f"Total Score Earned: {score_info['total_score']}",
            "Questions:",
            sep="\n",
        )
        for q_tag, q_data in score_info["questions"].items():
            print(
                f"  {q_tag}:",
                f"    score_earned: {q_data['score_earned']}",
                f"    score_possible: {q_data['score_possible']}",
                f"    timestamp: {q_data['timestamp']}",
                sep="\n",
            )

In [None]:
# Scenario: one student with a single submission for each of three notebooks.
# Two notebooks are tagged week1-readings and one is tagged week2-concepts,
# while the parser below is configured for week1-readings.
log_lines = [
    # Student Info
    "Student Info, 449, jovyan, 2024-12-27 20:55:12",
    # Week 1 Assignment: 17_operators_q
    "total-points, 3.0, week1-readings, 17_operators_q, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-1, 0, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-2, 0.5, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-3, 0.5, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-4, 0.5, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-5, 1.0, 1.0, 2024-12-27 20:55:23",
    # Week 1 Assignment: 18_advanced_q
    "total-points, 4.0, week1-readings, 18_advanced_q, 2024-12-27 20:56:00",
    "18_advanced_q, question-advanced-problem-1, 1.0, 1.0, 2024-12-27 20:56:00",
    "18_advanced_q, question-advanced-problem-2, 1.0, 1.0, 2024-12-27 20:56:00",
    "18_advanced_q, question-advanced-problem-3, 0.5, 1.0, 2024-12-27 20:56:00",
    "18_advanced_q, question-advanced-problem-4, 0.5, 1.0, 2024-12-27 20:56:00",
    # Week 2 Assignment: 19_concepts_q
    "total-points, 5.0, week2-concepts, 19_concepts_q, 2024-12-28 20:57:00",
    "19_concepts_q, question-concepts-basic-1, 0.5, 1.0, 2024-12-28 20:57:00",
    "19_concepts_q, question-concepts-basic-2, 0.5, 1.0, 2024-12-28 20:57:00",
    "19_concepts_q, question-concepts-basic-3, 0.5, 1.0, 2024-12-28 20:57:00",
    "19_concepts_q, question-concepts-basic-4, 0.5, 1.0, 2024-12-28 20:57:00",
    "19_concepts_q, question-concepts-basic-5, 1.0, 1.0, 2024-12-28 20:57:00",
]


# Parse, compute per-notebook totals, then collect the result dictionary.
parser = LogParser(log_lines=log_lines, week_tag="week1-readings")
parser.parse_logs()
parser.calculate_total_scores()
results = parser.get_results()

# Bare expression; presumably left last so a notebook cell displays the dict.
results


In [None]:
# Scenario: 17_operators_q is submitted twice. The re-attempt at 21:00:00
# logs only questions 1, 2 and 4, so questions 3 and 5 appear only in the
# first attempt. 18_challenging_q is submitted once. The header lines here
# have no space between the week tag and the notebook name.
log_lines = [
    # Student Info
    "Student Info, 550, jovyan2, 2024-12-27 20:55:12",
    # Week 1 Assignment: 17_operators_q (Initial Attempt)
    "total-points, 3.0, week1-readings,17_operators_q, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-1, 0, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-2, 0, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-3, 0.5, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-4, 0, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-5, 0, 1.0, 2024-12-27 20:55:23",
    # Week 1 Assignment: 17_operators_q (Re-attempt)
    "total-points, 3.0, week1-readings,17_operators_q, 2024-12-27 21:00:00",
    "17_operators_q, question-operators-mario-dining-1, 0.5, 0.5, 2024-12-27 21:00:00",
    "17_operators_q, question-operators-mario-dining-2, 0.5, 0.5, 2024-12-27 21:00:00",
    "17_operators_q, question-operators-mario-dining-4, 0.5, 0.5, 2024-12-27 21:00:00",
    # Week 1 Assignment: 18_challenging_q
    "total-points, 5.0, week1-readings,18_challenging_q, 2024-12-27 21:05:00",
    "18_challenging_q, question-challenging-problem-1, 1.0, 1.0, 2024-12-27 21:05:00",
    "18_challenging_q, question-challenging-problem-2, 0.5, 1.0, 2024-12-27 21:05:00",
    "18_challenging_q, question-challenging-problem-3, 1.0, 1.0, 2024-12-27 21:05:00",
    "18_challenging_q, question-challenging-problem-4, 1.0, 1.0, 2024-12-27 21:05:00",
    "18_challenging_q, question-challenging-problem-5, 0, 1.0, 2024-12-27 21:05:00",
]

# Parse, compute per-notebook totals, then collect the result dictionary.
parser = LogParser(log_lines=log_lines, week_tag="week1-readings")
parser.parse_logs()
parser.calculate_total_scores()
results = parser.get_results()

# Bare expression; presumably left last so a notebook cell displays the dict.
results

In [None]:
# Scenario: 17_operators_q logs only questions 1, 3 and 5. 18_operators_q
# has a header but no question lines. Notebooks tagged week2-math and
# week3-concepts are also present, while the parser below is configured
# for week1-readings.
log_lines = [
    # Student Info
    "Student Info, 660, jovyan3, 2024-12-27 20:55:12",
    # Week 1 Assignment: skipped some questions
    "total-points, 4.0, week1-readings,17_operators_q, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-1, 0.5, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-3, 0.5, 0.5, 2024-12-27 20:55:23",
    "17_operators_q, question-operators-mario-dining-5, 1.0, 1.0, 2024-12-27 20:55:23",
    "total-points, 4.0, week1-readings,18_operators_q, 2024-12-27 20:55:23",
    # Week 2 Assignment: all questions attempted
    "total-points, 5.0, week2-math,20_math_q, 2024-12-28 20:55:23",
    "20_math_q, question-math-basic-1, 1.0, 1.0, 2024-12-28 20:55:23",
    "20_math_q, question-math-basic-2, 0.5, 1.0, 2024-12-28 20:55:23",
    "20_math_q, question-math-basic-3, 0.5, 1.0, 2024-12-28 20:55:23",
    "20_math_q, question-math-basic-4, 1.0, 1.0, 2024-12-28 20:55:23",
    "20_math_q, question-math-basic-5, 0.5, 1.0, 2024-12-28 20:55:23",
    # Week 3 Assignment: some skipped, partial scores
    "total-points, 4.0, week3-concepts,21_concepts_q, 2024-12-29 20:55:23",
    "21_concepts_q, question-concepts-basic-1, 0.5, 1.0, 2024-12-29 20:55:23",
    "21_concepts_q, question-concepts-basic-2, 0.5, 1.0, 2024-12-29 20:55:23",
]

# Parse, compute per-notebook totals, then collect the result dictionary.
parser = LogParser(log_lines=log_lines, week_tag="week1-readings")
parser.parse_logs()
parser.calculate_total_scores()
results = parser.get_results()

# Bare expression; presumably left last so a notebook cell displays the dict.
results