In [1]:
import nbformat
import re
import textwrap
from typing import Dict, List, Tuple, Any
from pathlib import Path

def print_table(section_title: str, rows: List[Tuple[str, str, float, str]]):
    """
    Print a formatted table for a section.

    Args:
        section_title (str): Title of the section to be displayed
            (upper-cased in the banner line).
        rows (List[Tuple]): List of tuples containing
            (Detail, Marking, Mark, CodeDetail).

    Each row is formatted with wrapped text and proper alignment. A row whose
    Marking is exactly "Found" shows a check mark in the Marking column; any
    other marking leaves that column blank. A non-blank CodeDetail is printed
    below the row, indented by two spaces inside the Detail column.

    Any exception raised while formatting is reported on stdout instead of
    being propagated, so a malformed row does not abort the whole report.
    """
    try:
        # Define fixed column widths
        col_detail = 60
        col_marking = 12
        col_mark = 8
        code_indent = "  "

        # Prepare header and separator lines
        header = f"| {'Detail'.ljust(col_detail)} | {'Marking'.ljust(col_marking)} | {'Mark'.ljust(col_mark)} |"
        separator = f"|{'-'*(col_detail+2)}|{'-'*(col_marking+2)}|{'-'*(col_mark+2)}|"

        blank_marking = "".ljust(col_marking)
        blank_mark = "".ljust(col_mark)

        print(f"\n--- {section_title.upper()} DETAILS ---")
        print(header)
        print(separator)

        for detail, marking, mark, code_detail in rows:
            mark_sym = "✓" if marking == "Found" else ""
            detail_lines = textwrap.wrap(detail, width=col_detail) or [""]
            mark_lines = textwrap.wrap(str(mark), width=col_mark) or [""]
            # Wrap code to the space left AFTER the indent; wrapping at the
            # full column width would push the right-hand border out of line.
            code_lines = (
                textwrap.wrap(code_detail, width=col_detail - len(code_indent))
                if code_detail.strip()
                else []
            )

            # Only the columns that are actually printed line-by-line decide
            # how many physical lines the row needs.
            max_lines = max(len(detail_lines), len(mark_lines))

            for i in range(max_lines):
                d_line = detail_lines[i] if i < len(detail_lines) else ""
                # The check mark is shown on the first physical line only.
                m_line = mark_sym.ljust(col_marking) if i == 0 else blank_marking
                mark_line = mark_lines[i] if i < len(mark_lines) else ""
                print(f"| {d_line.ljust(col_detail)} | {m_line} | {mark_line.ljust(col_mark)} |")

            for line in code_lines:
                code_cell = (code_indent + line).ljust(col_detail)
                print(f"| {code_cell} | {blank_marking} | {blank_mark} |")
            print(separator)
    except Exception as e:
        print(f"Error printing table: {str(e)}")

class NotebookGrader:
    """
    A class to grade Jupyter notebooks based on specific criteria.

    Six sections are graded by searching the notebook's cells for textual
    patterns: imports (10), data loading (15), simple linear regression (25),
    scatter plots (20), multiple regression (20) and reasoning (10), for a
    maximum of 100 marks.

    Each ``grade_*`` method returns the section score, appends one line to
    ``self.feedback`` and stores the per-item rows for the section in
    ``self.details`` as (Detail, Marking, Mark, CodeDetail) tuples.
    """

    # Maximum achievable total over all sections; used for the percentage.
    MAX_TOTAL_SCORE = 100

    def __init__(self):
        """Initialize the grader with default values and required imports."""
        self.total_score = 0
        self.feedback = []
        self.required_imports = ['numpy', 'pandas', 'matplotlib.pyplot', 'statsmodels.api']
        self.details: Dict[str, List[Tuple[str, str, float, str]]] = {}
        # Conventional ``as`` names for the required modules. Kept for
        # backward compatibility; an alias is not a module name, so it is not
        # used when deciding whether a module was imported.
        self.import_aliases = {
            'numpy': ['np'],
            'pandas': ['pd'],
            'matplotlib.pyplot': ['plt'],
            'statsmodels.api': ['sm']
        }

    def _reset_state(self) -> None:
        """Start a fresh grading run so results of earlier runs do not leak in."""
        # New objects (not .clear()) so result dicts returned earlier stay intact.
        self.total_score = 0
        self.feedback = []
        self.details = {}

    def check_import_with_aliases(self, source: str, module: str) -> bool:
        """
        Check if a module is imported, whatever alias it is bound to.

        Recognised forms (``module`` = ``pkg.sub``):
            import pkg.sub            import pkg.sub as x
            import other, pkg.sub     from pkg.sub import name
            from pkg import sub       from pkg import sub as x

        Args:
            source (str): Source code to check (one line or a whole cell)
            module (str): Dotted module name to look for

        Returns:
            bool: True if an import statement for the module is found
        """
        escaped = re.escape(module)
        patterns = [
            # "import m", "import m as x", "import a, m"
            rf"^[ \t]*import[ \t]+(?:[^#\n]*,[ \t]*)?{escaped}(?:\W|$)",
            # "from m import ..." (also "from m.sub import ...")
            rf"^[ \t]*from[ \t]+{escaped}(?:\W|$)",
        ]
        parent, dot, child = module.rpartition('.')
        if dot:
            # "from pkg import sub [as x]" is an equally valid spelling.
            patterns.append(
                rf"^[ \t]*from[ \t]+{re.escape(parent)}[ \t]+import[ \t]+"
                rf"[^#\n]*\b{re.escape(child)}\b"
            )
        # Case-sensitive on purpose: "Import Numpy" is not valid Python.
        return any(re.search(pattern, source, re.MULTILINE) for pattern in patterns)

    def _find_import_line(self, cells: List, module: str) -> str:
        """Return the first code line importing ``module`` (stripped), or ""."""
        for cell in cells:
            if cell['cell_type'] != 'code':
                continue
            for line in cell['source'].splitlines():
                if self.check_import_with_aliases(line, module):
                    return line.strip()
        return ""

    def grade_imports(self, cells: List) -> int:
        """
        Grade the import section of the notebook.

        The 10 marks are split evenly over ``self.required_imports``; the
        returned score is the truncated sum of the marks awarded.

        Args:
            cells (List): Notebook cells (mappings with 'cell_type' and 'source')

        Returns:
            int: Score for the section; 0 if grading itself fails (the error
            is then recorded in the feedback).
        """
        try:
            found_imports = {}
            for req in self.required_imports:
                code_detail = self._find_import_line(cells, req)
                if code_detail:
                    found_imports[req] = code_detail

            rows = []
            marks_per_import = 10 / len(self.required_imports)
            for req in self.required_imports:
                if req in found_imports:
                    rows.append((f"Found import: {req}", "Found", marks_per_import, found_imports[req]))
                else:
                    rows.append((f"Missing import: {req}", "Missing", 0, ""))

            missing = [req for req in self.required_imports if req not in found_imports]
            score = sum(mark for _, _, mark, _ in rows)
            self.feedback.append("✓ All required imports present" if not missing
                                 else f"✗ Missing imports: {', '.join(missing)}")
            self.details['Imports'] = rows
            return int(score)
        except Exception as e:
            self.feedback.append(f"Error grading imports: {str(e)}")
            return 0

    def grade_data_loading(self, cells: List) -> int:
        """
        Grade data loading: 15 marks if any code cell contains ``pd.read_csv``.

        One detail row is recorded per code cell inspected, up to and
        including the first cell that contains the call.

        Args:
            cells (List): Notebook cells

        Returns:
            int: 15 if the call was found, otherwise 0
        """
        score = 0
        rows = []
        for i, cell in enumerate(cells):
            if cell['cell_type'] != 'code':
                continue
            detail = f"Checking for pd.read_csv in cell {i+1}"
            if "pd.read_csv" in cell['source']:
                score = 15
                rows.append((detail, "Found", 15, cell['source'].strip()))
                break  # Stop after first found instance.
            rows.append((detail, "Missing", 0, ""))

        if score:
            overall_feedback = "✓ Data loading implemented correctly"
        else:
            overall_feedback = "✗ Missing or incorrect data loading"

        self.feedback.append(overall_feedback)
        self.details['Data Loading'] = rows
        return score

    def grade_simple_linear_regression(self, cells: List) -> int:
        """
        Grade the five simple regressions (25 marks, split evenly).

        A model counts as implemented when a code cell assigns
        ``lr_model_<var> = sm.OLS(`` (case-insensitive) for var in X1..X5.
        The first matching cell per variable is kept as the code detail.

        Args:
            cells (List): Notebook cells

        Returns:
            int: Score for the section
        """
        section_marks = 25
        variables = ('X1', 'X2', 'X3', 'X4', 'X5')
        # Compile once per variable instead of once per (cell, variable) pair.
        patterns = {
            var: re.compile(rf"lr_model_{var}\s*=\s*sm\.OLS\(", re.IGNORECASE)
            for var in variables
        }
        required_elements = dict.fromkeys(variables)

        for cell in cells:
            if cell['cell_type'] != 'code':
                continue
            source = cell['source']
            for var, pattern in patterns.items():
                if required_elements[var] is None and pattern.search(source):
                    required_elements[var] = source.strip()

        implemented_count = sum(1 for v in required_elements.values() if v is not None)
        # Integer arithmetic avoids any float truncation surprises.
        score = implemented_count * section_marks // len(variables)

        rows = []
        marks_each = section_marks / len(variables)
        for var, code_detail in required_elements.items():
            if code_detail:
                rows.append((f"Found linear regression model for {var}", "Found", marks_each, code_detail))
            else:
                rows.append((f"Missing linear regression model for {var}", "Missing", 0, ""))

        if implemented_count == len(variables):
            overall_feedback = "✓ All simple linear regressions implemented correctly"
        else:
            overall_feedback = "✗ Missing some linear regression implementations"

        self.feedback.append(overall_feedback)
        self.details['Simple Linear Regression'] = rows
        return score

    def grade_scatter_plots(self, cells: List) -> int:
        """
        Grade scatter plots: 7 marks per qualifying cell, capped at 20.

        A code cell qualifies when it contains both ``plt.scatter`` and
        ``plt.plot``. One detail row is recorded per code cell.

        Args:
            cells (List): Notebook cells

        Returns:
            int: Score for the section (at most 20)
        """
        plot_count = 0
        rows = []
        for i, cell in enumerate(cells):
            if cell['cell_type'] != 'code':
                continue
            detail = f"Checking scatter plot in cell {i+1}"
            source = cell['source']
            if 'plt.scatter' in source and 'plt.plot' in source:
                plot_count += 1
                rows.append((detail, "Found", 7, source.strip()))
            else:
                rows.append((detail, "Missing", 0, ""))

        score = min(plot_count * 7, 20)
        if score == 20:
            overall_feedback = "✓ All scatter plots implemented correctly"
        else:
            overall_feedback = "✗ Missing or incomplete scatter plots"

        self.feedback.append(overall_feedback)
        self.details['Scatter Plots'] = rows
        return score

    def grade_multiple_regression(self, cells: List) -> int:
        """
        Grade the multiple regression model and the prediction (10 marks each).

        The model is detected by an assignment such as
        ``lr_model_MultipleR = sm.OLS(``; the prediction by the literal text
        ``Performance=`` in a code cell. The score is the sum of the marks
        shown in the detail rows, so a notebook with only one of the two
        parts receives 10 rather than 0.

        Args:
            cells (List): Notebook cells

        Returns:
            int: 0, 10 or 20
        """
        model_pattern = re.compile(
            r"(linear|lr|regression)_model_(MultipleR|MultiR|Multiple_Regression)\s*=\s*sm\.OLS\(",
            re.IGNORECASE,
        )
        found_multiple = None
        found_prediction = None
        rows = []

        for i, cell in enumerate(cells):
            if cell['cell_type'] != 'code':
                continue
            source = cell['source']
            if not found_multiple and model_pattern.search(source):
                found_multiple = source.strip()
                rows.append((f"Checking for multiple regression in cell {i+1}", "Found", 10, found_multiple))
            if not found_prediction and 'Performance=' in source:
                found_prediction = source.strip()
                rows.append((f"Checking for prediction in cell {i+1}", "Found", 10, found_prediction))

        # Make absent parts visible in the table instead of printing nothing.
        if not found_multiple:
            rows.append(("Missing multiple regression model", "Missing", 0, ""))
        if not found_prediction:
            rows.append(("Missing prediction", "Missing", 0, ""))

        score = sum(mark for _, _, mark, _ in rows)
        if found_multiple and found_prediction:
            overall_feedback = "✓ Multiple regression implemented correctly"
        else:
            overall_feedback = "✗ Issues with multiple regression implementation"

        self.feedback.append(overall_feedback)
        self.details['Multiple Regression'] = rows
        return score

    def grade_reasoning(self, cells: List) -> int:
        """
        Grade the written reasoning in markdown cells (5 + 5 marks).

        5 marks are awarded once for the first markdown cell containing all
        required values ("80.28" and "76"). A further 5 marks are awarded for
        the first markdown cell, at or after that one, that contains any of
        the reasoning keywords. The score equals the sum of the marks shown
        in the detail rows.

        Args:
            cells (List): Notebook cells

        Returns:
            int: 0, 5 or 10
        """
        rows = []
        reasoning_keywords = ["difference", "close", "error", "r-square"]
        required_values = ["80.28", "76"]
        found_values = False
        found_reasoning = False

        for i, cell in enumerate(cells):
            if cell['cell_type'] != 'markdown':
                continue
            source_lower = cell['source'].lower()
            detail = f"Checking reasoning in markdown cell {i+1}"

            # The values are credited once; later cells are not re-checked,
            # so the same 5 marks cannot appear in the table twice.
            if not found_values:
                if all(val in source_lower for val in required_values):
                    found_values = True
                    rows.append((f"{detail}: Found required values", "Found", 5, cell['source'].strip()))
                else:
                    rows.append((f"{detail}: Required values not found", "Missing", 0, ""))

            if found_values and any(keyword in source_lower for keyword in reasoning_keywords):
                found_reasoning = True
                rows.append((f"{detail}: Found reasoning keywords", "Found", 5, cell['source'].strip()))
                break

        score = sum(mark for _, _, mark, _ in rows)
        if found_reasoning:
            overall_feedback = "✓ Reasoning for actual vs predicted values provided"
        else:
            overall_feedback = "✗ Missing or incomplete reasoning for actual vs predicted values"

        self.feedback.append(overall_feedback)
        self.details['Reasoning'] = rows
        return score

    def _grade_cells(self, cells: List) -> Dict[str, Any]:
        """Run every section grader over ``cells`` and assemble the result dict."""
        self._reset_state()
        scores = {
            'imports': self.grade_imports(cells),
            'data_loading': self.grade_data_loading(cells),
            'simple_linear_regression': self.grade_simple_linear_regression(cells),
            'scatter_plots': self.grade_scatter_plots(cells),
            'multiple_regression': self.grade_multiple_regression(cells),
            'reasoning': self.grade_reasoning(cells)
        }
        self.total_score = sum(scores.values())

        return {
            'total_score': self.total_score,
            # Multiply before dividing: (29 / 100) * 100 is 28.999999999999996.
            'percentage': self.total_score * 100 / self.MAX_TOTAL_SCORE,
            'component_scores': scores,
            'feedback': self.feedback,
            'details': self.details
        }

    def grade_notebook(self, notebook_path: str) -> Dict[str, Any]:
        """
        Grade a Jupyter notebook file.

        State left over from a previous call is discarded first, so one
        grader instance can grade several notebooks in sequence.

        Args:
            notebook_path (str): Path to the notebook file

        Returns:
            Dict: Grading results including scores and feedback. If the file
            is missing or cannot be read or graded, a zero-score result whose
            feedback holds the error message is returned instead of raising.
        """
        self._reset_state()
        try:
            notebook_path = Path(notebook_path)
            if not notebook_path.exists():
                raise FileNotFoundError(f"Notebook file not found: {notebook_path}")

            # Notebook files are JSON stored as UTF-8; do not depend on the
            # platform's default encoding.
            with open(notebook_path, encoding="utf-8") as f:
                nb = nbformat.read(f, as_version=4)

            return self._grade_cells(nb['cells'])
        except Exception as e:
            return {
                'total_score': 0,
                'percentage': 0,
                'component_scores': {},
                'feedback': [f"Error grading notebook: {str(e)}"],
                'details': {}
            }

def print_grading_report(results: Dict[str, Any]):
    """
    Write a human-readable grading report to standard output.

    The report lists the total score, the percentage, the score of each
    component, the feedback lines and finally one detail table per section.

    Args:
        results (Dict): Grading results from NotebookGrader; the keys read are
            'total_score', 'percentage', 'component_scores', 'feedback' and
            'details'.

    Any exception raised while printing (for example a missing key) is
    reported on stdout rather than propagated.
    """
    try:
        # Summary block.
        print("\n=== GRADING REPORT ===")
        print("Total Score: {}/100".format(results['total_score']))
        print("Percentage: {}%\n".format(results['percentage']))

        # One bullet per graded component.
        print("Component Scores:")
        for name, value in results['component_scores'].items():
            print("- {}: {}".format(name, value))

        # Overall feedback, one line per section.
        print("\nFeedback:")
        for message in results['feedback']:
            print(message)

        # Per-section breakdown tables.
        print("\nDetailed Tables:")
        for title, table_rows in results['details'].items():
            print_table(title, table_rows)
    except Exception as err:
        print("Error printing grading report: {}".format(str(err)))

# Usage example: grade a single notebook and print its report when this file
# is executed directly (not when it is imported as a module).
if __name__ == "__main__":
    try:
        # `grader` and `results` are bound at module level here, so they stay
        # available after this block has run.
        grader = NotebookGrader()
        # A relative path is resolved against the current working directory.
        results = grader.grade_notebook("Quiz Answer_202504.ipynb")
        print_grading_report(results)
    except Exception as e:
        # Last-resort guard: print the failure instead of raising a traceback.
        print(f"Error running grader: {str(e)}")



=== GRADING REPORT ===
Total Score: 100/100
Percentage: 100.0%

Component Scores:
- imports: 10
- data_loading: 15
- simple_linear_regression: 25
- scatter_plots: 20
- multiple_regression: 20
- reasoning: 10

Feedback:
✓ All required imports present
✓ Data loading implemented correctly
✓ All simple linear regressions implemented correctly
✓ All scatter plots implemented correctly
✓ Multiple regression implemented correctly
✓ Reasoning for actual vs predicted values provided

Detailed Tables:

--- IMPORTS DETAILS ---
| Detail                                                       | Marking      | Mark     |
|--------------------------------------------------------------|--------------|----------|
| Found import: numpy                                          | ✓            | 2.5      |
|   import numpy as np                                         |              |          |
|--------------------------------------------------------------|--------------|----------|
| Found import: pandas