In [5]:
!pip install reportlab google-generativeai python-dotenv matplotlib seaborn pandas reportlab

Collecting reportlab
  Downloading reportlab-4.4.1-py3-none-any.whl.metadata (1.8 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading reportlab-4.4.1-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: reportlab, python-dotenv
Successfully installed python-dotenv-1.1.0 reportlab-4.4.1


In [12]:
import json
from collections import defaultdict

# --- Load JSON Data ---
def load_json_data(file_path):
    try:
        with open(file_path, 'r') as file:
            return json.load(file)
    except Exception as e:
        print(f"Error loading JSON: {e}")
        return None

# --- Process Data ---
def process_data(json_data):
    if not json_data or not isinstance(json_data, list):
        print("Invalid JSON data")
        return None

    # Initialize data structure
    processed_data = {
        "overall_summary": {},
        "subject_summary": defaultdict(dict),
        "chapter_details": defaultdict(dict)
    }
    chapter_stats = defaultdict(lambda: {
        "questions_total": 0,
        "answered": 0,
        "correct": 0,
        "incorrect": 0,
        "marked_review": 0,
        "not_answered": 0,
        "total_time_seconds": 0,
        "difficulty_counts": defaultdict(int)
    })

    # Initialize concept stats per chapter
    concept_stats = defaultdict(lambda: defaultdict(lambda: {"total": 0, "correct": 0, "incorrect": 0}))

    # Map sections to subjects
    section_subject_map = {
        "Physics Single Correct": "Physics",
        "Physics Numerical": "Physics",
        "Chemistry Single Correct": "Chemistry",
        "Chemistry Numerical": "Chemistry",
        "Mathematics Single Correct": "Mathematics",
        "Mathematics Numerical": "Mathematics"
    }

    # Subject ID to name mapping
    subject_map = {
        "607018ee404ae53194e73d92": "Physics",
        "607018ee404ae53194e73d90": "Chemistry",
        "607018ee404ae53194e73d91": "Mathematics"
    }

    # Debug: Track questions per chapter
    debug_counts = defaultdict(list)

    # Process overall summary
    data = json_data[0]
    processed_data["overall_summary"] = {
        "total_marks_scored": data.get("totalMarkScored", 0),
        "total_marks_possible": data.get("totalMarks", 300),
        "total_time_taken_seconds": data.get("totalTimeTaken", 0),
        "total_questions_in_test": data.get("test", {}).get("totalQuestions", 0),
        "final_attempted": data.get("totalAttempted", 0),
        "final_correct": data.get("totalCorrect", 0),
        "overall_accuracy_percent": data.get("accuracy", 0),
        "time_taken_minutes": data.get("totalTimeTaken", 0) / 60.0
    }

    # Process subject summary
    for subject in data.get("subjects", []):
        subject_id = subject.get("subjectId", {}).get("$oid", "")
        subject_name = subject_map.get(subject_id, "Unknown")
        processed_data["subject_summary"][subject_name] = {
            "marks_scored": subject.get("totalMarkScored", 0),
            "total_marks_possible": subject.get("totalMarks", 100),  # Fixed: Use 100 as default
            "time_seconds": subject.get("totalTimeTaken", 0),
            "attempted": subject.get("totalAttempted", 0),
            "correct": subject.get("totalCorrect", 0),
            "incorrect": subject.get("totalAttempted", 0) - subject.get("totalCorrect", 0),
            "accuracy_percent": subject.get("accuracy", 0),
            "avg_time_per_attempted_q_seconds": (subject.get("totalTimeTaken", 0) / subject.get("totalAttempted", 0)) if subject.get("totalAttempted", 0) > 0 else 0
        }

    # Process sections and questions
    for section in data.get("sections", []):
        section_title = section.get("sectionId", {}).get("title", "")
        subject = section_subject_map.get(section_title, "Unknown")

        for question in section.get("questions", []):
            chapter = question.get("questionId", {}).get("chapters", [{}])[0].get("title", "Unknown")
            status = question.get("status", "notAnswered")
            time_taken = question.get("timeTaken", 0)
            level = question.get("questionId", {}).get("level", "unknown")
            concepts = [concept.get("title", "Unknown") for concept in question.get("questionId", {}).get("concepts", [])]

            # Filter for specified chapters
            if (subject == "Physics" and chapter not in ["Electrostatics", "Capacitance"]) or \
               (subject == "Chemistry" and chapter not in ["Solutions", "Electrochemistry"]) or \
               (subject == "Mathematics" and chapter not in ["Functions", "Sets and Relations"]):
                continue

            # Update chapter stats
            chapter_stats[(subject, chapter)]["questions_total"] += 1
            chapter_stats[(subject, chapter)]["total_time_seconds"] += time_taken
            chapter_stats[(subject, chapter)]["difficulty_counts"][level] += 1

            # Debug: Log question details
            debug_info = {
                "status": status,
                "time_taken": time_taken,
                "level": level,
                "concepts": concepts
            }

            if status == "answered":
                chapter_stats[(subject, chapter)]["answered"] += 1
                # Check correctness
                is_correct = False
                marked_options = question.get("markedOptions", [])
                input_value = question.get("inputValue", {})

                if marked_options:
                    is_correct = any(opt.get("isCorrect", False) for opt in marked_options)
                elif input_value.get("value") is not None:
                    is_correct = input_value.get("isCorrect", False)

                if is_correct:
                    chapter_stats[(subject, chapter)]["correct"] += 1
                    debug_info["correct"] = True
                else:
                    chapter_stats[(subject, chapter)]["incorrect"] += 1
                    debug_info["correct"] = False

                # Update concept stats
                for concept in concepts:
                    concept_stats[(subject, chapter)][concept]["total"] += 1
                    if is_correct:
                        concept_stats[(subject, chapter)][concept]["correct"] += 1
                    else:
                        concept_stats[(subject, chapter)][concept]["incorrect"] += 1
            elif status == "markedReview":
                chapter_stats[(subject, chapter)]["marked_review"] += 1
            elif status == "notAnswered":
                chapter_stats[(subject, chapter)]["not_answered"] += 1

            debug_counts[(subject, chapter)].append(debug_info)

    # Calculate chapter stats
    for (subject, chapter), stats in chapter_stats.items():
        stats["accuracy_on_answered_percent"] = (stats["correct"] / stats["answered"] * 100) if stats["answered"] > 0 else 0.0
        stats["avg_time_per_answered_q_seconds"] = (stats["total_time_seconds"] / stats["answered"]) if stats["answered"] > 0 else 0.0
        processed_data["chapter_details"][subject][chapter] = stats

    # Calculate total questions per subject
    subject_questions = defaultdict(int)
    for subject in processed_data["chapter_details"]:
        subject_questions[subject] = sum(chapter["questions_total"] for chapter in processed_data["chapter_details"][subject].values())
        processed_data["subject_summary"][subject]["total_questions"] = subject_questions[subject]

    # Calculate total questions in paper
    processed_data["overall_summary"]["total_questions_calculated"] = sum(subject_questions.values())

    # Debug: Print question counts
    print("\n=== Debug: Question Counts per Chapter ===")
    for (subject, chapter), questions in debug_counts.items():
        print(f"\n{subject} - {chapter}:")
        for i, q in enumerate(questions, 1):
            print(f"  Q{i}: Status={q['status']}, Time={q['time_taken']}s, Level={q['level']}, Correct={q.get('correct', 'N/A')}, Concepts={q['concepts']}")

    # Print concept analysis
    print("\n=== Concept Analysis for All Chapters ===")
    for (subject, chapter), concepts in concept_stats.items():
        print(f"\n{subject} - {chapter}:")
        for concept, stats in concepts.items():
            print(f"  {concept}:")
            print(f"    Total Questions: {stats['total']}")
            print(f"    Correct: {stats['correct']}")
            print(f"    Incorrect: {stats['incorrect']}")

    return processed_data

# --- Main Execution ---
file_path = "/content/sample_submission_analysis_1.json"
json_data = load_json_data(file_path)
if json_data:
    result = process_data(json_data)
    if result:
        import pprint
        print("\n=== Overall Summary ===")
        pprint.pprint(result["overall_summary"])
        print("\n=== Subject Summary ===")
        pprint.pprint(dict(result["subject_summary"]))
        print("\n=== Corrected Chapter Details ===")
        pprint.pprint(dict(result["chapter_details"]))
else:
    print("Failed to load JSON file")


=== Debug: Question Counts per Chapter ===

Physics - Capacitance:
  Q1: Status=answered, Time=25s, Level=medium, Correct=False, Concepts=['Multiple dielectric slabs in capacior']
  Q2: Status=markedReview, Time=173s, Level=easy, Correct=N/A, Concepts=['Series and Parallel Combinations of Capacitor']
  Q3: Status=answered, Time=91s, Level=medium, Correct=True, Concepts=['Charging of Capacitors']
  Q4: Status=answered, Time=57s, Level=easy, Correct=False, Concepts=['Force on plates of capacitor']
  Q5: Status=answered, Time=24s, Level=medium, Correct=True, Concepts=['Series and Parallel Combinations of Capacitor']
  Q6: Status=markedReview, Time=22s, Level=medium, Correct=N/A, Concepts=['Discharging of Capacitors']
  Q7: Status=answered, Time=22s, Level=tough, Correct=True, Concepts=['Induced charge on dielectric']
  Q8: Status=answered, Time=22s, Level=medium, Correct=True, Concepts=['Energy stored in capacitor']
  Q9: Status=answered, Time=44s, Level=easy, Correct=True, Concepts=['Se

In [13]:
import json
from collections import defaultdict

# --- Load JSON Data ---
def load_json_data(file_path):
    try:
        with open(file_path, 'r') as file:
            return json.load(file)
    except Exception as e:
        print(f"Error loading JSON: {e}")
        return None

# --- Process Data ---
def process_data(json_data):
    if not json_data or not isinstance(json_data, list):
        print("Invalid JSON data")
        return None, None, None

    # Initialize data structure
    processed_data = {
        "overall_summary": {},
        "subject_summary": defaultdict(dict),
        "chapter_details": defaultdict(dict)
    }
    chapter_stats = defaultdict(lambda: {
        "questions_total": 0,
        "answered": 0,
        "correct": 0,
        "incorrect": 0,
        "marked_review": 0,
        "not_answered": 0,
        "total_time_seconds": 0,
        "difficulty_counts": defaultdict(int)
    })

    # Initialize concept stats per chapter
    concept_stats = defaultdict(lambda: defaultdict(lambda: {"total": 0, "correct": 0, "incorrect": 0}))

    # Map sections to subjects
    section_subject_map = {
        "Physics Single Correct": "Physics",
        "Physics Numerical": "Physics",
        "Chemistry Single Correct": "Chemistry",
        "Chemistry Numerical": "Chemistry",
        "Mathematics Single Correct": "Mathematics",
        "Mathematics Numerical": "Mathematics"
    }

    # Subject ID to name mapping
    subject_map = {
        "607018ee404ae53194e73d92": "Physics",
        "607018ee404ae53194e73d90": "Chemistry",
        "607018ee404ae53194e73d91": "Mathematics"
    }

    # Debug: Track questions per chapter
    debug_counts = defaultdict(list)

    # Process overall summary
    data = json_data[0]
    processed_data["overall_summary"] = {
        "total_marks_scored": data.get("totalMarkScored", 0),
        "total_marks_possible": data.get("totalMarks", 300),
        "total_time_taken_seconds": data.get("totalTimeTaken", 0),
        "total_questions_in_test": data.get("test", {}).get("totalQuestions", 0),
        "final_attempted": data.get("totalAttempted", 0),
        "final_correct": data.get("totalCorrect", 0),
        "overall_accuracy_percent": data.get("accuracy", 0),
        "time_taken_minutes": data.get("totalTimeTaken", 0) / 60.0
    }

    # Process subject summary
    for subject in data.get("subjects", []):
        subject_id = subject.get("subjectId", {}).get("$oid", "")
        subject_name = subject_map.get(subject_id, "Unknown")
        processed_data["subject_summary"][subject_name] = {
            "marks_scored": subject.get("totalMarkScored", 0),
            "total_marks_possible": subject.get("totalMarks", 100),
            "time_seconds": subject.get("totalTimeTaken", 0),
            "attempted": subject.get("totalAttempted", 0),
            "correct": subject.get("totalCorrect", 0),
            "incorrect": subject.get("totalAttempted", 0) - subject.get("totalCorrect", 0),
            "accuracy_percent": subject.get("accuracy", 0),
            "avg_time_per_attempted_q_seconds": (subject.get("totalTimeTaken", 0) / subject.get("totalAttempted", 0)) if subject.get("totalAttempted", 0) > 0 else 0
        }

    # Process sections and questions
    for section in data.get("sections", []):
        section_title = section.get("sectionId", {}).get("title", "")
        subject = section_subject_map.get(section_title, "Unknown")

        for question in section.get("questions", []):
            chapter = question.get("questionId", {}).get("chapters", [{}])[0].get("title", "Unknown")
            status = question.get("status", "notAnswered")
            time_taken = question.get("timeTaken", 0)
            level = question.get("questionId", {}).get("level", "unknown")
            concepts = [concept.get("title", "Unknown") for concept in question.get("questionId", {}).get("concepts", [])]

            # Filter for specified chapters
            if (subject == "Physics" and chapter not in ["Electrostatics", "Capacitance"]) or \
               (subject == "Chemistry" and chapter not in ["Solutions", "Electrochemistry"]) or \
               (subject == "Mathematics" and chapter not in ["Functions", "Sets and Relations"]):
                continue

            # Update chapter stats
            chapter_stats[(subject, chapter)]["questions_total"] += 1
            chapter_stats[(subject, chapter)]["total_time_seconds"] += time_taken
            chapter_stats[(subject, chapter)]["difficulty_counts"][level] += 1

            # Debug: Log question details
            debug_info = {
                "status": status,
                "time_taken": time_taken,
                "level": level,
                "concepts": concepts,
                "subject": subject  # Include subject for subject-wise difficulty analysis
            }

            if status == "answered":
                chapter_stats[(subject, chapter)]["answered"] += 1
                # Check correctness
                is_correct = False
                marked_options = question.get("markedOptions", [])
                input_value = question.get("inputValue", {})

                if marked_options:
                    is_correct = any(opt.get("isCorrect", False) for opt in marked_options)
                elif input_value.get("value") is not None:
                    is_correct = input_value.get("isCorrect", False)

                if is_correct:
                    chapter_stats[(subject, chapter)]["correct"] += 1
                    debug_info["correct"] = True
                else:
                    chapter_stats[(subject, chapter)]["incorrect"] += 1
                    debug_info["correct"] = False

                # Update concept stats
                for concept in concepts:
                    concept_stats[(subject, chapter)][concept]["total"] += 1
                    if is_correct:
                        concept_stats[(subject, chapter)][concept]["correct"] += 1
                    else:
                        concept_stats[(subject, chapter)][concept]["incorrect"] += 1
            elif status == "markedReview":
                chapter_stats[(subject, chapter)]["marked_review"] += 1
            elif status == "notAnswered":
                chapter_stats[(subject, chapter)]["not_answered"] += 1

            debug_counts[(subject, chapter)].append(debug_info)

    # Calculate chapter stats
    for (subject, chapter), stats in chapter_stats.items():
        stats["accuracy_on_answered_percent"] = (stats["correct"] / stats["answered"] * 100) if stats["answered"] > 0 else 0.0
        stats["avg_time_per_answered_q_seconds"] = (stats["total_time_seconds"] / stats["answered"]) if stats["answered"] > 0 else 0.0
        processed_data["chapter_details"][subject][chapter] = stats

    # Calculate total questions per subject
    subject_questions = defaultdict(int)
    for subject in processed_data["chapter_details"]:
        subject_questions[subject] = sum(chapter["questions_total"] for chapter in processed_data["chapter_details"][subject].values())
        processed_data["subject_summary"][subject]["total_questions"] = subject_questions[subject]

    # Calculate total questions in paper
    processed_data["overall_summary"]["total_questions_calculated"] = sum(subject_questions.values())

    return processed_data, concept_stats, debug_counts

# --- Prepare Comprehensive LLM Context ---
def prepare_comprehensive_llm_context(processed_data, concept_stats, debug_counts):
    """
    Prepare comprehensive test data including detailed difficulty-wise breakdown
    """

    context = f"""# Test Performance Analysis Report

## Overall Test Summary
- **Total Score**: {processed_data['overall_summary']['total_marks_scored']}/{processed_data['overall_summary']['total_marks_possible']} marks
- **Questions Attempted**: {processed_data['overall_summary']['final_attempted']}/{processed_data['overall_summary']['total_questions_in_test']}
- **Correct Answers**: {processed_data['overall_summary']['final_correct']}
- **Overall Accuracy**: {processed_data['overall_summary']['overall_accuracy_percent']:.1f}%
- **Time Taken**: {processed_data['overall_summary']['time_taken_minutes']:.1f} minutes

## Subject-wise Performance

"""

    # Add subject summaries
    for subject, stats in processed_data['subject_summary'].items():
        context += f"""### {subject}
- Score: {stats['marks_scored']}/{stats['total_marks_possible']} marks
- Questions: {stats['attempted']}/{stats['total_questions']} attempted
- Correct: {stats['correct']} | Incorrect: {stats['incorrect']}
- Accuracy: {stats['accuracy_percent']:.1f}%
- Avg Time/Question: {stats['avg_time_per_attempted_q_seconds']:.1f} seconds

"""

    # Calculate overall difficulty-wise analysis
    context += "## Overall Difficulty-wise Analysis\n\n"

    difficulty_stats = {
        'easy': {'total': 0, 'answered': 0, 'correct': 0, 'incorrect': 0,
                 'marked_review': 0, 'not_answered': 0, 'total_time': 0},
        'medium': {'total': 0, 'answered': 0, 'correct': 0, 'incorrect': 0,
                   'marked_review': 0, 'not_answered': 0, 'total_time': 0},
        'tough': {'total': 0, 'answered': 0, 'correct': 0, 'incorrect': 0,
                  'marked_review': 0, 'not_answered': 0, 'total_time': 0}
    }

    # Process debug_counts for overall difficulty analysis
    for (subject, chapter), questions in debug_counts.items():
        for q in questions:
            level = q['level']
            if level in difficulty_stats:
                difficulty_stats[level]['total'] += 1
                difficulty_stats[level]['total_time'] += q['time_taken']

                if q['status'] == 'answered':
                    difficulty_stats[level]['answered'] += 1
                    if q.get('correct', False):
                        difficulty_stats[level]['correct'] += 1
                    else:
                        difficulty_stats[level]['incorrect'] += 1
                elif q['status'] == 'markedReview':
                    difficulty_stats[level]['marked_review'] += 1
                elif q['status'] == 'notAnswered':
                    difficulty_stats[level]['not_answered'] += 1

    # Display overall difficulty-wise stats
    for difficulty, stats in difficulty_stats.items():
        if stats['total'] > 0:
            accuracy = (stats['correct'] / stats['answered'] * 100) if stats['answered'] > 0 else 0
            avg_time = stats['total_time'] / stats['answered'] if stats['answered'] > 0 else 0

            context += f"""### {difficulty.capitalize()} Level Questions
- Total Questions: {stats['total']}
- Attempted: {stats['answered']} | Correct: {stats['correct']} | Incorrect: {stats['incorrect']}
- Not Attempted: {stats['not_answered']} | Marked for Review: {stats['marked_review']}
- Accuracy: {accuracy:.1f}%
- Average Time per Attempted Question: {avg_time:.1f} seconds
- Total Time Spent: {stats['total_time']} seconds

"""

    # Add subject-wise difficulty analysis
    context += "## Subject-wise Difficulty Analysis\n\n"

    # Initialize subject-wise difficulty stats
    subject_difficulty_stats = defaultdict(lambda: {
        'easy': {'total': 0, 'answered': 0, 'correct': 0, 'incorrect': 0,
                 'marked_review': 0, 'not_answered': 0, 'total_time': 0},
        'medium': {'total': 0, 'answered': 0, 'correct': 0, 'incorrect': 0,
                   'marked_review': 0, 'not_answered': 0, 'total_time': 0},
        'tough': {'total': 0, 'answered': 0, 'correct': 0, 'incorrect': 0,
                  'marked_review': 0, 'not_answered': 0, 'total_time': 0}
    })

    # Process debug_counts for subject-wise difficulty analysis
    for (subject, chapter), questions in debug_counts.items():
        for q in questions:
            level = q['level']
            subj = q['subject']
            if level in subject_difficulty_stats[subj]:
                subject_difficulty_stats[subj][level]['total'] += 1
                subject_difficulty_stats[subj][level]['total_time'] += q['time_taken']

                if q['status'] == 'answered':
                    subject_difficulty_stats[subj][level]['answered'] += 1
                    if q.get('correct', False):
                        subject_difficulty_stats[subj][level]['correct'] += 1
                    else:
                        subject_difficulty_stats[subj][level]['incorrect'] += 1
                elif q['status'] == 'markedReview':
                    subject_difficulty_stats[subj][level]['marked_review'] += 1
                elif q['status'] == 'notAnswered':
                    subject_difficulty_stats[subj][level]['not_answered'] += 1

    # Display subject-wise difficulty stats
    for subj in subject_difficulty_stats:
        context += f"### {subj}\n\n"
        for difficulty, stats in subject_difficulty_stats[subj].items():
            if stats['total'] > 0:
                accuracy = (stats['correct'] / stats['answered'] * 100) if stats['answered'] > 0 else 0
                avg_time = stats['total_time'] / stats['answered'] if stats['answered'] > 0 else 0

                context += f"#### {difficulty.capitalize()} Level Questions\n"
                context += f"- Total Questions: {stats['total']}\n"
                context += f"- Attempted: {stats['answered']} | Correct: {stats['correct']} | Incorrect: {stats['incorrect']}\n"
                context += f"- Not Attempted: {stats['not_answered']} | Marked for Review: {stats['marked_review']}\n"
                context += f"- Accuracy: {accuracy:.1f}%\n"
                context += f"- Average Time per Attempted Question: {avg_time:.1f} seconds\n"
                context += f"- Total Time Spent: {stats['total_time']} seconds\n\n"
        context += "\n"

    # Add chapter-wise details with concept analysis
    context += "## Chapter-wise Analysis with Concepts\n\n"

    for subject, chapters in processed_data['chapter_details'].items():
        context += f"### {subject}\n\n"
        for chapter, stats in chapters.items():
            context += f"""**{chapter}**
- Total Questions: {stats['questions_total']}
- Attempted: {stats['answered']} | Not Attempted: {stats['not_answered']} | Marked for Review: {stats['marked_review']}
- Performance: {stats['correct']} correct, {stats['incorrect']} incorrect
- Accuracy: {stats['accuracy_on_answered_percent']:.1f}%
- Avg Time/Answered: {stats['avg_time_per_answered_q_seconds']:.1f} seconds
- Difficulty Distribution: Easy({stats['difficulty_counts']['easy']}), Medium({stats['difficulty_counts']['medium']}), Tough({stats['difficulty_counts']['tough']})

"""

            # Add concept analysis for this chapter
            chapter_concepts = concept_stats.get((subject, chapter), {})
            if chapter_concepts:
                strong_concepts = []
                weak_concepts = []
                moderate_concepts = []

                for concept, cstats in chapter_concepts.items():
                    if cstats['total'] > 0:
                        accuracy = (cstats['correct'] / cstats['total']) * 100
                        concept_info = f"  - {concept}: {cstats['correct']}/{cstats['total']} ({accuracy:.1f}%)"

                        if accuracy >= 80:
                            strong_concepts.append(concept_info)
                        elif accuracy <= 60:
                            weak_concepts.append(concept_info)
                        else:
                            moderate_concepts.append(concept_info)

                if strong_concepts:
                    context += "**Strong Concepts (≥80% accuracy):**\n"
                    context += "\n".join(strong_concepts) + "\n\n"

                if moderate_concepts:
                    context += "**Moderate Concepts (60-80% accuracy):**\n"
                    context += "\n".join(moderate_concepts) + "\n\n"

                if weak_concepts:
                    context += "**Weak Concepts (≤60% accuracy):**\n"
                    context += "\n".join(weak_concepts) + "\n\n"
            else:
                context += "*No concepts attempted in this chapter*\n\n"

    # Add overall insights
    context += """## Key Insights

### Overall Concept Performance:
"""

    # Aggregate all concepts across subjects
    all_strong_concepts = []
    all_weak_concepts = []

    for (subject, chapter), concepts in concept_stats.items():
        for concept, cstats in concepts.items():
            if cstats['total'] > 0:
                accuracy = (cstats['correct'] / cstats['total']) * 100
                if accuracy >= 80:
                    all_strong_concepts.append(f"- {subject} ({chapter}): {concept} - {accuracy:.1f}%")
                elif accuracy <= 60:
                    all_weak_concepts.append(f"- {subject} ({chapter}): {concept} - {accuracy:.1f}%")

    context += "\n**Strong Concepts Across All Subjects:**\n"
    context += "\n".join(all_strong_concepts) if all_strong_concepts else "- No concepts with ≥80% accuracy\n"

    context += "\n\n**Weak Concepts Needing Improvement:**\n"
    context += "\n".join(all_weak_concepts) if all_weak_concepts else "- No concepts with ≤60% accuracy\n"

    return context

# --- Main Execution ---
def main():
    file_path = "/content/sample_submission_analysis_1.json"
    json_data = load_json_data(file_path)
    if json_data:
        processed_data, concept_stats, debug_counts = process_data(json_data)
        if processed_data:
            llm_context = prepare_comprehensive_llm_context(processed_data, concept_stats, debug_counts)
            print("=== Comprehensive LLM Context ===")
            print(llm_context)
        else:
            print("Failed to process JSON data")
    else:
        print("Failed to load JSON file")

if __name__ == "__main__":
    main()

=== Comprehensive LLM Context ===
# Test Performance Analysis Report

## Overall Test Summary
- **Total Score**: 133/300 marks
- **Questions Attempted**: 47/75
- **Correct Answers**: 36
- **Overall Accuracy**: 76.6%
- **Time Taken**: 83.3 minutes

## Subject-wise Performance

### Physics
- Score: 44/100 marks
- Questions: 16/25 attempted
- Correct: 12 | Incorrect: 4
- Accuracy: 75.0%
- Avg Time/Question: 186.5 seconds

### Chemistry
- Score: 60/100 marks
- Questions: 20/25 attempted
- Correct: 16 | Incorrect: 4
- Accuracy: 80.0%
- Avg Time/Question: 69.8 seconds

### Mathematics
- Score: 29/100 marks
- Questions: 11/25 attempted
- Correct: 8 | Incorrect: 3
- Accuracy: 72.7%
- Avg Time/Question: 56.1 seconds

## Overall Difficulty-wise Analysis

### Easy Level Questions
- Total Questions: 25
- Attempted: 19 | Correct: 14 | Incorrect: 5
- Not Attempted: 1 | Marked for Review: 5
- Accuracy: 73.7%
- Average Time per Attempted Question: 138.3 seconds
- Total Time Spent: 2628 seconds

### Me

In [3]:
import re
from collections import defaultdict

def parse_llm_context(llm_context):
    """
    Parse the llm_context string into a structured performance data dictionary.
    """
    performance_data = {
        "overall_summary": {},
        "subject_summary": defaultdict(dict),
        "difficulty_summary": defaultdict(dict),
        "chapter_concepts": defaultdict(lambda: defaultdict(dict))
    }

    # Helper function to clean percentage values
    def clean_percent(value):
        return value.strip('%') if isinstance(value, str) else value

    # Helper function to parse fraction strings (e.g., "47/75")
    def parse_fraction(fraction):
        if isinstance(fraction, str):
            num, denom = fraction.split('/')
            return int(num), int(denom)
        return fraction, 1

    # Parse Overall Test Summary
    overall_match = re.search(
        r"## Overall Test Summary\n"
        r"- \*\*Total Score\*\*: (\d+/\d+) marks\n"
        r"- \*\*Questions Attempted\*\*: (\d+/\d+)\n"
        r"- \*\*Correct Answers\*\*: (\d+)\n"
        r"- \*\*Overall Accuracy\*\*: ([\d.]+)%\n"
        r"- \*\*Time Taken\*\*: ([\d.]+) minutes",
        llm_context
    )
    if overall_match:
        performance_data["overall_summary"] = {
            "total_score": overall_match.group(1),
            "questions_attempted": overall_match.group(2),
            "correct_answers": int(overall_match.group(3)),
            "overall_accuracy": f"{float(overall_match.group(4))}%",
            "time_taken": f"{float(overall_match.group(5))} minutes"
        }

    # Parse Subject-wise Performance
    subject_matches = re.finditer(
        r"### (Physics|Chemistry|Mathematics)\n"
        r"- Score: (\d+/\d+) marks\n"
        r"- Questions: (\d+/\d+) attempted\n"
        r"- Correct: (\d+) \| Incorrect: (\d+)\n"
        r"- Accuracy: ([\d.]+)%\n"
        r"- Avg Time/Question: ([\d.]+) seconds",
        llm_context
    )
    for match in subject_matches:
        subject = match.group(1)
        performance_data["subject_summary"][subject] = {
            "score": match.group(2),
            "questions_attempted": match.group(3),
            "correct": int(match.group(4)),
            "incorrect": int(match.group(5)),
            "accuracy": f"{float(match.group(6))}%",
            "avg_time_per_question": f"{float(match.group(7))} seconds"
        }

    # Parse Overall Difficulty-wise Analysis
    difficulty_matches = re.finditer(
        r"### (Easy|Medium|Tough) Level Questions\n"
        r"- Total Questions: (\d+)\n"
        r"- Attempted: (\d+) \| Correct: (\d+) \| Incorrect: (\d+)\n"
        r"- Not Attempted: \d+ \| Marked for Review: \d+\n"
        r"- Accuracy: ([\d.]+)%\n"
        r"- Average Time per Attempted Question: ([\d.]+) seconds",
        llm_context
    )
    for match in difficulty_matches:
        difficulty = match.group(1)
        performance_data["difficulty_summary"][difficulty] = {
            "total": int(match.group(2)),
            "attempted": int(match.group(3)),
            "correct": int(match.group(4)),
            "incorrect": int(match.group(5)),
            "accuracy": f"{float(match.group(6))}%",
            "avg_time": f"{float(match.group(7))} seconds"
        }

    # Parse Chapter-wise Analysis with Concepts
    chapter_matches = re.finditer(
        r"\*\*(Electrostatics|Capacitance|Solutions|Electrochemistry|Functions|Sets and Relations)\*\*\n"
        r"- Total Questions: (\d+)\n"
        r"- Attempted: (\d+) \| Not Attempted: \d+ \| Marked for Review: \d+\n"
        r"- Performance: (\d+) correct, (\d+) incorrect\n"
        r"- Accuracy: ([\d.]+)%\n"
        r"- Avg Time/Answered: ([\d.]+) seconds\n"
        r"- Difficulty Distribution: Easy\((\d+)\), Medium\((\d+)\), Tough\((\d+)\)\n\n"
        r"(?:\*\*Strong Concepts \(≥80% accuracy\):\*\*\n((?:  - .*\n)*))?"
        r"(?:\*\*Moderate Concepts \(60-80% accuracy\):\*\*\n((?:  - .*\n)*))?"
        r"(?:\*\*Weak Concepts \(≤60% accuracy\):\*\*\n((?:  - .*\n)*))?",
        llm_context
    )
    for match in chapter_matches:
        chapter = match.group(1)
        subject = (
            "Physics" if chapter in ["Electrostatics", "Capacitance"] else
            "Chemistry" if chapter in ["Solutions", "Electrochemistry"] else
            "Mathematics"
        )
        performance_data["chapter_concepts"][subject][chapter] = {
            "total_questions": int(match.group(2)),
            "attempted": int(match.group(3)),
            "correct": int(match.group(4)),
            "incorrect": int(match.group(5)),
            "accuracy": f"{float(match.group(6))}%",
            "avg_time": f"{float(match.group(7))} seconds",
            "difficulty_distribution": {
                "easy": int(match.group(8)),
                "medium": int(match.group(9)),
                "tough": int(match.group(10))
            },
            "strong_concepts": [],
            "weak_concepts": []
        }

        # Parse strong concepts
        strong_concepts = match.group(11) or ""
        for concept_line in strong_concepts.split('\n'):
            concept_match = re.match(r"  - (.*?): \d+/\d+ \(([\d.]+)%\)", concept_line.strip())
            if concept_match:
                performance_data["chapter_concepts"][subject][chapter]["strong_concepts"].append({
                    "concept": concept_match.group(1),
                    "accuracy": f"{float(concept_match.group(2))}%"
                })

        # Parse weak concepts
        weak_concepts = match.group(13) or ""
        for concept_line in weak_concepts.split('\n'):
            concept_match = re.match(r"  - (.*?): \d+/\d+ \(([\d.]+)%\)", concept_line.strip())
            if concept_match:
                performance_data["chapter_concepts"][subject][chapter]["weak_concepts"].append({
                    "concept": concept_match.group(1),
                    "accuracy": f"{float(concept_match.group(2))}%"
                })

    return performance_data

In [16]:
performance_data, concept_stats, debug_counts = process_data(json_data)



In [30]:
performance_data

{'overall_summary': {'total_marks_scored': 133,
  'total_marks_possible': 300,
  'total_time_taken_seconds': 4998,
  'total_questions_in_test': 75,
  'final_attempted': 47,
  'final_correct': 36,
  'overall_accuracy_percent': 76.59574468085107,
  'time_taken_minutes': 83.3,
  'total_questions_calculated': 75},
 'subject_summary': defaultdict(dict,
             {'Physics': {'marks_scored': 44,
               'total_marks_possible': 100,
               'time_seconds': 2984,
               'attempted': 16,
               'correct': 12,
               'incorrect': 4,
               'accuracy_percent': 75,
               'avg_time_per_attempted_q_seconds': 186.5,
               'total_questions': 25},
              'Chemistry': {'marks_scored': 60,
               'total_marks_possible': 100,
               'time_seconds': 1397,
               'attempted': 20,
               'correct': 16,
               'incorrect': 4,
               'accuracy_percent': 80,
               'avg_time_per_atte

In [31]:
concept_stats

defaultdict(<function __main__.process_data.<locals>.<lambda>()>,
            {('Physics',
              'Capacitance'): defaultdict(<function __main__.process_data.<locals>.<lambda>.<locals>.<lambda>()>, {'Multiple dielectric slabs in capacior': {'total': 1,
                           'correct': 0,
                           'incorrect': 1},
                          'Charging of Capacitors': {'total': 1,
                           'correct': 1,
                           'incorrect': 0},
                          'Force on plates of capacitor': {'total': 1,
                           'correct': 0,
                           'incorrect': 1},
                          'Series and Parallel Combinations of Capacitor': {'total': 3,
                           'correct': 3,
                           'incorrect': 0},
                          'Induced charge on dielectric': {'total': 1,
                           'correct': 1,
                           'incorrect': 0},
                    

In [33]:
debug_counts

defaultdict(list,
            {('Physics',
              'Capacitance'): [{'status': 'answered',
               'time_taken': 25,
               'level': 'medium',
               'concepts': ['Multiple dielectric slabs in capacior'],
               'subject': 'Physics',
               'correct': False}, {'status': 'markedReview',
               'time_taken': 173,
               'level': 'easy',
               'concepts': ['Series and Parallel Combinations of Capacitor'],
               'subject': 'Physics'}, {'status': 'answered',
               'time_taken': 91,
               'level': 'medium',
               'concepts': ['Charging of Capacitors'],
               'subject': 'Physics',
               'correct': True}, {'status': 'answered',
               'time_taken': 57,
               'level': 'easy',
               'concepts': ['Force on plates of capacitor'],
               'subject': 'Physics',
               'correct': False}, {'status': 'answered',
               'time_taken':

In [35]:
# Assuming your input data is already defined as 'performance_data', 'concept_stats', and 'debug_counts'
# Extract the components from the provided performance_data
overall_summary = performance_data['overall_summary']
subject_summary = performance_data['subject_summary']
chapter_details = performance_data['chapter_details']

# Initialize the new performance data structure
new_performance_data = {
    "overall_summary": {
        "total_score": f"{overall_summary['total_marks_scored']}/{overall_summary['total_marks_possible']}",
        "questions_attempted": f"{overall_summary['final_attempted']}/{overall_summary['total_questions_in_test']}",
        "correct_answers": overall_summary['final_correct'],
        "overall_accuracy": f"{round(overall_summary['overall_accuracy_percent'], 1)}%",
        "time_taken": f"{overall_summary['time_taken_minutes']} minutes"
    },
    "subject_summary": {
        subject: {
            "score": f"{data['marks_scored']}/{data['total_marks_possible']}",
            "questions_attempted": f"{data['attempted']}/{data['total_questions']}",
            "correct": data['correct'],
            "incorrect": data['incorrect'],
            "accuracy": f"{round(data['accuracy_percent'], 1)}%",
            "avg_time_per_question": f"{round(data['avg_time_per_attempted_q_seconds'], 1)} seconds"
        }
        for subject, data in subject_summary.items()
    },
    "difficulty_summary": {},
    "chapter_concepts": {
        subject: {
            chapter: {
                "total_questions": chapter_data['questions_total'],
                "attempted": chapter_data['answered'],
                "correct": chapter_data['correct'],
                "accuracy": f"{round(chapter_data['accuracy_on_answered_percent'], 1)}%",
                "avg_time": f"{round(chapter_data['avg_time_per_answered_q_seconds'], 1)} seconds",
                "strong_concepts": [],
                "weak_concepts": []
            }
            for chapter, chapter_data in chapters.items()
        }
        for subject, chapters in chapter_details.items()
    }
}

# Process difficulty_summary from debug_counts
difficulty_stats = {
    "Easy": {"total": 0, "attempted": 0, "correct": 0, "incorrect": 0, "time_sum": 0},
    "Medium": {"total": 0, "attempted": 0, "correct": 0, "incorrect": 0, "time_sum": 0},
    "Tough": {"total": 0, "attempted": 0, "correct": 0, "incorrect": 0, "time_sum": 0}
}

for (subject, chapter), questions in debug_counts.items():
    for q in questions:
        level = q['level'].capitalize()
        difficulty_stats[level]["total"] += 1
        if q['status'] == 'answered':
            difficulty_stats[level]["attempted"] += 1
            difficulty_stats[level]["time_sum"] += q['time_taken']
            if q.get('correct', False):
                difficulty_stats[level]["correct"] += 1
            else:
                difficulty_stats[level]["incorrect"] += 1

for level, stats in difficulty_stats.items():
    accuracy = (stats['correct'] / stats['attempted'] * 100) if stats['attempted'] > 0 else 0
    avg_time = (stats['time_sum'] / stats['attempted']) if stats['attempted'] > 0 else 0
    new_performance_data["difficulty_summary"][level] = {
        "total": stats['total'],
        "attempted": stats['attempted'],
        "correct": stats['correct'],
        "incorrect": stats['incorrect'],
        "accuracy": f"{round(accuracy, 1)}%",
        "avg_time": f"{round(avg_time, 1)} seconds"
    }

# Process strong and weak concepts from concept_stats
for subject, chapters in new_performance_data["chapter_concepts"].items():
    for chapter, chapter_data in chapters.items():
        concepts = concept_stats.get((subject, chapter), {})
        for concept, stats in concepts.items():
            accuracy = (stats['correct'] / stats['total'] * 100) if stats['total'] > 0 else 0
            concept_entry = {"concept": concept, "accuracy": f"{round(accuracy, 1)}%"}
            if accuracy >= 75:
                new_performance_data["chapter_concepts"][subject][chapter]["strong_concepts"].append(concept_entry)
            else:
                new_performance_data["chapter_concepts"][subject][chapter]["weak_concepts"].append(concept_entry)

# The resulting new_performance_data matches your desired structure

In [38]:
# Assuming new_performance_data is already created from the previous code
new_performance_data

{'overall_summary': {'total_score': '133/300',
  'questions_attempted': '47/75',
  'correct_answers': 36,
  'overall_accuracy': '76.6%',
  'time_taken': '83.3 minutes'},
 'subject_summary': {'Physics': {'score': '44/100',
   'questions_attempted': '16/25',
   'correct': 12,
   'incorrect': 4,
   'accuracy': '75%',
   'avg_time_per_question': '186.5 seconds'},
  'Chemistry': {'score': '60/100',
   'questions_attempted': '20/25',
   'correct': 16,
   'incorrect': 4,
   'accuracy': '80%',
   'avg_time_per_question': '69.8 seconds'},
  'Mathematics': {'score': '29/100',
   'questions_attempted': '11/25',
   'correct': 8,
   'incorrect': 3,
   'accuracy': '72.7%',
   'avg_time_per_question': '56.1 seconds'}},
 'difficulty_summary': {'Easy': {'total': 25,
   'attempted': 19,
   'correct': 14,
   'incorrect': 5,
   'accuracy': '73.7%',
   'avg_time': '121.4 seconds'},
  'Medium': {'total': 30,
   'attempted': 18,
   'correct': 14,
   'incorrect': 4,
   'accuracy': '77.8%',
   'avg_time': '64.

In [27]:
import requests
import json
import os

# Gemini API configuration
API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyB_XxC9j3ySjkVagQKMdn49E-LanT6A118")  # Replace with your Gemini API key or set as environment variable
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"

# Performance data from prepare_llm_context_comprehensive output
performance_data = {
    "overall_summary": {
        "total_score": "133/300",
        "questions_attempted": "47/75",
        "correct_answers": 36,
        "overall_accuracy": "76.6%",
        "time_taken": "83.3 minutes"
    },
    "subject_summary": {
        "Physics": {
            "score": "44/100",
            "questions_attempted": "16/25",
            "correct": 12,
            "incorrect": 4,
            "accuracy": "75.0%",
            "avg_time_per_question": "186.5 seconds"
        },
        "Chemistry": {
            "score": "60/100",
            "questions_attempted": "20/25",
            "correct": 16,
            "incorrect": 4,
            "accuracy": "80.0%",
            "avg_time_per_question": "69.8 seconds"
        },
        "Mathematics": {
            "score": "29/100",
            "questions_attempted": "11/25",
            "correct": 8,
            "incorrect": 3,
            "accuracy": "72.7%",
            "avg_time_per_question": "56.1 seconds"
        }
    },
    "difficulty_summary": {
        "Easy": {
            "total": 25,
            "attempted": 19,
            "correct": 14,
            "incorrect": 5,
            "accuracy": "73.7%",
            "avg_time": "138.3 seconds"
        },
        "Medium": {
            "total": 30,
            "attempted": 18,
            "correct": 14,
            "incorrect": 4,
            "accuracy": "77.8%",
            "avg_time": "77.8 seconds"
        },
        "Tough": {
            "total": 20,
            "attempted": 10,
            "correct": 8,
            "incorrect": 2,
            "accuracy": "80.0%",
            "avg_time": "97.0 seconds"
        }
    },
    "chapter_concepts": {
        "Physics": {
            "Capacitance": {
                "total_questions": 10,
                "attempted": 8,
                "correct": 6,
                "accuracy": "75.0%",
                "avg_time": "62.5 seconds",
                "strong_concepts": [
                    {"concept": "Charging of Capacitors", "accuracy": "100.0%"},
                    {"concept": "Series and Parallel Combinations of Capacitor", "accuracy": "100.0%"},
                    {"concept": "Induced charge on dielectric", "accuracy": "100.0%"},
                    {"concept": "Energy stored in capacitor", "accuracy": "100.0%"}
                ],
                "weak_concepts": [
                    {"concept": "Multiple dielectric slabs in capacitor", "accuracy": "0.0%"},
                    {"concept": "Force on plates of capacitor", "accuracy": "0.0%"}
                ]
            },
            "Electrostatics": {
                "total_questions": 15,
                "attempted": 12,
                "correct": 10,
                "accuracy": "83.3%",
                "avg_time": "74.8 seconds",
                "strong_concepts": [
                    {"concept": "Electric Field and Force due to Dipole", "accuracy": "100.0%"},
                    {"concept": "Properties of Electric Dipole", "accuracy": "100.0%"},
                    {"concept": "Electric Field due to continuous charge distribution", "accuracy": "100.0%"},
                    {"concept": "Electric flux", "accuracy": "100.0%"},
                    {"concept": "Gauss theorem", "accuracy": "100.0%"},
                    {"concept": "Electric potential due to point charge", "accuracy": "100.0%"},
                    {"concept": "Electric field by group of charges", "accuracy": "100.0%"}
                ],
                "weak_concepts": [
                    {"concept": "Coulombs Law", "accuracy": "0.0%"},
                    {"concept": "Charged Particle in electric field", "accuracy": "50.0%"}
                ]
            }
        },
        "Chemistry": {
            "Electrochemistry": {
                "total_questions": 13,
                "attempted": 1,
                "correct": 1,
                "accuracy": "100.0%",
                "avg_time": "122.0 seconds",
                "strong_concepts": [
                    {"concept": "Factors which enhance corrosion", "accuracy": "100.0%"}
                ],
                "weak_concepts": []
            },
            "Solutions": {
                "total_questions": 12,
                "attempted": 10,
                "correct": 7,
                "accuracy": "70.0%",
                "avg_time": "49.5 seconds",
                "strong_concepts": [
                    {"concept": "Henry's law", "accuracy": "100.0%"},
                    {"concept": "Azeotropic solutions", "accuracy": "100.0%"},
                    {"concept": "Elevation in boiling point", "accuracy": "100.0%"},
                    {"concept": "Vapour pressure of solution containing volatile solute and volatile solvent", "accuracy": "100.0%"}
                ],
                "weak_concepts": [
                    {"concept": "Osmotic pressure", "accuracy": "33.3%"},
                    {"concept": "Depression in freezing point", "accuracy": "0.0%"}
                ]
            }
        },
        "Mathematics": {
            "Functions": {
                "total_questions": 18,
                "attempted": 10,
                "correct": 7,
                "accuracy": "70.0%",
                "avg_time": "177.1 seconds",
                "strong_concepts": [
                    {"concept": "Questions on determining odd and even functions", "accuracy": "100.0%"},
                    {"concept": "domain of modulus functions", "accuracy": "100.0%"},
                    {"concept": "questions based on functional equations", "accuracy": "100.0%"},
                    {"concept": "range involving modulus functions", "accuracy": "100.0%"}
                ],
                "weak_concepts": [
                    {"concept": "finding fog and gof", "accuracy": "50.0%"},
                    {"concept": "period of normal functions", "accuracy": "0.0%"}
                ]
            },
            "Sets and Relations": {
                "total_questions": 7,
                "attempted": 6,
                "correct": 5,
                "accuracy": "83.3%",
                "avg_time": "202.2 seconds",
                "strong_concepts": [
                    {"concept": "Questions on number of relations and sets", "accuracy": "100.0%"},
                    {"concept": "Questions on Venn Diagram", "accuracy": "100.0%"}
                ],
                "weak_concepts": [
                    {"concept": "Questions on Symmetric Transitive and Reflexive Properties", "accuracy": "50.0%"}
                ]
            }
        }
    }
}

# API prompt for generating feedback
prompt = f"""
You are an expert tutor providing personalized feedback for a student's test performance. Based on the following data, generate a detailed, human-like feedback report that is motivating, encouraging, and actionable. The report should include:

1. An 'Overall Performance' section summarizing the total score, questions attempted, correct answers, accuracy, and time taken.
2. A personalized, motivating introduction (highlight specific achievements like strong concepts, acknowledge challenges like low attempt rates, avoid generic phrases).
3. A performance breakdown by difficulty level (Easy, Medium, Tough) across subjects (Physics, Chemistry, Mathematics), presented concisely.
4. Time vs. accuracy insights, explaining how time allocation impacts performance and identifying patterns (e.g., spending too long on easy questions).
5. A chapter-wise concept analysis, listing strong (≥80% accuracy) and weak (≤60% accuracy) concepts for each chapter in Physics (Electrostatics, Capacitance), Chemistry (Solutions, Electrochemistry), and Mathematics (Functions, Sets and Relations).
6. 2–3 actionable suggestions for improvement, focusing on specific weaknesses and leveraging strengths.

Use a friendly, supportive tone and keep the response clear and concise. Avoid technical jargon. Here is the performance data:

Overall Summary:
- Total Score: {performance_data['overall_summary']['total_score']}
- Questions Attempted: {performance_data['overall_summary']['questions_attempted']}
- Correct Answers: {performance_data['overall_summary']['correct_answers']}
- Overall Accuracy: {performance_data['overall_summary']['overall_accuracy']}
- Time Taken: {performance_data['overall_summary']['time_taken']}

Subject-wise Summary:
- Physics: Score: {performance_data['subject_summary']['Physics']['score']}, Attempted: {performance_data['subject_summary']['Physics']['questions_attempted']}, Correct: {performance_data['subject_summary']['Physics']['correct']}, Incorrect: {performance_data['subject_summary']['Physics']['incorrect']}, Accuracy: {performance_data['subject_summary']['Physics']['accuracy']}, Avg Time/Question: {performance_data['subject_summary']['Physics']['avg_time_per_question']}
- Chemistry: Score: {performance_data['subject_summary']['Chemistry']['score']}, Attempted: {performance_data['subject_summary']['Chemistry']['questions_attempted']}, Correct: {performance_data['subject_summary']['Chemistry']['correct']}, Incorrect: {performance_data['subject_summary']['Chemistry']['incorrect']}, Accuracy: {performance_data['subject_summary']['Chemistry']['accuracy']}, Avg Time/Question: {performance_data['subject_summary']['Chemistry']['avg_time_per_question']}
- Mathematics: Score: {performance_data['subject_summary']['Mathematics']['score']}, Attempted: {performance_data['subject_summary']['Mathematics']['questions_attempted']}, Correct: {performance_data['subject_summary']['Mathematics']['correct']}, Incorrect: {performance_data['subject_summary']['Mathematics']['incorrect']}, Accuracy: {performance_data['subject_summary']['Mathematics']['accuracy']}, Avg Time/Question: {performance_data['subject_summary']['Mathematics']['avg_time_per_question']}

Difficulty-wise Summary:
- Easy: Total: {performance_data['difficulty_summary']['Easy']['total']}, Attempted: {performance_data['difficulty_summary']['Easy']['attempted']}, Correct: {performance_data['difficulty_summary']['Easy']['correct']}, Incorrect: {performance_data['difficulty_summary']['Easy']['incorrect']}, Accuracy: {performance_data['difficulty_summary']['Easy']['accuracy']}, Avg Time: {performance_data['difficulty_summary']['Easy']['avg_time']}
- Medium: Total: {performance_data['difficulty_summary']['Medium']['total']}, Attempted: {performance_data['difficulty_summary']['Medium']['attempted']}, Correct: {performance_data['difficulty_summary']['Medium']['correct']}, Incorrect: {performance_data['difficulty_summary']['Medium']['incorrect']}, Accuracy: {performance_data['difficulty_summary']['Medium']['accuracy']}, Avg Time: {performance_data['difficulty_summary']['Medium']['avg_time']}
- Tough: Total: {performance_data['difficulty_summary']['Tough']['total']}, Attempted: {performance_data['difficulty_summary']['Tough']['attempted']}, Correct: {performance_data['difficulty_summary']['Tough']['correct']}, Incorrect: {performance_data['difficulty_summary']['Tough']['incorrect']}, Accuracy: {performance_data['difficulty_summary']['Tough']['accuracy']}, Avg Time: {performance_data['difficulty_summary']['Tough']['avg_time']}

Chapter-wise Concepts:
- Physics (Capacitance): Total: {performance_data['chapter_concepts']['Physics']['Capacitance']['total_questions']}, Attempted: {performance_data['chapter_concepts']['Physics']['Capacitance']['attempted']}, Correct: {performance_data['chapter_concepts']['Physics']['Capacitance']['correct']}, Accuracy: {performance_data['chapter_concepts']['Physics']['Capacitance']['accuracy']}, Avg Time: {performance_data['chapter_concepts']['Physics']['Capacitance']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Physics']['Capacitance']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Physics']['Capacitance']['weak_concepts']])}
- Physics (Electrostatics): Total: {performance_data['chapter_concepts']['Physics']['Electrostatics']['total_questions']}, Attempted: {performance_data['chapter_concepts']['Physics']['Electrostatics']['attempted']}, Correct: {performance_data['chapter_concepts']['Physics']['Electrostatics']['correct']}, Accuracy: {performance_data['chapter_concepts']['Physics']['Electrostatics']['accuracy']}, Avg Time: {performance_data['chapter_concepts']['Physics']['Electrostatics']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Physics']['Electrostatics']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Physics']['Electrostatics']['weak_concepts']])}
- Chemistry (Electrochemistry): Total: {performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['total_questions']}, Attempted: {performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['attempted']}, Correct: {performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['correct']}, Accuracy: {performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['accuracy']}, Avg Time: {performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['weak_concepts']])}
- Chemistry (Solutions): Total: {performance_data['chapter_concepts']['Chemistry']['Solutions']['total_questions']}, Attempted: {performance_data['chapter_concepts']['Chemistry']['Solutions']['attempted']}, Correct: {performance_data['chapter_concepts']['Chemistry']['Solutions']['correct']}, Accuracy: {performance_data['chapter_concepts']['Chemistry']['Solutions']['accuracy']}, Avg Time: {performance_data['chapter_concepts']['Chemistry']['Solutions']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Chemistry']['Solutions']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Chemistry']['Solutions']['weak_concepts']])}
- Mathematics (Functions): Total: {performance_data['chapter_concepts']['Mathematics']['Functions']['total_questions']}, Attempted: {performance_data['chapter_concepts']['Mathematics']['Functions']['attempted']}, Correct: {performance_data['chapter_concepts']['Mathematics']['Functions']['correct']}, Accuracy: {performance_data['chapter_concepts']['Mathematics']['Functions']['accuracy']}, Avg Time: {performance_data['chapter_concepts']['Mathematics']['Functions']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Mathematics']['Functions']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Mathematics']['Functions']['weak_concepts']])}
- Mathematics (Sets and Relations): Total: {performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['total_questions']}, Attempted: {performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['attempted']}, Correct: {performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['correct']}, Accuracy: {performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['accuracy']}, Avg Time: {performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['weak_concepts']])}

Format the response in clear sections with headers: 'Overall Performance', 'Motivating Introduction', 'Performance Breakdown', 'Time vs. Accuracy Insights', 'Chapter-wise Concept Analysis', and 'Actionable Suggestions'. Ensure the response is under 2000 tokens to stay within free tier limits.
"""

# API request
headers = {
    "Content-Type": "application/json"
}

payload = {
    "contents": [
        {
            "parts": [
                {"text": prompt}
            ],
            "role": "user"
        }
    ]
}

try:
    response = requests.post(f"{API_URL}?key={API_KEY}", headers=headers, json=payload)
    response.raise_for_status()
    feedback = response.json().get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "")
    print("Generated Feedback:")
    print(feedback)
except requests.RequestException as e:
    print(f"Error calling Gemini API: {e}")
    feedback = "Failed to generate feedback due to API error."

# Save feedback for PDF generation
with open("feedback_output.txt", "w") as f:
    f.write(feedback)

Generated Feedback:
**Overall Performance**

Total Score: 133/300
Questions Attempted: 47/75
Correct Answers: 36
Overall Accuracy: 76.6%
Time Taken: 83.3 minutes

**Motivating Introduction**

Hey there!  Looking at your results, I'm impressed by your strong performance in Chemistry, especially considering your 80% accuracy.  Your grasp of several key concepts in Physics and Mathematics is also evident, with some excellent scores in specific areas. While your attempt rate could be improved, the accuracy you demonstrated on the questions you *did* attempt shows a solid understanding of the material.  Let's work together to build on this strong foundation!

**Performance Breakdown**

| Subject       | Score | Attempted | Accuracy |
|---------------|-------|------------|-----------|
| Physics       | 44/100 | 16/25      | 75.0%     |
| Chemistry     | 60/100 | 20/25      | 80.0%     |
| Mathematics   | 29/100 | 11/25      | 72.7%     |

| Difficulty | Attempted | Accuracy | Avg Time (secon

In [40]:
import requests
import json
import os

# Gemini API configuration
API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyB_XxC9j3ySjkVagQKMdn49E-LanT6A118")  # Replace with your Gemini API key or set as environment variable
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"



# API prompt for generating feedback
prompt = f"""
You are an expert tutor providing personalized feedback for a student's test performance. Based on the following data, generate a detailed, human-like feedback report that is motivating, encouraging, and actionable. The report should include:

1. An 'Overall Performance' section summarizing the total score, questions attempted, correct answers, accuracy, and time taken.
2. A personalized, motivating introduction (highlight specific achievements like strong concepts, acknowledge challenges like low attempt rates, avoid generic phrases).
3. A performance breakdown by difficulty level (Easy, Medium, Tough) across subjects (Physics, Chemistry, Mathematics), presented concisely.
4. Time vs. accuracy insights, explaining how time allocation impacts performance and identifying patterns (e.g., spending too long on easy questions).
5. A chapter-wise concept analysis, listing strong (≥80% accuracy) and weak (≤60% accuracy) concepts for each chapter in Physics (Electrostatics, Capacitance), Chemistry (Solutions, Electrochemistry), and Mathematics (Functions, Sets and Relations).
6. 2–3 actionable suggestions for improvement, focusing on specific weaknesses and leveraging strengths.

Use a friendly, supportive tone and keep the response clear and concise. Avoid technical jargon. Here is the performance data:

Overall Summary:
- Total Score: {new_performance_data['overall_summary']['total_score']}
- Questions Attempted: {new_performance_data['overall_summary']['questions_attempted']}
- Correct Answers: {new_performance_data['overall_summary']['correct_answers']}
- Overall Accuracy: {new_performance_data['overall_summary']['overall_accuracy']}
- Time Taken: {new_performance_data['overall_summary']['time_taken']}

Subject-wise Summary:
- Physics: Score: {new_performance_data['subject_summary']['Physics']['score']}, Attempted: {new_performance_data['subject_summary']['Physics']['questions_attempted']}, Correct: {new_performance_data['subject_summary']['Physics']['correct']}, Incorrect: {new_performance_data['subject_summary']['Physics']['incorrect']}, Accuracy: {new_performance_data['subject_summary']['Physics']['accuracy']}, Avg Time/Question: {new_performance_data['subject_summary']['Physics']['avg_time_per_question']}
- Chemistry: Score: {new_performance_data['subject_summary']['Chemistry']['score']}, Attempted: {new_performance_data['subject_summary']['Chemistry']['questions_attempted']}, Correct: {new_performance_data['subject_summary']['Chemistry']['correct']}, Incorrect: {new_performance_data['subject_summary']['Chemistry']['incorrect']}, Accuracy: {new_performance_data['subject_summary']['Chemistry']['accuracy']}, Avg Time/Question: {new_performance_data['subject_summary']['Chemistry']['avg_time_per_question']}
- Mathematics: Score: {new_performance_data['subject_summary']['Mathematics']['score']}, Attempted: {new_performance_data['subject_summary']['Mathematics']['questions_attempted']}, Correct: {new_performance_data['subject_summary']['Mathematics']['correct']}, Incorrect: {new_performance_data['subject_summary']['Mathematics']['incorrect']}, Accuracy: {new_performance_data['subject_summary']['Mathematics']['accuracy']}, Avg Time/Question: {new_performance_data['subject_summary']['Mathematics']['avg_time_per_question']}

Difficulty-wise Summary:
- Easy: Total: {new_performance_data['difficulty_summary']['Easy']['total']}, Attempted: {new_performance_data['difficulty_summary']['Easy']['attempted']}, Correct: {new_performance_data['difficulty_summary']['Easy']['correct']}, Incorrect: {new_performance_data['difficulty_summary']['Easy']['incorrect']}, Accuracy: {new_performance_data['difficulty_summary']['Easy']['accuracy']}, Avg Time: {new_performance_data['difficulty_summary']['Easy']['avg_time']}
- Medium: Total: {new_performance_data['difficulty_summary']['Medium']['total']}, Attempted: {new_performance_data['difficulty_summary']['Medium']['attempted']}, Correct: {new_performance_data['difficulty_summary']['Medium']['correct']}, Incorrect: {new_performance_data['difficulty_summary']['Medium']['incorrect']}, Accuracy: {new_performance_data['difficulty_summary']['Medium']['accuracy']}, Avg Time: {new_performance_data['difficulty_summary']['Medium']['avg_time']}
- Tough: Total: {new_performance_data['difficulty_summary']['Tough']['total']}, Attempted: {new_performance_data['difficulty_summary']['Tough']['attempted']}, Correct: {new_performance_data['difficulty_summary']['Tough']['correct']}, Incorrect: {new_performance_data['difficulty_summary']['Tough']['incorrect']}, Accuracy: {new_performance_data['difficulty_summary']['Tough']['accuracy']}, Avg Time: {new_performance_data['difficulty_summary']['Tough']['avg_time']}

Chapter-wise Concepts:
- Physics (Capacitance): Total: {new_performance_data['chapter_concepts']['Physics']['Capacitance']['total_questions']}, Attempted: {new_performance_data['chapter_concepts']['Physics']['Capacitance']['attempted']}, Correct: {new_performance_data['chapter_concepts']['Physics']['Capacitance']['correct']}, Accuracy: {new_performance_data['chapter_concepts']['Physics']['Capacitance']['accuracy']}, Avg Time: {new_performance_data['chapter_concepts']['Physics']['Capacitance']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Physics']['Capacitance']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Physics']['Capacitance']['weak_concepts']])}
- Physics (Electrostatics): Total: {new_performance_data['chapter_concepts']['Physics']['Electrostatics']['total_questions']}, Attempted: {new_performance_data['chapter_concepts']['Physics']['Electrostatics']['attempted']}, Correct: {new_performance_data['chapter_concepts']['Physics']['Electrostatics']['correct']}, Accuracy: {new_performance_data['chapter_concepts']['Physics']['Electrostatics']['accuracy']}, Avg Time: {new_performance_data['chapter_concepts']['Physics']['Electrostatics']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Physics']['Electrostatics']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Physics']['Electrostatics']['weak_concepts']])}
- Chemistry (Electrochemistry): Total: {new_performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['total_questions']}, Attempted: {new_performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['attempted']}, Correct: {new_performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['correct']}, Accuracy: {new_performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['accuracy']}, Avg Time: {new_performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Chemistry']['Electrochemistry']['weak_concepts']])}
- Chemistry (Solutions): Total: {new_performance_data['chapter_concepts']['Chemistry']['Solutions']['total_questions']}, Attempted: {new_performance_data['chapter_concepts']['Chemistry']['Solutions']['attempted']}, Correct: {new_performance_data['chapter_concepts']['Chemistry']['Solutions']['correct']}, Accuracy: {new_performance_data['chapter_concepts']['Chemistry']['Solutions']['accuracy']}, Avg Time: {new_performance_data['chapter_concepts']['Chemistry']['Solutions']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Chemistry']['Solutions']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Chemistry']['Solutions']['weak_concepts']])}
- Mathematics (Functions): Total: {new_performance_data['chapter_concepts']['Mathematics']['Functions']['total_questions']}, Attempted: {new_performance_data['chapter_concepts']['Mathematics']['Functions']['attempted']}, Correct: {new_performance_data['chapter_concepts']['Mathematics']['Functions']['correct']}, Accuracy: {new_performance_data['chapter_concepts']['Mathematics']['Functions']['accuracy']}, Avg Time: {new_performance_data['chapter_concepts']['Mathematics']['Functions']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Mathematics']['Functions']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Mathematics']['Functions']['weak_concepts']])}
- Mathematics (Sets and Relations): Total: {new_performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['total_questions']}, Attempted: {new_performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['attempted']}, Correct: {new_performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['correct']}, Accuracy: {new_performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['accuracy']}, Avg Time: {new_performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['avg_time']}
  Strong Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['strong_concepts']])}
  Weak Concepts: {', '.join([f"{c['concept']} ({c['accuracy']})" for c in new_performance_data['chapter_concepts']['Mathematics']['Sets and Relations']['weak_concepts']])}

Format the response in clear sections with headers: 'Overall Performance', 'Motivating Introduction', 'Performance Breakdown', 'Time vs. Accuracy Insights', 'Chapter-wise Concept Analysis', and 'Actionable Suggestions'. Ensure the response is under 2000 tokens to stay within free tier limits.
"""

# API request
headers = {
    "Content-Type": "application/json"
}

payload = {
    "contents": [
        {
            "parts": [
                {"text": prompt}
            ],
            "role": "user"
        }
    ]
}

try:
    response = requests.post(f"{API_URL}?key={API_KEY}", headers=headers, json=payload)
    response.raise_for_status()
    feedback = response.json().get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "")
    print("Generated Feedback:")
    print(feedback)
except requests.RequestException as e:
    print(f"Error calling Gemini API: {e}")
    feedback = "Failed to generate feedback due to API error."

# Save feedback for PDF generation
with open("feedback_output.txt", "w") as f:
    f.write(feedback)

Generated Feedback:
**Overall Performance**

* Total Score: 133/300 (44.3%)
* Questions Attempted: 47/75 (62.7%)
* Correct Answers: 36
* Overall Accuracy: 76.6%
* Time Taken: 83.3 minutes


**Motivating Introduction**

Hey there!  Looking at your results, it's clear you have a strong grasp of many concepts, particularly in Chemistry and parts of Physics and Mathematics.  Your 76.6% accuracy is commendable, showing a solid understanding of the material.  While your attempt rate could be improved, focusing on building confidence and efficient time management will significantly boost your score.  Let's dive into the specifics to see how we can build on your successes and address areas for growth.


**Performance Breakdown**

| Subject       | Score | Attempted | Accuracy |
|---------------|-------|------------|-----------|
| Physics       | 44/100 | 16/25      | 75%       |
| Chemistry     | 60/100 | 20/25      | 80%       |
| Mathematics   | 29/100 | 11/25      | 72.7%     |

| Difficult

In [42]:
import json
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
import os
import tempfile
import time
import re
import unicodedata

# --- Load JSON Data ---
def load_json_data(file_path):
    try:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"JSON file not found: {file_path}")
        with open(file_path, 'r') as file:
            return json.load(file)
    except Exception as e:
        print(f"Error loading JSON: {e}")
        return None

# --- Process Data ---
def process_data(json_data):
    if not json_data or not isinstance(json_data, list):
        print("Invalid JSON data")
        return None

    processed_data = {
        "overall_summary": {},
        "subject_summary": defaultdict(dict),
        "chapter_details": defaultdict(dict)
    }
    chapter_stats = defaultdict(lambda: {
        "questions_total": 0,
        "answered": 0,
        "correct": 0,
        "incorrect": 0,
        "marked_review": 0,
        "not_answered": 0,
        "total_time_seconds": 0,
        "difficulty_counts": defaultdict(int),
        "difficulty_stats": defaultdict(lambda: {"correct": 0, "incorrect": 0, "unattempted": 0})
    })

    concept_stats = defaultdict(lambda: defaultdict(lambda: {"total": 0, "correct": 0, "incorrect": 0}))

    section_subject_map = {
        "Physics Single Correct": "Physics",
        "Physics Numerical": "Physics",
        "Chemistry Single Correct": "Chemistry",
        "Chemistry Numerical": "Chemistry",
        "Mathematics Single Correct": "Mathematics",
        "Mathematics Numerical": "Mathematics"
    }

    subject_map = {
        "607018ee404ae53194e73d92": "Physics",
        "607018ee404ae53194e73d90": "Chemistry",
        "607018ee404ae53194e73d91": "Mathematics"
    }

    debug_counts = defaultdict(list)

    data = json_data[0]
    processed_data["overall_summary"] = {
        "total_marks_scored": data.get("totalMarkScored", 0),
        "total_marks_possible": data.get("totalMarks", 300),
        "total_time_taken_seconds": data.get("totalTimeTaken", 0),
        "total_questions_in_test": data.get("test", {}).get("totalQuestions", 0),
        "final_attempted": data.get("totalAttempted", 0),
        "final_correct": data.get("totalCorrect", 0),
        "overall_accuracy_percent": data.get("accuracy", 0),
        "time_taken_minutes": data.get("totalTimeTaken", 0) / 60.0
    }

    for subject in data.get("subjects", []):
        subject_id = subject.get("subjectId", {}).get("$oid", "")
        subject_name = subject_map.get(subject_id, "Unknown")
        processed_data["subject_summary"][subject_name] = {
            "marks_scored": subject.get("totalMarkScored", 0),
            "total_marks_possible": subject.get("totalMarks", 100),
            "time_seconds": subject.get("totalTimeTaken", 0),
            "attempted": subject.get("totalAttempted", 0),
            "correct": subject.get("totalCorrect", 0),
            "incorrect": subject.get("totalAttempted", 0) - subject.get("totalCorrect", 0),
            "accuracy_percent": subject.get("accuracy", 0),
            "avg_time_per_attempted_q_seconds": (subject.get("totalTimeTaken", 0) / subject.get("totalAttempted", 0)) if subject.get("totalAttempted", 0) > 0 else 0
        }

    for section in data.get("sections", []):
        section_title = section.get("sectionId", {}).get("title", "")
        subject = section_subject_map.get(section_title, "Unknown")

        for question in section.get("questions", []):
            chapter = question.get("questionId", {}).get("chapters", [{}])[0].get("title", "Unknown")
            status = question.get("status", "notAnswered")
            time_taken = question.get("timeTaken", 0)
            level = question.get("questionId", {}).get("level", "unknown")
            concepts = [concept.get("title", "Unknown") for concept in question.get("questionId", {}).get("concepts", [])]

            if (subject == "Physics" and chapter not in ["Electrostatics", "Capacitance"]) or \
               (subject == "Chemistry" and chapter not in ["Solutions", "Electrochemistry"]) or \
               (subject == "Mathematics" and chapter not in ["Functions", "Sets and Relations"]):
                continue

            chapter_stats[(subject, chapter)]["questions_total"] += 1
            chapter_stats[(subject, chapter)]["total_time_seconds"] += time_taken
            chapter_stats[(subject, chapter)]["difficulty_counts"][level] += 1

            debug_info = {
                "status": status,
                "time_taken": time_taken,
                "level": level,
                "concepts": concepts
            }

            if status == "answered":
                chapter_stats[(subject, chapter)]["answered"] += 1
                is_correct = False
                marked_options = question.get("markedOptions", [])
                input_value = question.get("inputValue", {})

                if marked_options:
                    is_correct = any(opt.get("isCorrect", False) for opt in marked_options)
                elif input_value.get("value") is not None:
                    is_correct = input_value.get("isCorrect", False)

                if is_correct:
                    chapter_stats[(subject, chapter)]["correct"] += 1
                    chapter_stats[(subject, chapter)]["difficulty_stats"][level]["correct"] += 1
                    debug_info["correct"] = True
                else:
                    chapter_stats[(subject, chapter)]["incorrect"] += 1
                    chapter_stats[(subject, chapter)]["difficulty_stats"][level]["incorrect"] += 1
                    debug_info["correct"] = False

                for concept in concepts:
                    concept_stats[(subject, chapter)][concept]["total"] += 1
                    if is_correct:
                        concept_stats[(subject, chapter)][concept]["correct"] += 1
                    else:
                        concept_stats[(subject, chapter)][concept]["incorrect"] += 1
            elif status == "markedReview":
                chapter_stats[(subject, chapter)]["marked_review"] += 1
                chapter_stats[(subject, chapter)]["difficulty_stats"][level]["unattempted"] += 1
            elif status == "notAnswered":
                chapter_stats[(subject, chapter)]["not_answered"] += 1
                chapter_stats[(subject, chapter)]["difficulty_stats"][level]["unattempted"] += 1

            debug_counts[(subject, chapter)].append(debug_info)

    for (subject, chapter), stats in chapter_stats.items():
        stats["accuracy_on_answered_percent"] = (stats["correct"] / stats["answered"] * 100) if stats["answered"] > 0 else 0.0
        stats["avg_time_per_answered_q_seconds"] = (stats["total_time_seconds"] / stats["answered"]) if stats["answered"] > 0 else 0.0
        processed_data["chapter_details"][subject][chapter] = stats

    subject_questions = defaultdict(int)
    for subject in processed_data["chapter_details"]:
        subject_questions[subject] = sum(chapter["questions_total"] for chapter in processed_data["chapter_details"][subject].values())
        processed_data["subject_summary"][subject]["total_questions"] = subject_questions[subject]

    processed_data["overall_summary"]["total_questions_calculated"] = sum(subject_questions.values())

    return processed_data

# --- Function to Extract Data for Charts ---
def extract_chart_data(processed_data):
    chart_data = {}
    difficulty_levels = ['easy', 'medium', 'tough']

    for subject in processed_data["chapter_details"]:
        chart_data[subject] = {
            'Easy': {'correct': 0, 'incorrect': 0, 'unattempted': 0, 'total': 0},
            'Medium': {'correct': 0, 'incorrect': 0, 'unattempted': 0, 'total': 0},
            'Tough': {'correct': 0, 'incorrect': 0, 'unattempted': 0, 'total': 0}
        }
        for chapter, stats in processed_data["chapter_details"][subject].items():
            for level in difficulty_levels:
                level_key = level.capitalize()
                chart_data[subject][level_key]['correct'] += stats["difficulty_stats"][level]["correct"]
                chart_data[subject][level_key]['incorrect'] += stats["difficulty_stats"][level]["incorrect"]
                chart_data[subject][level_key]['unattempted'] += stats["difficulty_stats"][level]["unattempted"]
                chart_data[subject][level_key]['total'] += stats["difficulty_counts"][level]

    return chart_data

# --- Function to Plot Bar Chart for a Subject and Save as Image ---
def plot_subject_chart(subject, subject_data, temp_image_path):
    difficulty_levels = ['Easy', 'Medium', 'Tough']
    correct = [subject_data[level]['correct'] for level in difficulty_levels]
    incorrect = [subject_data[level]['incorrect'] for level in difficulty_levels]
    unattempted = [subject_data[level]['unattempted'] for level in difficulty_levels]
    totals = [subject_data[level]['total'] for level in difficulty_levels]

    colors = {'Correct': '#36A2EB', 'Incorrect': '#FF6384', 'Unattempted': '#FFCE56'}

    x = np.arange(len(difficulty_levels))
    width = 0.25

    fig, ax = plt.subplots(figsize=(5, 3))  # Smaller size for grid layout

    bar1 = ax.bar(x - width, correct, width, label='Correct', color=colors['Correct'])
    bar2 = ax.bar(x, incorrect, width, label='Incorrect', color=colors['Incorrect'])
    bar3 = ax.bar(x + width, unattempted, width, label='Unattempted', color=colors['Unattempted'])

    for bar in [bar1, bar2, bar3]:
        for b in bar:
            height = b.get_height()
            ax.text(b.get_x() + b.get_width() / 2., height, f'{int(height)}',
                    ha='center', va='bottom', color='black', fontsize=8)

    ax.set_xlabel('Difficulty', fontsize=8)
    ax.set_ylabel('Questions', fontsize=8)
    ax.set_title(f'{subject} Performance', fontsize=10)
    ax.set_xticks(x)
    ax.set_xticklabels(difficulty_levels, fontsize=8)
    ax.legend(fontsize=6)

    ax.set_ylim(0, max(totals) + 2)

    try:
        plt.tight_layout()
        plt.savefig(temp_image_path, format='png', dpi=300, bbox_inches='tight')
        plt.close(fig)
        time.sleep(0.2)
        if not os.path.exists(temp_image_path):
            raise FileNotFoundError(f"Failed to save image at {temp_image_path}")
        if not os.access(temp_image_path, os.R_OK):
            raise PermissionError(f"Cannot read image at {temp_image_path}")
        print(f"Chart saved successfully for {subject} at {temp_image_path}")
    except Exception as e:
        print(f"Error saving chart image for {subject}: {e}")
        plt.close(fig)
        return None, None

    description = f"Total questions - Easy: {totals[0]}, Medium: {totals[1]}, Tough: {totals[2]}"
    return temp_image_path, description

# --- Function to Clean Document Content ---
def clean_document_content(content):
    cleaned_content = content.replace('*', '')
    lines = cleaned_content.split('\n')
    output_lines = []
    in_table = False
    table_data = []
    table_count = 0

    for line in lines:
        line = line.strip()
        if not line:
            if in_table and table_data:
                table_data = [row for row in table_data if not all(cell.strip().startswith('-') and len(cell.strip()) > 1 for cell in row)]
                if table_data:
                    table_count += 1
                    output_lines.append({'type': 'table', 'data': table_data, 'table_type': 'subject' if table_count == 1 else 'difficulty'})
                table_data = []
                in_table = False
            continue
        if line.startswith('|'):
            in_table = True
            table_data.append([cell.strip() for cell in line.split('|')[1:-1]])
            continue
        if in_table and not line.startswith('|'):
            in_table = False
            if table_data:
                table_data = [row for row in table_data if not all(cell.strip().startswith('-') and len(cell.strip()) > 1 for cell in row)]
                if table_data:
                    table_count += 1
                    output_lines.append({'type': 'table', 'data': table_data, 'table_type': 'subject' if table_count == 1 else 'difficulty'})
                table_data = []
            output_lines.append({'type': 'text', 'content': line})
        else:
            output_lines.append({'type': 'text', 'content': line})

    if table_data:
        table_data = [row for row in table_data if not all(cell.strip().startswith('-') and len(cell.strip()) > 1 for cell in row)]
        if table_data:
            table_count += 1
            output_lines.append({'type': 'table', 'data': table_data, 'table_type': 'difficulty'})

    return output_lines

def create_styled_pdf(cleaned_content, chart_data, output_filename='feedback_report.pdf'):
    doc = SimpleDocTemplate(output_filename, pagesize=letter, rightMargin=0.75*inch, leftMargin=0.75*inch, topMargin=0.75*inch, bottomMargin=0.75*inch)
    styles = getSampleStyleSheet()

    main_title_style = ParagraphStyle(
        name='MainTitleStyle',
        fontSize=18,
        leading=22,
        spaceAfter=10,
        fontName='Helvetica-Bold',
        textColor=colors.navy,
        alignment=1  # Center
    )

    section_title_style = ParagraphStyle(
        name='SectionTitleStyle',
        fontSize=14,
        leading=16,
        spaceAfter=6,
        fontName='Helvetica-Bold',
        textColor=colors.darkblue,
        alignment=0  # Left
    )

    subheading_style = ParagraphStyle(
        name='SubheadingStyle',
        fontSize=10,
        leading=12,
        spaceAfter=4,
        fontName='Helvetica-Oblique',
        textColor=colors.darkslategray
    )

    body_style = ParagraphStyle(
        name='BodyStyle',
        fontSize=8,
        leading=10,
        spaceAfter=3,
        fontName='Times-Roman',
        textColor=colors.black
    )

    list_style = ParagraphStyle(
        name='ListStyle',
        fontSize=8,
        leading=10,
        spaceAfter=3,
        fontName='Times-Roman',
        textColor=colors.black,
        leftIndent=16,
        bulletFontName='Times-Roman',
        bulletFontSize=8,
        bulletIndent=8
    )

    story = []
    temp_files = []  # Track temporary files for cleanup
    current_section = None
    in_performance_breakdown = False
    table_count = 0  # Track number of tables in Performance Breakdown
    page_width = letter[0] - 1.5*inch  # Available width after margins
    charts_added = False  # Flag to prevent duplicate charts

    # Add main title
    story.append(Paragraph("Test Performance Report", main_title_style))
    story.append(Spacer(1, 0.08*inch))

    def add_chart(subject, image_path, description):
        nonlocal story, temp_files
        try:
            from PIL import Image as PILImage
            with PILImage.open(image_path) as img:
                img_width, img_height = img.size
                aspect_ratio = img_height / img_width
                target_width = min(page_width, 5.5*inch)
                target_height = target_width * aspect_ratio
                if target_height > 3.5*inch:
                    target_height = 3.5*inch
                    target_width = target_height / aspect_ratio
            story.append(Spacer(1, 0.05*inch))
            story.append(Paragraph(f"{subject} Performance Chart", subheading_style))
            img = Image(image_path, width=target_width, height=target_height)
            img.hAlign = 'CENTER'
            story.append(img)
            story.append(Paragraph(description, body_style))
            story.append(Spacer(1, 0.05*inch))
            temp_files.append(image_path)
            print(f"Added chart for {subject} to PDF")
        except Exception as e:
            print(f"Error embedding image for {subject}: {e}")

    for item in cleaned_content:
        if item['type'] == 'text':
            content = item['content']
            if content in ['Overall Performance', 'Motivating Introduction', 'Performance Breakdown',
                          'Time vs. Accuracy Insights', 'Chapter-wise Concept Analysis', 'Actionable Suggestions']:
                if in_performance_breakdown and table_count >= 2 and not charts_added:
                    # Add Physics and Chemistry charts on page 2
                    story.append(PageBreak())
                    for subject in ['Physics', 'Chemistry']:
                        if subject in chart_data:
                            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
                            temp_image_path = temp_file.name
                            temp_file.close()
                            print(f"Generating chart for {subject} at {temp_image_path}")
                            image_path, description = plot_subject_chart(subject, chart_data[subject], temp_image_path)
                            if image_path and os.path.exists(image_path):
                                add_chart(subject, image_path, description)
                    # Add Mathematics chart on page 3
                    story.append(PageBreak())
                    if chart_data.get('Mathematics'):
                        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
                        temp_image_path = temp_file.name
                        temp_file.close()
                        print(f"Generating chart for Mathematics at {temp_image_path}")
                        image_path, description = plot_subject_chart('Mathematics', chart_data['Mathematics'], temp_image_path)
                        if image_path and os.path.exists(image_path):
                            add_chart('Mathematics', image_path, description)
                    charts_added = True
                story.append(Paragraph(content, section_title_style))
                current_section = content
                in_performance_breakdown = (content == 'Performance Breakdown')
                table_count = 0  # Reset table count for new section
                continue
            if content in ['Physics:', 'Chemistry:', 'Mathematics:']:
                story.append(Paragraph(content[:-1], subheading_style))
                continue
            if content.startswith('Electrostatics:') or content.startswith('Capacitance:') or \
               content.startswith('Solutions:') or content.startswith('Electrochemistry:') or \
               content.startswith('Functions:') or content.startswith('Sets and Relations:'):
                story.append(Paragraph(f"• {content}", list_style))
                continue
            if current_section == 'Time vs. Accuracy Insights':
                try:
                    cleaned_content = re.sub(r'^\s*[-•]|\$\s*\\cdot\s*\d*\s*', '', content).strip()
                    cleaned_content = re.sub(r'\s+', ' ', cleaned_content.replace('\n', ' ')).strip()
                    sentences = re.split(r'(?<!\d)\.(?!\d)', cleaned_content)
                    sentences = [s.strip() for s in sentences if s.strip()]
                    for sentence in sentences:
                        if sentence and sentence != '.':
                            story.append(Paragraph(f"• {sentence}.", list_style))
                except re.error as e:
                    print(f"Regex error in bullet point processing: {e}")
                    cleaned_fallback = re.sub(r'\s+', ' ', content.replace('\n', ' ')).strip()
                    sentences = re.split(r'(?<!\d)\.(?!\d)', cleaned_fallback)
                    sentences = [s.strip() for s in sentences if s.strip()]
                    for sentence in sentences:
                        if sentence and sentence != '.':
                            story.append(Paragraph(f"• {sentence}.", list_style))
                continue
            if current_section == 'Actionable Suggestions':
                # Enhanced cleaning
                cleaned_content = unicodedata.normalize('NFKD', content)  # Normalize Unicode
                cleaned_content = re.sub(r'[^\x20-\x7E]', '', cleaned_content)  # Keep only printable ASCII characters
                cleaned_content = re.sub(r'^\s*[-•]|\$\s*\\cdot\s*\d*\s*', '', cleaned_content).strip()
                sections = re.split(r'\s*\d+\s*(?=\n)', cleaned_content)
                sections = [s.strip() for s in sections if s.strip()]
                section_number = 1
                for section in sections:
                    section_cleaned = re.sub(r'\s+', ' ', section.replace('\n', ' ')).strip()
                    if section_cleaned:
                        sentences = re.split(r'(?<!\d)\.(?!\d)', section_cleaned)
                        sentences = [s.strip() for s in sentences if s.strip()]
                        if sentences:
                            paragraph_text = '. '.join(sentences)
                            if not paragraph_text.endswith('.'):
                                paragraph_text += '.'
                            print(f"Actionable Suggestion {section_number}: {paragraph_text}")  # Debug logging
                            story.append(Paragraph(paragraph_text, body_style))
                            story.append(Spacer(1, 0.03*inch))
                    section_number += 1
                continue
            story.append(Paragraph(content.replace('%', '%'), body_style))
            story.append(Spacer(1, 0.03*inch))

        elif item['type'] == 'table' and in_performance_breakdown:
            table_title = "Subject Performance" if item['table_type'] == 'subject' else "Difficulty Performance"
            story.append(Paragraph(table_title, subheading_style))
            col_count = len(item['data'][0])
            col_width = (page_width - 0.3*inch) / col_count
            if item['table_type'] == 'difficulty':
                item['data'][0] = [col.replace('Avg Time (seconds)', 'Avg Time(s)') for col in item['data'][0]]
            table = Table(item['data'], colWidths=[col_width] * col_count)
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.lightblue),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, -1), 9),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
                ('TOPPADDING', (0, 0), (-1, -1), 6),
                ('LEFTPADDING', (0, 0), (-1, -1), 5),
                ('RIGHTPADDING', (0, 0), (-1, -1), 5),
                ('BACKGROUND', (0, 1), (-1, -1), colors.white),
                ('GRID', (0, 0), (-1, -1), 1, colors.black),
                ('BOX', (0, 0), (-1, -1), 1, colors.black),
            ]))
            story.append(table)
            story.append(Spacer(1, 0.05*inch))
            table_count += 1

    # Build the PDF
    try:
        doc.build(story)
        print(f"PDF generated: {output_filename}")
    except Exception as e:
        print(f"Error building PDF: {e}")
    finally:
        # Clean up temporary files
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                    print(f"Removed temporary file {temp_file}")
            except Exception as e:
                print(f"Error removing temporary file {temp_file}: {e}")

# --- Main Execution ---
file_path = "/content/sample_submission_analysis_1.json"
text_file_path = "/content/feedback_output.txt"
pdf_path = "feedback2_report.pdf"

if not os.path.exists(text_file_path):
    print(f"Text file not found: {text_file_path}")
    exit(1)

json_data = load_json_data(file_path)
if json_data:
    processed_data = process_data(json_data)
    if processed_data:
        chart_data = extract_chart_data(processed_data)
        try:
            with open(text_file_path, "r") as file:
                document_content = file.read()
            print(f"Successfully loaded text file: {text_file_path}")
            cleaned_content = clean_document_content(document_content)
            create_styled_pdf(cleaned_content, chart_data, pdf_path)
        except Exception as e:
            print(f"Error processing text file: {e}")
else:
    print("Failed to load JSON file")

Successfully loaded text file: /content/feedback_output.txt
Generating chart for Physics at /tmp/tmppe8owx_k.png
Chart saved successfully for Physics at /tmp/tmppe8owx_k.png
Added chart for Physics to PDF
Generating chart for Chemistry at /tmp/tmpltarogu9.png
Chart saved successfully for Chemistry at /tmp/tmpltarogu9.png
Added chart for Chemistry to PDF
Generating chart for Mathematics at /tmp/tmp0gjulwvk.png
Chart saved successfully for Mathematics at /tmp/tmp0gjulwvk.png
Added chart for Mathematics to PDF
Actionable Suggestion 1: 1. Time Management Strategy: Practice timed tests focusing on improving speed on easier questions. Use the time saved to tackle more challenging problems effectively. Try techniques like eliminating obviously wrong answers first to save time.
Actionable Suggestion 1: 2. Targeted Concept Review: Create a focused study plan addressing the weak concepts identified above. Use practice problems and different resources to understand these concepts thoroughly. For 

In [43]:
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch

def create_styled_pdf(cleaned_content, chart_data, output_filename='feedback_report.pdf'):
    doc = SimpleDocTemplate(output_filename, pagesize=letter, rightMargin=0.75*inch, leftMargin=0.75*inch, topMargin=0.75*inch, bottomMargin=0.75*inch)
    styles = getSampleStyleSheet()

    # Existing styles
    main_title_style = ParagraphStyle(
        name='MainTitleStyle',
        fontSize=18,
        leading=22,
        spaceAfter=10,
        fontName='Helvetica-Bold',
        textColor=colors.navy,
        alignment=1  # Center
    )

    section_title_style = ParagraphStyle(
        name='SectionTitleStyle',
        fontSize=14,
        leading=16,
        spaceAfter=6,
        fontName='Helvetica-Bold',
        textColor=colors.darkblue,
        alignment=0  # Left
    )

    subheading_style = ParagraphStyle(
        name='SubheadingStyle',
        fontSize=10,
        leading=12,
        spaceAfter=4,
        fontName='Helvetica-Oblique',
        textColor=colors.darkslategray
    )

    body_style = ParagraphStyle(
        name='BodyStyle',
        fontSize=8,
        leading=10,
        spaceAfter=3,
        fontName='Times-Roman',
        textColor=colors.black
    )

    list_style = ParagraphStyle(
        name='ListStyle',
        fontSize=8,
        leading=10,
        spaceAfter=3,
        fontName='Times-Roman',
        textColor=colors.black,
        leftIndent=16,
        bulletFontName='Times-Roman',
        bulletFontSize=8,
        bulletIndent=8
    )

    # New style for Actionable Suggestions
    suggestion_style = ParagraphStyle(
        name='SuggestionStyle',
        fontSize=8,
        leading=12,  # Slightly more leading for readability
        spaceAfter=6,  # More spacing after each suggestion
        fontName='Helvetica-Bold',  # Bold for emphasis
        textColor=colors.darkgreen,  # Distinct color (dark green) to stand out
        leftIndent=20,  # Indent to visually separate from other content
        bulletFontName='Helvetica-Bold',  # Bold bullet
        bulletFontSize=8,
        bulletIndent=10,  # Indent for bullet
        alignment=0  # Left-aligned
    )

    story = []
    temp_files = []  # Track temporary files for cleanup
    current_section = None
    in_performance_breakdown = False
    table_count = 0  # Track number of tables in Performance Breakdown
    page_width = letter[0] - 1.5*inch  # Available width after margins
    charts_added = False  # Flag to prevent duplicate charts

    # Add main title
    story.append(Paragraph("Test Performance Report", main_title_style))
    story.append(Spacer(1, 0.08*inch))

    def add_chart(subject, image_path, description):
        nonlocal story, temp_files
        try:
            from PIL import Image as PILImage
            with PILImage.open(image_path) as img:
                img_width, img_height = img.size
                aspect_ratio = img_height / img_width
                target_width = min(page_width, 5.5*inch)
                target_height = target_width * aspect_ratio
                if target_height > 3.5*inch:
                    target_height = 3.5*inch
                    target_width = target_height / aspect_ratio
            story.append(Spacer(1, 0.05*inch))
            story.append(Paragraph(f"{subject} Performance Chart", subheading_style))
            img = Image(image_path, width=target_width, height=target_height)
            img.hAlign = 'CENTER'
            story.append(img)
            story.append(Paragraph(description, body_style))
            story.append(Spacer(1, 0.05*inch))
            temp_files.append(image_path)
            print(f"Added chart for {subject} to PDF")
        except Exception as e:
            print(f"Error embedding image for {subject}: {e}")

    for item in cleaned_content:
        if item['type'] == 'text':
            content = item['content']
            if content in ['Overall Performance', 'Motivating Introduction', 'Performance Breakdown',
                          'Time vs. Accuracy Insights', 'Chapter-wise Concept Analysis', 'Actionable Suggestions']:
                if in_performance_breakdown and table_count >= 2 and not charts_added:
                    # Add Physics and Chemistry charts on page 2
                    story.append(PageBreak())
                    for subject in ['Physics', 'Chemistry']:
                        if subject in chart_data:
                            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
                            temp_image_path = temp_file.name
                            temp_file.close()
                            print(f"Generating chart for {subject} at {temp_image_path}")
                            image_path, description = plot_subject_chart(subject, chart_data[subject], temp_image_path)
                            if image_path and os.path.exists(image_path):
                                add_chart(subject, image_path, description)
                    # Add Mathematics chart on page 3
                    story.append(PageBreak())
                    if chart_data.get('Mathematics'):
                        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
                        temp_image_path = temp_file.name
                        temp_file.close()
                        print(f"Generating chart for Mathematics at {temp_image_path}")
                        image_path, description = plot_subject_chart('Mathematics', chart_data['Mathematics'], temp_image_path)
                        if image_path and os.path.exists(image_path):
                            add_chart('Mathematics', image_path, description)
                    charts_added = True
                story.append(Paragraph(content, section_title_style))
                current_section = content
                in_performance_breakdown = (content == 'Performance Breakdown')
                table_count = 0  # Reset table count for new section
                continue
            if content in ['Physics:', 'Chemistry:', 'Mathematics:']:
                story.append(Paragraph(content[:-1], subheading_style))
                continue
            if content.startswith('Electrostatics:') or content.startswith('Capacitance:') or \
               content.startswith('Solutions:') or content.startswith('Electrochemistry:') or \
               content.startswith('Functions:') or content.startswith('Sets and Relations:'):
                story.append(Paragraph(f"• {content}", list_style))
                continue
            if current_section == 'Time vs. Accuracy Insights':
                try:
                    cleaned_content = re.sub(r'^\s*[-•]|\$\s*\\cdot\s*\d*\s*', '', content).strip()
                    cleaned_content = re.sub(r'\s+', ' ', cleaned_content.replace('\n', ' ')).strip()
                    sentences = re.split(r'(?<!\d)\.(?!\d)', cleaned_content)
                    sentences = [s.strip() for s in sentences if s.strip()]
                    for sentence in sentences:
                        if sentence and sentence != '.':
                            story.append(Paragraph(f"• {sentence}.", list_style))
                except re.error as e:
                    print(f"Regex error in bullet point processing: {e}")
                    cleaned_fallback = re.sub(r'\s+', ' ', content.replace('\n', ' ')).strip()
                    sentences = re.split(r'(?<!\d)\.(?!\d)', cleaned_fallback)
                    sentences = [s.strip() for s in sentences if s.strip()]
                    for sentence in sentences:
                        if sentence and sentence != '.':
                            story.append(Paragraph(f"• {sentence}.", list_style))
                continue
            if current_section == 'Actionable Suggestions':
                # Enhanced cleaning
                cleaned_content = unicodedata.normalize('NFKD', content)  # Normalize Unicode
                cleaned_content = re.sub(r'[^\x20-\x7E]', '', cleaned_content)  # Keep only printable ASCII characters
                cleaned_content = re.sub(r'^\s*[-•]|\$\s*\\cdot\s*\d*\s*', '', cleaned_content).strip()
                sections = re.split(r'\s*\d+\s*(?=\n)', cleaned_content)
                sections = [s.strip() for s in sections if s.strip()]
                section_number = 1
                for section in sections:
                    section_cleaned = re.sub(r'\s+', ' ', section.replace('\n', ' ')).strip()
                    if section_cleaned:
                        sentences = re.split(r'(?<!\d)\.(?!\d)', section_cleaned)
                        sentences = [s.strip() for s in sentences if s.strip()]
                        if sentences:
                            paragraph_text = '. '.join(sentences)
                            if not paragraph_text.endswith('.'):
                                paragraph_text += '.'
                            # Apply suggestion style with numbered bullet
                            numbered_text = f"{section_number}. {paragraph_text}"
                            print(f"Actionable Suggestion {section_number}: {numbered_text}")  # Debug logging
                            story.append(Paragraph(numbered_text, suggestion_style))
                            story.append(Spacer(1, 0.05*inch))  # Increased spacing for clarity
                    section_number += 1
                continue
            story.append(Paragraph(content.replace('%', '%'), body_style))
            story.append(Spacer(1, 0.03*inch))

        elif item['type'] == 'table' and in_performance_breakdown:
            table_title = "Subject Performance" if item['table_type'] == 'subject' else "Difficulty Performance"
            story.append(Paragraph(table_title, subheading_style))
            col_count = len(item['data'][0])
            col_width = (page_width - 0.3*inch) / col_count
            if item['table_type'] == 'difficulty':
                item['data'][0] = [col.replace('Avg Time (seconds)', 'Avg Time(s)') for col in item['data'][0]]
            table = Table(item['data'], colWidths=[col_width] * col_count)
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.lightblue),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, -1), 9),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
                ('TOPPADDING', (0, 0), (-1, -1), 6),
                ('LEFTPADDING', (0, 0), (-1, -1), 5),
                ('RIGHTPADDING', (0, 0), (-1, -1), 5),
                ('BACKGROUND', (0, 1), (-1, -1), colors.white),
                ('GRID', (0, 0), (-1, -1), 1, colors.black),
                ('BOX', (0, 0), (-1, -1), 1, colors.black),
            ]))
            story.append(table)
            story.append(Spacer(1, 0.05*inch))
            table_count += 1

    # Build the PDF
    try:
        doc.build(story)
        print(f"PDF generated: {output_filename}")
    except Exception as e:
        print(f"Error building PDF: {e}")
    finally:
        # Clean up temporary files
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                    print(f"Removed temporary file {temp_file}")
            except Exception as e:
                print(f"Error removing temporary file {temp_file}: {e}")

# --- Main Execution ---
file_path = "/content/sample_submission_analysis_1.json"
text_file_path = "/content/feedback_output.txt"
pdf_path = "feedback3_report.pdf"

if not os.path.exists(text_file_path):
    print(f"Text file not found: {text_file_path}")
    exit(1)

json_data = load_json_data(file_path)
if json_data:
    processed_data = process_data(json_data)
    if processed_data:
        chart_data = extract_chart_data(processed_data)
        try:
            with open(text_file_path, "r") as file:
                document_content = file.read()
            print(f"Successfully loaded text file: {text_file_path}")
            cleaned_content = clean_document_content(document_content)
            create_styled_pdf(cleaned_content, chart_data, pdf_path)
        except Exception as e:
            print(f"Error processing text file: {e}")
else:
    print("Failed to load JSON file")

Successfully loaded text file: /content/feedback_output.txt
Generating chart for Physics at /tmp/tmpgdvrqkrr.png
Chart saved successfully for Physics at /tmp/tmpgdvrqkrr.png
Added chart for Physics to PDF
Generating chart for Chemistry at /tmp/tmp_ckzrwfk.png
Chart saved successfully for Chemistry at /tmp/tmp_ckzrwfk.png
Added chart for Chemistry to PDF
Generating chart for Mathematics at /tmp/tmpa3tmu3jh.png
Chart saved successfully for Mathematics at /tmp/tmpa3tmu3jh.png
Added chart for Mathematics to PDF
Actionable Suggestion 1: 1. 1. Time Management Strategy: Practice timed tests focusing on improving speed on easier questions. Use the time saved to tackle more challenging problems effectively. Try techniques like eliminating obviously wrong answers first to save time.
Actionable Suggestion 1: 1. 2. Targeted Concept Review: Create a focused study plan addressing the weak concepts identified above. Use practice problems and different resources to understand these concepts thoroughly

In [44]:
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
import re
import unicodedata

def clean_document_content(content):
    cleaned_content = content.replace('*', '')
    lines = cleaned_content.split('\n')
    output_lines = []
    in_table = False
    table_data = []
    table_count = 0

    for line in lines:
        line = line.strip()
        if not line:
            if in_table and table_data:
                table_data = [row for row in table_data if not all(cell.strip().startswith('-') and len(cell.strip()) > 1 for cell in row)]
                if table_data:
                    table_count += 1
                    output_lines.append({'type': 'table', 'data': table_data, 'table_type': 'subject' if table_count == 1 else 'difficulty'})
                table_data = []
                in_table = False
            continue
        if line.startswith('|'):
            in_table = True
            table_data.append([cell.strip() for cell in line.split('|')[1:-1]])
            continue
        if in_table and not line.startswith('|'):
            in_table = False
            if table_data:
                table_data = [row for row in table_data if not all(cell.strip().startswith('-') and len(cell.strip()) > 1 for cell in row)]
                if table_data:
                    table_count += 1
                    output_lines.append({'type': 'table', 'data': table_data, 'table_type': 'subject' if table_count == 1 else 'difficulty'})
                table_data = []
            output_lines.append({'type': 'text', 'content': line})
        else:
            output_lines.append({'type': 'text', 'content': line})

    if table_data:
        table_data = [row for row in table_data if not all(cell.strip().startswith('-') and len(cell.strip()) > 1 for cell in row)]
        if table_data:
            table_count += 1
            output_lines.append({'type': 'table', 'data': table_data, 'table_type': 'difficulty'})

    return output_lines

def create_styled_pdf(cleaned_content, chart_data, output_filename='feedback_report.pdf'):
    doc = SimpleDocTemplate(output_filename, pagesize=letter, rightMargin=0.75*inch, leftMargin=0.75*inch, topMargin=0.75*inch, bottomMargin=0.75*inch)
    styles = getSampleStyleSheet()

    # Existing styles
    main_title_style = ParagraphStyle(
        name='MainTitleStyle',
        fontSize=18,
        leading=22,
        spaceAfter=10,
        fontName='Helvetica-Bold',
        textColor=colors.navy,
        alignment=1  # Center
    )

    section_title_style = ParagraphStyle(
        name='SectionTitleStyle',
        fontSize=14,
        leading=16,
        spaceAfter=6,
        fontName='Helvetica-Bold',
        textColor=colors.darkblue,
        alignment=0  # Left
    )

    subheading_style = ParagraphStyle(
        name='SubheadingStyle',
        fontSize=10,
        leading=12,
        spaceAfter=4,
        fontName='Helvetica-Oblique',
        textColor=colors.darkslategray
    )

    body_style = ParagraphStyle(
        name='BodyStyle',
        fontSize=8,
        leading=10,
        spaceAfter=3,
        fontName='Times-Roman',
        textColor=colors.black
    )

    list_style = ParagraphStyle(
        name='ListStyle',
        fontSize=8,
        leading=10,
        spaceAfter=3,
        fontName='Times-Roman',
        textColor=colors.black,
        leftIndent=16,
        bulletFontName='Times-Roman',
        bulletFontSize=8,
        bulletIndent=8
    )

    # Updated style for Actionable Suggestions
    suggestion_style = ParagraphStyle(
        name='SuggestionStyle',
        fontSize=10,  # Larger size for emphasis
        leading=14,  # More spacing for readability
        spaceAfter=8,  # Extra spacing after each suggestion
        fontName='Times-Bold',  # Distinct font family, bold for impact
        textColor=colors.darkblue,  # Vivid dark blue for attention
        leftIndent=15,  # Moderate indent to align with content but stand out
        alignment=0  # Left-aligned
    )

    story = []
    temp_files = []  # Track temporary files for cleanup
    current_section = None
    in_performance_breakdown = False
    table_count = 0  # Track number of tables in Performance Breakdown
    page_width = letter[0] - 1.5*inch  # Available width after margins
    charts_added = False  # Flag to prevent duplicate charts

    # Add main title
    story.append(Paragraph("Test Performance Report", main_title_style))
    story.append(Spacer(1, 0.08*inch))

    def add_chart(subject, image_path, description):
        nonlocal story, temp_files
        try:
            from PIL import Image as PILImage
            with PILImage.open(image_path) as img:
                img_width, img_height = img.size
                aspect_ratio = img_height / img_width
                target_width = min(page_width, 5.5*inch)
                target_height = target_width * aspect_ratio
                if target_height > 3.5*inch:
                    target_height = 3.5*inch
                    target_width = target_height / aspect_ratio
            story.append(Spacer(1, 0.05*inch))
            story.append(Paragraph(f"{subject} Performance Chart", subheading_style))
            img = Image(image_path, width=target_width, height=target_height)
            img.hAlign = 'CENTER'
            story.append(img)
            story.append(Paragraph(description, body_style))
            story.append(Spacer(1, 0.05*inch))
            temp_files.append(image_path)
            print(f"Added chart for {subject} to PDF")
        except Exception as e:
            print(f"Error embedding image for {subject}: {e}")

    for item in cleaned_content:
        if item['type'] == 'text':
            content = item['content']
            if content in ['Overall Performance', 'Motivating Introduction', 'Performance Breakdown',
                          'Time vs. Accuracy Insights', 'Chapter-wise Concept Analysis', 'Actionable Suggestions']:
                if in_performance_breakdown and table_count >= 2 and not charts_added:
                    # Add Physics and Chemistry charts on page 2
                    story.append(PageBreak())
                    for subject in ['Physics', 'Chemistry']:
                        if subject in chart_data:
                            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
                            temp_image_path = temp_file.name
                            temp_file.close()
                            print(f"Generating chart for {subject} at {temp_image_path}")
                            image_path, description = plot_subject_chart(subject, chart_data[subject], temp_image_path)
                            if image_path and os.path.exists(image_path):
                                add_chart(subject, image_path, description)
                    # Add Mathematics chart on page 3
                    story.append(PageBreak())
                    if chart_data.get('Mathematics'):
                        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
                        temp_image_path = temp_file.name
                        temp_file.close()
                        print(f"Generating chart for Mathematics at {temp_image_path}")
                        image_path, description = plot_subject_chart('Mathematics', chart_data['Mathematics'], temp_image_path)
                        if image_path and os.path.exists(image_path):
                            add_chart('Mathematics', image_path, description)
                    charts_added = True
                story.append(Paragraph(content, section_title_style))
                current_section = content
                in_performance_breakdown = (content == 'Performance Breakdown')
                table_count = 0  # Reset table count for new section
                continue
            if content in ['Physics:', 'Chemistry:', 'Mathematics:']:
                story.append(Paragraph(content[:-1], subheading_style))
                continue
            if content.startswith('Electrostatics:') or content.startswith('Capacitance:') or \
               content.startswith('Solutions:') or content.startswith('Electrochemistry:') or \
               content.startswith('Functions:') or content.startswith('Sets and Relations:'):
                story.append(Paragraph(f"• {content}", list_style))
                continue
            if current_section == 'Time vs. Accuracy Insights':
                try:
                    cleaned_content = re.sub(r'^\s*[-•]|\$\s*\\cdot\s*\d*\s*', '', content).strip()
                    cleaned_content = re.sub(r'\s+', ' ', cleaned_content.replace('\n', ' ')).strip()
                    sentences = re.split(r'(?<!\d)\.(?!\d)', cleaned_content)
                    sentences = [s.strip() for s in sentences if s.strip()]
                    for sentence in sentences:
                        if sentence and sentence != '.':
                            story.append(Paragraph(f"• {sentence}.", list_style))
                except re.error as e:
                    print(f"Regex error in bullet point processing: {e}")
                    cleaned_fallback = re.sub(r'\s+', ' ', content.replace('\n', ' ')).strip()
                    sentences = re.split(r'(?<!\d)\.(?!\d)', cleaned_fallback)
                    sentences = [s.strip() for s in sentences if s.strip()]
                    for sentence in sentences:
                        if sentence and sentence != '.':
                            story.append(Paragraph(f"• {sentence}.", list_style))
                continue
            if current_section == 'Actionable Suggestions':
                # Enhanced cleaning
                cleaned_content = unicodedata.normalize('NFKD', content)  # Normalize Unicode
                cleaned_content = re.sub(r'[^\x20-\x7E]', '', cleaned_content)  # Keep only printable ASCII characters
                cleaned_content = re.sub(r'^\s*[-•]|\$\s*\\cdot\s*\d*\s*|\d+\.\s*\d+\.\s*', '', cleaned_content).strip()  # Remove redundant numbering
                sections = re.split(r'\s*\d+\s*(?=\n)', cleaned_content)
                sections = [s.strip() for s in sections if s.strip()]
                section_number = 1
                for section in sections:
                    section_cleaned = re.sub(r'\s+', ' ', section.replace('\n', ' ')).strip()
                    if section_cleaned:
                        sentences = re.split(r'(?<!\d)\.(?!\d)', section_cleaned)
                        sentences = [s.strip() for s in sentences if s.strip()]
                        if sentences:
                            paragraph_text = '. '.join(sentences)
                            if not paragraph_text.endswith('.'):
                                paragraph_text += '.'
                            # Wrap suggestion in a table for background and border
                            numbered_text = f"{section_number}. {paragraph_text}"
                            suggestion_paragraph = Paragraph(numbered_text, suggestion_style)
                            suggestion_table = Table([[suggestion_paragraph]], colWidths=[page_width - 0.3*inch])
                            suggestion_table.setStyle(TableStyle([
                                ('BACKGROUND', (0, 0), (-1, -1), colors.lightyellow),  # Light yellow background
                                ('BOX', (0, 0), (-1, -1), 1, colors.darkblue),  # Dark blue border
                                ('GRID', (0, 0), (-1, -1), 1, colors.darkblue),  # Matching grid
                                ('LEFTPADDING', (0, 0), (-1, -1), 10),  # Padding for content
                                ('RIGHTPADDING', (0, 0), (-1, -1), 10),
                                ('TOPPADDING', (0, 0), (-1, -1), 8),
                                ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
                            ]))
                            story.append(suggestion_table)
                            story.append(Spacer(1, 0.1*inch))  # Extra spacing for separation
                            print(f"Actionable Suggestion {section_number}: {numbered_text}")  # Debug logging
                    section_number += 1
                continue
            story.append(Paragraph(content.replace('%', '%'), body_style))
            story.append(Spacer(1, 0.03*inch))

        elif item['type'] == 'table' and in_performance_breakdown:
            table_title = "Subject Performance" if item['table_type'] == 'subject' else "Difficulty Performance"
            story.append(Paragraph(table_title, subheading_style))
            col_count = len(item['data'][0])
            col_width = (page_width - 0.3*inch) / col_count
            if item['table_type'] == 'difficulty':
                item['data'][0] = [col.replace('Avg Time (seconds)', 'Avg Time(s)') for col in item['data'][0]]
            table = Table(item['data'], colWidths=[col_width] * col_count)
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.lightblue),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, -1), 9),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
                ('TOPPADDING', (0, 0), (-1, -1), 6),
                ('LEFTPADDING', (0, 0), (-1, -1), 5),
                ('RIGHTPADDING', (0, 0), (-1, -1), 5),
                ('BACKGROUND', (0, 1), (-1, -1), colors.white),
                ('GRID', (0, 0), (-1, -1), 1, colors.black),
                ('BOX', (0, 0), (-1, -1), 1, colors.black),
            ]))
            story.append(table)
            story.append(Spacer(1, 0.05*inch))
            table_count += 1

    # Build the PDF
    try:
        doc.build(story)
        print(f"PDF generated: {output_filename}")
    except Exception as e:
        print(f"Error building PDF: {e}")
    finally:
        # Clean up temporary files
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                    print(f"Removed temporary file {temp_file}")
            except Exception as e:
                print(f"Error removing temporary file {temp_file}: {e}")

# --- Main Execution ---
file_path = "/content/sample_submission_analysis_1.json"
text_file_path = "/content/feedback_output.txt"
pdf_path = "feedback3_report.pdf"

if not os.path.exists(text_file_path):
    print(f"Text file not found: {text_file_path}")
    exit(1)

json_data = load_json_data(file_path)
if json_data:
    processed_data = process_data(json_data)
    if processed_data:
        chart_data = extract_chart_data(processed_data)
        try:
            with open(text_file_path, "r") as file:
                document_content = file.read()
            print(f"Successfully loaded text file: {text_file_path}")
            cleaned_content = clean_document_content(document_content)
            create_styled_pdf(cleaned_content, chart_data, pdf_path)
        except Exception as e:
            print(f"Error processing text file: {e}")
else:
    print("Failed to load JSON file")

Successfully loaded text file: /content/feedback_output.txt
Generating chart for Physics at /tmp/tmpzrp7pyyl.png
Chart saved successfully for Physics at /tmp/tmpzrp7pyyl.png
Added chart for Physics to PDF
Generating chart for Chemistry at /tmp/tmpm5zs01ha.png
Chart saved successfully for Chemistry at /tmp/tmpm5zs01ha.png
Added chart for Chemistry to PDF
Generating chart for Mathematics at /tmp/tmpul7_nei7.png
Chart saved successfully for Mathematics at /tmp/tmpul7_nei7.png
Added chart for Mathematics to PDF
Actionable Suggestion 1: 1. 1. Time Management Strategy: Practice timed tests focusing on improving speed on easier questions. Use the time saved to tackle more challenging problems effectively. Try techniques like eliminating obviously wrong answers first to save time.
Actionable Suggestion 1: 1. 2. Targeted Concept Review: Create a focused study plan addressing the weak concepts identified above. Use practice problems and different resources to understand these concepts thoroughly