
# Q1 a) Parse Student Records Function

Write a function `parse_student_records(raw_data)` that:
- Parses each student's name and subject scores
- Handles "invalid" entries by replacing them with 0
- Handles missing subjects by assigning 0
- Validates all scores are between 0–100
- Returns a list of dictionaries, e.g.: `{'name': 'Rahul', 'scores': {'Math': 85, 'Science': 90, 'English': 78, 'Social': 88}}`

In [None]:

def validate_score(score, student_name, subject):
    """
    Validate that a score is within the valid range (0-100).
    """
    # Check if the score is between 0 and 100 (inclusive)
    if not (0 <= score <= 100):
        return 0  # Return 0 for invalid scores
    
    # If score is valid, return it as is
    return score


def parse_subject_score(subject_score_str, student_name):
    """
    Parse a single "Subject:Score" string and return (subject, score) tuple. 
    """
    # Remove extra spaces from the beginning and end of the string
    subject_score_str = subject_score_str.strip()
    
    # Check if the string contains a colon (:)
    # Example: "Math:85" contains a colon, but "Math" does not
    if ':' not in subject_score_str:
        return None  # Return None to indicate this entry couldn't be parsed
    
    # Split the string at the colon to separate subject and score
    # Example: "Math:85" becomes ["Math", "85"]
    subject, score_str = subject_score_str.split(':', 1)
    # Remove extra spaces from both parts
    subject = subject.strip()
    score_str = score_str.strip()
    
    # Handle "invalid" entries by treating them as 0
    # Example: "English:invalid" should become 0
    if score_str.lower() == 'invalid':
        return (subject, 0)
    
    # Try to convert the score string to a number (int or float)
    try:
        score = float(score_str)
        # Return a tuple (pair) of subject and score
        return (subject, score)
    except ValueError:
        return (subject, 0)


def parse_scores_string(scores_str, student_name, expected_subjects):
    """
    Parse all subject scores from a comma-separated string.
    """
    # Create a dictionary with all subjects initially set to 0
    # This ensures every subject has a score, even if it was missing in raw data
    scores = {subject: 0 for subject in expected_subjects}
    
    # Split the scores string at commas to get individual subject:score pairs
    # Example: "Math:85,Science:90" becomes ["Math:85", "Science:90"]
    subject_scores = scores_str.split(',')
    
    # Loop through each "Subject:Score" pair
    for subject_score in subject_scores:
        # Parse this single pair using our helper function
        parsed = parse_subject_score(subject_score, student_name)
        
        # If parsing failed, skip this entry and move to the next one
        if parsed is None:
            continue
        
        # Extract the subject and score from the parsed tuple
        subject, score = parsed
        
        # Validate that the score is between 0-100
        validated_score = validate_score(score, student_name, subject)
        
        # Only add the score if the subject is one we're expecting
        # (This prevents unknown subjects from being added)
        if subject in expected_subjects:
            scores[subject] = validated_score
    
    # Return the completed dictionary with all subjects and their scores
    return scores


def parse_single_record(record, expected_subjects):
    """
    Parse a single student record string.
    """
    try:
        # Split the record at the pipe character (|) to separate name from scores
        # Example: "Rahul|Math:85,Science:90" becomes ["Rahul", "Math:85,Science:90"]
        parts = record.split('|')
        
        # Check if we got exactly 2 parts (name and scores)
        if len(parts) != 2:
            return None  # Return None if format is wrong
        
        # Extract and clean up the name (remove spaces)
        name = parts[0].strip()
        # Extract and clean up the scores string
        scores_str = parts[1].strip()
        
        # Parse all the scores using our helper function
        scores = parse_scores_string(scores_str, name, expected_subjects)
        
        # Create a dictionary to represent this student's data
        # This is the format we want to return
        student_record = {
            'name': name,
            'scores': scores
        }
        
        # Return the completed student record
        return student_record
        
    except Exception as e:
        return None  # Return None to indicate this record couldn't be parsed


def parse_student_records(raw_data, expected_subjects=None):
    """
    Parse student records from raw data format and validate scores.
    """
    # If no subjects were provided, use the default set
    if expected_subjects is None:
        expected_subjects = {'Math', 'Science', 'English', 'Social'}
    
    # Create an empty list to store the parsed student records
    parsed_records = []
    
    # Loop through each record in the raw data
    for record in raw_data:
        # Parse this individual record using our helper function
        student_record = parse_single_record(record, expected_subjects)
        
        # Only add to results if parsing was successful (not None)
        if student_record is not None:
            parsed_records.append(student_record)
    
    # Return the list of all successfully parsed student records
    return parsed_records

# Test data: List of student records in raw format
raw_student_data = [
    "Rahul|Math:85,Science:90,English:78,Social:88",
    "Priya|Math:92,Science:88,English:95,Social:90",
    "Amit|Math:65,Science:70,English:invalid,Social:68",  # Note: English has "invalid"
    "Sneha|Math:78,Science:82,English:85,Social:80",
    "Vikram|Math:45,Science:50,English:55,Social:48"
]

# Call the main function to parse all student records
parsed_data = parse_student_records(raw_student_data)

print("\nParsed Student Records:")
for student in parsed_data:
    print(student)



Parsed Student Records:
{'name': 'Rahul', 'scores': {'Math': 85.0, 'Science': 90.0, 'English': 78.0, 'Social': 88.0}}
{'name': 'Priya', 'scores': {'Math': 92.0, 'Science': 88.0, 'English': 95.0, 'Social': 90.0}}
{'name': 'Amit', 'scores': {'Math': 65.0, 'Science': 70.0, 'English': 0, 'Social': 68.0}}
{'name': 'Sneha', 'scores': {'Math': 78.0, 'Science': 82.0, 'English': 85.0, 'Social': 80.0}}
{'name': 'Vikram', 'scores': {'Math': 45.0, 'Science': 50.0, 'English': 55.0, 'Social': 48.0}}


# Q1 b) STEP-BY-STEP PARSING: AMIT'S RECORD

STEP 0: Raw Input Data
  Input: Amit|Math:65,Science:70,English:invalid,Social:68
  Type: <class 'str'>

STEP 1: Split at Pipe Character (|)
  Action: Split 'Amit|Math:65,Science:70,English:invalid,Social:68' at '|'
  Result: ['Amit', 'Math:65,Science:70,English:invalid,Social:68']
  Name: 'Amit'
  Scores String: 'Math:65,Science:70,English:invalid,Social:68'

STEP 2: Extract and Clean Name & Scores String
  Name (after strip): 'Amit'
  Scores String (after strip): 'Math:65,Science:70,English:invalid,Social:68'

STEP 3: Initialize Scores Dictionary
  Action: Set all subjects to 0 (default values)
  Initial scores: {'Math': 0, 'Science': 0, 'English': 0, 'Social': 0}

STEP 4: Split Scores String at Commas
  Action: Split 'Math:65,Science:70,English:invalid,Social:68' at ','
...
English              invalid                   0                         ← HANDLED!
Social               68                        68.0                

# Q2 a) Write a function calculate_student_grades(student_records) to:

• Calculate each student’s average.
• Assign a grade based on the average (see scale above).
• Determine if each student passed (average ≥ 50).
• Add 'average', 'grade', and 'status' keys to each record.


In [37]:
# QUESTION b CALCULATE STUDENT GRADES

def calculate_average(scores_dict):
    """
    Calculate the average score from a dictionary of subject scores.
    """
    # Check if the dictionary is empty to avoid division by zero
    if not scores_dict or len(scores_dict) == 0:
        return 0
    
    # Calculate sum of all scores by adding all values in the dictionary
    total_score = sum(scores_dict.values())
    
    # Calculate average by dividing total by number of subjects
    # len() tells us how many subjects there are
    average = total_score / len(scores_dict)
    
    # Round to 2 decimal places for readability
    return round(average, 2)


def get_grade(average):
    """
    Assign a letter grade based on the average score.
    """
    # Check which range the average falls into
    # Start from highest grade and work down
    
    if average >= 90:
        # Score is 90 or higher → Grade A+
        return 'A+'
    elif average >= 80:
        # Score is 80-89 → Grade A
        return 'A'
    elif average >= 70:
        # Score is 70-79 → Grade B+
        return 'B+'
    elif average >= 60:
        # Score is 60-69 → Grade B
        return 'B'
    elif average >= 50:
        # Score is 50-59 → Grade C
        return 'C'
    else:
        # Score is below 50 → Grade F (Fail)
        return 'F'


def determine_pass_status(average):
    """
    Determine if a student passed or failed.
    """
    # Check if average is 50 or higher
    if average >= 50:
        return 'PASS'
    else:
        return 'FAIL'


def calculate_student_grades(student_records):
    """
    Calculate grades for all students.
    """
    # Create an empty list to store the updated records
    graded_records = []
    
    # Loop through each student record
    for student in student_records:
        # Make a copy of the student record to avoid modifying the original
        updated_student = student.copy()
        
        # Get the scores dictionary for this student
        scores = student['scores']
        
        # Calculate the average using our helper function
        average = calculate_average(scores)
        # Add the average to the student record
        updated_student['average'] = average
        
        # Assign a grade based on the average using our helper function
        grade = get_grade(average)
        # Add the grade to the student record
        updated_student['grade'] = grade
        
        # Determine pass/fail status using our helper function
        status = determine_pass_status(average)
        # Add the status to the student record
        updated_student['status'] = status
        
        # Add the updated record to our results list
        graded_records.append(updated_student)
    
    # Return the complete list of graded records
    return graded_records

# Calculate grades for all students
graded_data = calculate_student_grades(parsed_data)



# Q2 b)
Fill out and submit a completed table for ALL students:

In [38]:
print("\n" + "="*120)
print("COMPLETE STUDENTS PERFORMANCE TABLE - ALL DETAILS")
print("="*120 + "\n")

# Create header row with all columns
header = f"{'Student':<12} {'Math':<8} {'Science':<10} {'English':<10} {'Social':<8} {'Total':<8} {'Average':<10} {'Grade':<8} {'Status':<8}"
print(header)
print("-" * 120)

# Display each student's complete data
for student in graded_data:
    name = student['name']
    scores = student['scores']
    
    # Get individual subject scores
    math_score = int(scores['Math'])
    science_score = int(scores['Science'])
    english_score = int(scores['English'])
    social_score = int(scores['Social'])
    
    # Calculate total (sum of all scores)
    total = math_score + science_score + english_score + social_score
    
    # Get average and grade from graded_data
    average = student['average']
    grade = student['grade']
    status = student['status']
    
    # Print row
    row = f"{name:<12} {math_score:<8} {science_score:<10} {english_score:<10} {social_score:<8} {total:<8} {average:<10} {grade:<8} {status:<8}"
    print(row)

print("-" * 120)

# Calculate and display totals/statistics
print("\nSUMMARY STATISTICS:\n")

# Calculate totals and averages for each subject
total_students = len(graded_data)
math_total = sum(int(s['scores']['Math']) for s in graded_data)
science_total = sum(int(s['scores']['Science']) for s in graded_data)
english_total = sum(int(s['scores']['English']) for s in graded_data)
social_total = sum(int(s['scores']['Social']) for s in graded_data)
overall_total = math_total + science_total + english_total + social_total

# Calculate average for each subject
math_avg = round(math_total / total_students, 2)
science_avg = round(science_total / total_students, 2)
english_avg = round(english_total / total_students, 2)
social_avg = round(social_total / total_students, 2)

# Count pass/fail
pass_count = sum(1 for s in graded_data if s['status'] == 'PASS')
fail_count = sum(1 for s in graded_data if s['status'] == 'FAIL')

# Count each grade
grade_counts = {}
for s in graded_data:
    grade = s['grade']
    grade_counts[grade] = grade_counts.get(grade, 0) + 1

print(f"Total Students: {total_students}")
print(f"Students Passed: {pass_count}")
print(f"Students Failed: {fail_count}")
print()
print("Subject-wise Average Scores:")
print(f"  Math Average:    {math_avg}")
print(f"  Science Average: {science_avg}")
print(f"  English Average: {english_avg}")
print(f"  Social Average:  {social_avg}")
print()
print("Grade Distribution:")
for grade in sorted(grade_counts.keys(), reverse=True):
    count = grade_counts[grade]
    percentage = round((count / total_students) * 100, 1)
    print(f"  Grade {grade}: {count} student(s) ({percentage}%)")



COMPLETE STUDENTS PERFORMANCE TABLE - ALL DETAILS

Student      Math     Science    English    Social   Total    Average    Grade    Status  
------------------------------------------------------------------------------------------------------------------------
Rahul        85       90         78         88       341      85.25      A        PASS    
Priya        92       88         95         90       365      91.25      A+       PASS    
Amit         65       70         0          68       203      50.75      C        PASS    
Sneha        78       82         85         80       325      81.25      A        PASS    
Vikram       45       50         55         48       198      49.5       F        FAIL    
------------------------------------------------------------------------------------------------------------------------

SUMMARY STATISTICS:

Total Students: 5
Students Passed: 4
Students Failed: 1

Subject-wise Average Scores:
  Math Average:    73.0
  Science Average: 76.0
  En

In [39]:


# ==================== QUESTION 3 (a): CLASS STATISTICS USING NUMPY ====================

# First, import NumPy library
import numpy as np

def class_statistics(student_records):
    """
    Calculate comprehensive class statistics using NumPy.
    """
    
    # Extract all student averages into a Python list
    # We use a list comprehension to get only the average for each student
    averages_list = [student['average'] for student in student_records]
    
    # Convert the Python list to a NumPy array
    # NumPy arrays make mathematical operations fast and efficient
    averages_array = np.array(averages_list)
    
    # ===== CALCULATE STATISTICS USING NUMPY =====
    
    # 1. Calculate the mean (average) using NumPy
    # np.mean() adds all values and divides by count
    class_mean = np.mean(averages_array)
    
    # 2. Calculate the median (middle value) using NumPy
    # np.median() finds the middle value when sorted
    class_median = np.median(averages_array)
    
    # 3. Calculate standard deviation using NumPy
    # Standard deviation shows how spread out the scores are
    # Higher value = more variation in scores
    class_std_dev = np.std(averages_array)
    
    # 4. Find the maximum (highest) average score
    max_average = np.max(averages_array)
    # Find the index (position) of the max value in the array
    max_index = np.argmax(averages_array)
    # Get the student name with highest average
    highest_student = student_records[max_index]
    highest_name = highest_student['name']
    
    # 5. Find the minimum (lowest) average score
    min_average = np.min(averages_array)
    # Find the index (position) of the min value in the array
    min_index = np.argmin(averages_array)
    # Get the student name with lowest average
    lowest_student = student_records[min_index]
    lowest_name = lowest_student['name']
    
    # 6. Count how many students scored above class average
    # Create a boolean array (True/False values) for scores > mean
    above_average_mask = averages_array > class_mean
    # Count how many True values (students above average)
    above_average_count = np.sum(above_average_mask)
    
    # 7. Calculate pass percentage (students with average >= 50)
    # Create a boolean array for scores >= 50 (passing score)
    pass_mask = averages_array >= 50
    # Count students who passed
    pass_count = np.sum(pass_mask)
    # Calculate percentage: (passed / total) * 100
    pass_percentage = (pass_count / len(student_records)) * 100
    
    # ===== PREPARE RESULTS DICTIONARY =====
    
    # Create a dictionary to store all results
    statistics = {
        'class_mean': round(class_mean, 2),
        'class_median': round(class_median, 2),
        'class_std_dev': round(class_std_dev, 2),
        'highest_score': round(max_average, 2),
        'highest_student': highest_name,
        'lowest_score': round(min_average, 2),
        'lowest_student': lowest_name,
        'above_average_count': int(above_average_count),
        'pass_percentage': round(pass_percentage, 2)
    }
    
    # Return the complete statistics dictionary
    return statistics


# ==================== DISPLAY CLASS STATISTICS ====================

# Call the function with graded student data
print("\n" + "="*80)
print("QUESTION 1 (d): CLASS STATISTICS ANALYSIS (Using NumPy)")
print("="*80 + "\n")

# Get statistics
stats = class_statistics(graded_data)

# Display the results in a formatted way
print("CLASS PERFORMANCE STATISTICS:")
print("-" * 80)
print(f"Class Mean (Average):          {stats['class_mean']}")
print(f"Class Median (Middle Value):   {stats['class_median']}")
print(f"Standard Deviation:            {stats['class_std_dev']}")
print()
print("PERFORMANCE HIGHLIGHTS:")
print("-" * 80)
print(f"Highest Score:  {stats['highest_score']} ({stats['highest_student']})")
print(f"Lowest Score:   {stats['lowest_score']} ({stats['lowest_student']})")
print()
print("CLASS ANALYSIS:")
print("-" * 80)
print(f"Total Students:                {len(graded_data)}")
print(f"Students Above Class Average:  {stats['above_average_count']}")
print(f"Students At or Below Average:  {len(graded_data) - stats['above_average_count']}")
print(f"Pass Percentage (avg >= 50):   {stats['pass_percentage']}%")
print(f"Fail Percentage:               {round(100 - stats['pass_percentage'], 2)}%")
print()

# Additional insights using NumPy
print("ADDITIONAL INSIGHTS:")
print("-" * 80)

# Create NumPy array of averages again for insights
averages_array = np.array([s['average'] for s in graded_data])

# Find quartiles (divide data into 4 parts)
q1 = np.percentile(averages_array, 25)  # 25th percentile
q3 = np.percentile(averages_array, 75)  # 75th percentile

# Find range (difference between max and min)
score_range = np.max(averages_array) - np.min(averages_array)

print(f"Score Range (Max - Min):       {score_range}")
print(f"First Quartile (Q1, 25%):      {round(q1, 2)}")
print(f"Third Quartile (Q3, 75%):      {round(q3, 2)}")
print(f"Interquartile Range (IQR):     {round(q3 - q1, 2)}")
print()

# Interpretation of standard deviation
if stats['class_std_dev'] < 5:
    spread = "Very consistent performance"
elif stats['class_std_dev'] < 10:
    spread = "Moderately consistent performance"
else:
    spread = "Wide variation in performance"

print(f"Performance Spread:            {spread}")
print(f"                               (Std Dev: {stats['class_std_dev']})")


QUESTION 1 (d): CLASS STATISTICS ANALYSIS (Using NumPy)

CLASS PERFORMANCE STATISTICS:
--------------------------------------------------------------------------------
Class Mean (Average):          71.6
Class Median (Middle Value):   81.25
Standard Deviation:            17.83

PERFORMANCE HIGHLIGHTS:
--------------------------------------------------------------------------------
Highest Score:  91.25 (Priya)
Lowest Score:   49.5 (Vikram)

CLASS ANALYSIS:
--------------------------------------------------------------------------------
Total Students:                5
Students Above Class Average:  3
Students At or Below Average:  2
Pass Percentage (avg >= 50):   80.0%
Fail Percentage:               20.0%

ADDITIONAL INSIGHTS:
--------------------------------------------------------------------------------
Score Range (Max - Min):       41.75
First Quartile (Q1, 25%):      50.75
Third Quartile (Q3, 75%):      85.25
Interquartile Range (IQR):     34.5

Performance Spread:            Wi

In [40]:


# ==================== DETAILED MANUAL CALCULATIONS ====================

print("\n" + "="*100)
print("DETAILED MANUAL CALCULATIONS FOR CLASS STATISTICS")
print("="*100 + "\n")

# Step 1: Extract all student averages
print("STEP 1: EXTRACT STUDENT AVERAGES INTO ARRAY")
print("-" * 100)

# Create the array manually to show the data

student_averages = []
for i, student in enumerate(graded_data, 1):
    avg = student['average']
    student_averages.append(avg)
    print(f"  Student {i}: {student['name']:<10} → Average = {avg}")

print()
print("NumPy Array:")
averages_np = np.array(student_averages)
print(f"  {averages_np}")
print(f"  Array Type: {type(averages_np)}")
print(f"  Array Shape: {averages_np.shape}")
print(f"  Number of Students: {len(averages_np)}")
print()

# ==================== MEAN CALCULATION ====================
print("\n" + "="*100)
print("STEP 2: CALCULATE MEAN (AVERAGE)")
print("="*100 + "\n")

print("Formula: Mean = (Sum of all values) / (Number of values)")
print()

# Manual calculation
sum_of_averages = sum(student_averages)
count_students = len(student_averages)

print("Manual Calculation:")
print(f"  Values: {student_averages}")
print(f"  Sum: {student_averages[0]} + {student_averages[1]} + {student_averages[2]} + {student_averages[3]} + {student_averages[4]}")
print(f"  Sum = {sum_of_averages}")
print(f"  Count = {count_students}")
print(f"  Mean = {sum_of_averages} / {count_students}")

mean_manual = sum_of_averages / count_students
print(f"  Mean (Manual) = {mean_manual}")

# NumPy calculation
mean_numpy = np.mean(averages_np)
print(f"\nNumPy Calculation:")
print(f"  np.mean({list(averages_np)}) = {mean_numpy}")
print(f"  Mean (NumPy) = {mean_numpy}")

print(f"\n✓ RESULT: Class Mean = {round(mean_numpy, 2)}")

# ==================== MEDIAN CALCULATION ====================
print("\n" + "="*100)
print("STEP 3: CALCULATE MEDIAN (MIDDLE VALUE)")
print("="*100 + "\n")

print("Formula: Median = Middle value when data is sorted in order")
print()

# Manual calculation
sorted_averages = sorted(student_averages)
print("Manual Calculation:")
print(f"  Original values: {student_averages}")
print(f"  Sorted values:   {sorted_averages}")
print(f"  Number of values: {len(sorted_averages)}")

if len(sorted_averages) % 2 == 1:
    # Odd number of values
    middle_index = len(sorted_averages) // 2
    median_manual = sorted_averages[middle_index]
    print(f"  Odd count → Middle index = ({len(sorted_averages)} + 1) / 2 = {middle_index + 1}")
    print(f"  Middle value (index {middle_index}): {median_manual}")
else:
    # Even number of values
    middle_index1 = len(sorted_averages) // 2 - 1
    middle_index2 = len(sorted_averages) // 2
    val1 = sorted_averages[middle_index1]
    val2 = sorted_averages[middle_index2]
    median_manual = (val1 + val2) / 2
    print(f"  Even count → Average of two middle values")
    print(f"  Values at index {middle_index1} and {middle_index2}: {val1} and {val2}")
    print(f"  Median = ({val1} + {val2}) / 2 = {median_manual}")

# NumPy calculation
median_numpy = np.median(averages_np)
print(f"\nNumPy Calculation:")
print(f"  np.median({list(averages_np)}) = {median_numpy}")

print(f"\n✓ RESULT: Class Median = {median_numpy}")

# ==================== STANDARD DEVIATION CALCULATION ====================
print("\n" + "="*100)
print("STEP 4: CALCULATE STANDARD DEVIATION")
print("="*100 + "\n")

print("Formula: StdDev = √[ Σ(x - mean)² / N ]")
print()

print("Manual Calculation:")
print(f"  Mean = {mean_manual}")
print(f"  Values: {student_averages}")
print()

# Calculate deviations from mean
deviations = [x - mean_manual for x in student_averages]
print("Step 1: Calculate deviation from mean for each value (x - mean):")
for i, (val, dev) in enumerate(zip(student_averages, deviations)):
    print(f"  {val} - {mean_manual:.4f} = {dev:.4f}")

# Calculate squared deviations
squared_deviations = [dev**2 for dev in deviations]
print(f"\nStep 2: Square each deviation (x - mean)²:")
for i, (dev, sq_dev) in enumerate(zip(deviations, squared_deviations)):
    print(f"  ({dev:.4f})² = {sq_dev:.4f}")

# Sum of squared deviations
sum_squared = sum(squared_deviations)
print(f"\nStep 3: Sum of squared deviations:")
print(f"  Σ(x - mean)² = {' + '.join([f'{sq:.4f}' for sq in squared_deviations])}")
print(f"  Σ(x - mean)² = {sum_squared:.4f}")

# Divide by N
variance = sum_squared / len(student_averages)
print(f"\nStep 4: Divide by count (N = {len(student_averages)}):")
print(f"  Variance = {sum_squared:.4f} / {len(student_averages)} = {variance:.4f}")

# Square root
std_dev_manual = np.sqrt(variance)
print(f"\nStep 5: Take square root:")
print(f"  StdDev = √{variance:.4f} = {std_dev_manual:.4f}")

# NumPy calculation
std_dev_numpy = np.std(averages_np)
print(f"\nNumPy Calculation:")
print(f"  np.std({list(averages_np)}) = {std_dev_numpy}")

print(f"\n✓ RESULT: Standard Deviation = {round(std_dev_numpy, 2)}")

# ==================== PASS PERCENTAGE CALCULATION ====================
print("\n" + "="*100)
print("STEP 5: CALCULATE PASS PERCENTAGE")
print("="*100 + "\n")

print("Criterion: Student passes if average ≥ 50")
print()

print("Manual Calculation:")
print(f"  Values: {student_averages}")

# Identify passing students
passing_students = []
failing_students = []
for val, student in zip(student_averages, graded_data):
    if val >= 50:
        passing_students.append((student['name'], val))
    else:
        failing_students.append((student['name'], val))

print(f"\nStudents who PASSED (average ≥ 50):")
for name, avg in passing_students:
    print(f"  ✓ {name}: {avg}")

print(f"\nStudents who FAILED (average < 50):")
for name, avg in failing_students:
    print(f"  ✗ {name}: {avg}")

print(f"\nCount:")
print(f"  Passed: {len(passing_students)} student(s)")
print(f"  Failed: {len(failing_students)} student(s)")
print(f"  Total: {len(student_averages)} student(s)")

# Calculate pass percentage manually
pass_percentage_manual = (len(passing_students) / len(student_averages)) * 100
print(f"\nPass Percentage Calculation:")
print(f"  Pass % = (Number Passed / Total) × 100")
print(f"  Pass % = ({len(passing_students)} / {len(student_averages)}) × 100")
print(f"  Pass % = {len(passing_students) / len(student_averages):.4f} × 100")
print(f"  Pass % = {pass_percentage_manual}%")

# NumPy calculation
pass_mask = averages_np >= 50
pass_count_numpy = np.sum(pass_mask)
pass_percentage_numpy = (pass_count_numpy / len(averages_np)) * 100
print(f"\nNumPy Calculation:")
print(f"  pass_mask = (array >= 50) = {pass_mask}")
print(f"  np.sum(pass_mask) = {pass_count_numpy}")
print(f"  Pass % = ({pass_count_numpy} / {len(averages_np)}) × 100 = {pass_percentage_numpy}%")

print(f"\n✓ RESULT: Pass Percentage = {pass_percentage_numpy}%")

# ==================== SUMMARY TABLE ====================
print("\n" + "="*100)
print("SUMMARY OF ALL CALCULATIONS")
print("="*100 + "\n")

print(f"{'Metric':<30} {'Manual Result':<20} {'NumPy Result':<20} {'Match':<10}")
print("-" * 80)
print(f"{'Mean':<30} {mean_manual:<20.2f} {mean_numpy:<20.2f} {'✓' if round(mean_manual, 2) == round(mean_numpy, 2) else '✗':<10}")
print(f"{'Median':<30} {median_manual:<20.2f} {median_numpy:<20.2f} {'✓' if median_manual == median_numpy else '✗':<10}")
print(f"{'Std Deviation':<30} {std_dev_manual:<20.2f} {std_dev_numpy:<20.2f} {'✓' if round(std_dev_manual, 2) == round(std_dev_numpy, 2) else '✗':<10}")
print(f"{'Pass Percentage':<30} {pass_percentage_manual:<20.2f}% {pass_percentage_numpy:<19.2f}% {'✓' if pass_percentage_manual == pass_percentage_numpy else '✗':<10}")



DETAILED MANUAL CALCULATIONS FOR CLASS STATISTICS

STEP 1: EXTRACT STUDENT AVERAGES INTO ARRAY
----------------------------------------------------------------------------------------------------
  Student 1: Rahul      → Average = 85.25
  Student 2: Priya      → Average = 91.25
  Student 3: Amit       → Average = 50.75
  Student 4: Sneha      → Average = 81.25
  Student 5: Vikram     → Average = 49.5

NumPy Array:
  [85.25 91.25 50.75 81.25 49.5 ]
  Array Type: <class 'numpy.ndarray'>
  Array Shape: (5,)
  Number of Students: 5


STEP 2: CALCULATE MEAN (AVERAGE)

Formula: Mean = (Sum of all values) / (Number of values)

Manual Calculation:
  Values: [85.25, 91.25, 50.75, 81.25, 49.5]
  Sum: 85.25 + 91.25 + 50.75 + 81.25 + 49.5
  Sum = 358.0
  Count = 5
  Mean = 358.0 / 5
  Mean (Manual) = 71.6

NumPy Calculation:
  np.mean([np.float64(85.25), np.float64(91.25), np.float64(50.75), np.float64(81.25), np.float64(49.5)]) = 71.6
  Mean (NumPy) = 71.6

✓ RESULT: Class Mean = 71.6

STEP 3: 

In [41]:


# ==================== QUESTION 4: SUBJECT-WISE PERFORMANCE ANALYSIS ====================

# Define subjects
subjects = ['Math', 'Science', 'English', 'Social']

# Dictionary to store subject-wise analysis
subject_analysis = {}

# ===== GROUP SCORES BY SUBJECT =====

# Extract scores for each subject
for subject in subjects:
    # Create a list of all scores for this subject from all students
    subject_scores = []
    student_names_scores = []  # To track which student got which score
    
    for student in graded_data:
        score = student['scores'][subject]
        subject_scores.append(score)
        student_names_scores.append((student['name'], score))
        
    # Store for later analysis
    subject_analysis[subject] = {
        'scores': subject_scores,
        'student_scores': student_names_scores
    }

# Define threshold for "above 75"
threshold = 75

for subject in subjects:
    scores = subject_analysis[subject]['scores']
    student_scores = subject_analysis[subject]['student_scores']
    
    # 1. Calculate average
    avg_score = np.mean(scores)
    
    # 2. Find highest scorer
    max_score = np.max(scores)
    max_index = np.argmax(scores)
    highest_student = graded_data[max_index]
    
    # 3. Find lowest scorer
    min_score = np.min(scores)
    min_index = np.argmin(scores)
    lowest_student = graded_data[min_index]
    
    # 4. Count students scoring above 75
    above_threshold = [score for score in scores if score > threshold]
    count_above_75 = len(above_threshold)
    
    # Store analysis
    subject_analysis[subject]['average'] = avg_score
    subject_analysis[subject]['highest'] = max_score
    subject_analysis[subject]['highest_student'] = highest_student['name']
    subject_analysis[subject]['lowest'] = min_score
    subject_analysis[subject]['lowest_student'] = lowest_student['name']
    subject_analysis[subject]['above_75'] = count_above_75


# Find the subject with highest average
best_subject = max(subject_analysis.items(), key=lambda x: x[1]['average'])
best_subject_name = best_subject[0]
best_subject_avg = best_subject[1]['average']

# ===== COMPREHENSIVE ANALYSIS TABLE =====

print("COMPREHENSIVE SUBJECT-WISE ANALYSIS TABLE")
print("="*130 + "\n")

# Create detailed header
print(f"{'Subject':<12} {'All Scores':<30} {'Average':<12} {'Highest':<20} {'Lowest':<20} {'Above 75':<12} {'Best':<8}")
print("-" * 130)

# Print data for each subject
for subject in subjects:
    data = subject_analysis[subject]
    
    # Format all scores as a string
    scores_str = ", ".join([str(int(s)) for s in data['scores']])
    
    # Format highest score info
    highest_info = f"{data['highest_student']} ({int(data['highest'])})"
    
    # Format lowest score info
    lowest_info = f"{data['lowest_student']} ({int(data['lowest'])})"
    
    # Determine if this is best subject
    is_best = "✓ BEST" if subject == best_subject_name else ""
    
    # Print row
    row = f"{subject:<12} {scores_str:<30} {data['average']:<12.2f} {highest_info:<20} {lowest_info:<20} {data['above_75']:<12} {is_best:<8}"
    print(row)



COMPREHENSIVE SUBJECT-WISE ANALYSIS TABLE

Subject      All Scores                     Average      Highest              Lowest               Above 75     Best    
----------------------------------------------------------------------------------------------------------------------------------
Math         85, 92, 65, 78, 45             73.00        Priya (92)           Vikram (45)          3                    
Science      90, 88, 70, 82, 50             76.00        Rahul (90)           Vikram (50)          3            ✓ BEST  
English      78, 95, 0, 85, 55              62.60        Priya (95)           Amit (0)             3                    
Social       88, 90, 68, 80, 48             74.80        Priya (90)           Vikram (48)          3                    



# Q5: Exception Handling and Individual Report

**Q5a.** Write a function `generate_student_report(student_name, student_records)` that:
- Finds a student (case-insensitive)
- Returns a formatted report showing scores, average, grade, and status
- Raises and handles ValueError if student not found
- Uses try-except-finally (finally always prints "Report generation attempted")

**Q5b.** Test with examples: "Rahul", "priya", invalid name, empty name


In [42]:

def generate_student_report(student_name, student_records):
    """
    Generate a formatted report for a student with exception handling.
    """
    try:
        # Check for empty name
        if not student_name or not student_name.strip():
            raise ValueError("Student name cannot be empty")
        
        # Search for student (case-insensitive)
        found_student = None
        for student in student_records:
            if student['name'].lower() == student_name.lower():
                found_student = student
                break
        
        # Raise error if not found
        if found_student is None:
            raise ValueError(f"Student '{student_name}' not found in records")
        
        # Generate report
        scores = found_student['scores']
        report = f"""
STUDENT REPORT
{'='*50}
Name:           {found_student['name']}
Math:           {int(scores['Math'])}
Science:        {int(scores['Science'])}
English:        {int(scores['English'])}
Social Studies: {int(scores['Social'])}
{'='*50}
Average:        {found_student['average']:.2f}
Grade:          {found_student['grade']}
Status:         {found_student['status']}
{'='*50}
"""
        return report
        
    except ValueError as e:
        return f"Error: {str(e)}"
    
    finally:
        print("Report generation attempted")


# Test cases
print("\n1. Test: Valid student - 'Rahul'")
print(generate_student_report("Rahul", graded_data))

print("\n2. Test: Case-insensitive - 'priya'")
print(generate_student_report("priya", graded_data))

print("\n3. Test: Invalid name - 'XYZ'")
print(generate_student_report("XYZ", graded_data))

print("\n4. Test: Empty name - ''")
print(generate_student_report("", graded_data))



1. Test: Valid student - 'Rahul'
Report generation attempted

STUDENT REPORT
Name:           Rahul
Math:           85
Science:        90
English:        78
Social Studies: 88
Average:        85.25
Grade:          A
Status:         PASS


2. Test: Case-insensitive - 'priya'
Report generation attempted

STUDENT REPORT
Name:           Priya
Math:           92
Science:        88
English:        95
Social Studies: 90
Average:        91.25
Grade:          A+
Status:         PASS


3. Test: Invalid name - 'XYZ'
Report generation attempted
Error: Student 'XYZ' not found in records

4. Test: Empty name - ''
Report generation attempted
Error: Student name cannot be empty



# Q6: Adding New Student and Updating Statistics

**Q6a.** Add new student "Kavya|Math:88,Science:92,English:85,Social:90"
- Parse, validate, and append Kavya's data
- Recompute grades and statistics for all 6 students

**Q6b.** Update all statistics/tables and show:
- New class statistics (average, median, std dev, pass %)
- Updated roster with all 6 students
- Comparison table: before/after adding Kavya

**Q6c.** Highlight how Kavya's addition changed the statistics


In [44]:

# ==================== Q6a: PARSE AND ADD NEW STUDENT ====================

# Store original data for comparison
original_graded_data = [student.copy() for student in graded_data]
original_stats = class_statistics(original_graded_data)

# Add Kavya's raw data
new_student_raw = "Kavya|Math:88,Science:92,English:85,Social:90"

# Parse Kavya's data
kavya_parsed = parse_single_record(new_student_raw, {'Math', 'Science', 'English', 'Social'})

# Calculate Kavya's grade
kavya_graded_list = calculate_student_grades([kavya_parsed])
kavya_graded = kavya_graded_list[0]

# Append Kavya to graded_data
graded_data.append(kavya_graded)

# Recompute statistics for all 6 students
new_stats = class_statistics(graded_data)


# ==================== Q6b: UPDATED ROSTER WITH ALL 6 STUDENTS ====================

print("\n" + "="*130)
print("UPDATED ROSTER: ALL 6 STUDENTS WITH GRADES")
print("="*130 + "\n")

header = f"{'Student':<12} {'Math':<8} {'Science':<10} {'English':<10} {'Social':<8} {'Total':<8} {'Average':<10} {'Grade':<8} {'Status':<8}"
print(header)
print("-" * 130)

for student in graded_data:
    name = student['name']
    scores = student['scores']
    math_score = int(scores['Math'])
    science_score = int(scores['Science'])
    english_score = int(scores['English'])
    social_score = int(scores['Social'])
    total = math_score + science_score + english_score + social_score
    average = student['average']
    grade = student['grade']
    status = student['status']
    
    row = f"{name:<12} {math_score:<8} {science_score:<10} {english_score:<10} {social_score:<8} {total:<8} {average:<10} {grade:<8} {status:<8}"
    print(row)

print("-" * 130)


# ==================== Q6b: COMPARISON TABLE - BEFORE AND AFTER ====================

print("\n" + "="*100)
print("COMPARISON: CLASS STATISTICS BEFORE AND AFTER ADDING KAVYA")
print("="*100 + "\n")

comparison_data = [
    ("Class Average", f"{original_stats['class_mean']:.2f}", f"{new_stats['class_mean']:.2f}"),
    ("Class Median", f"{original_stats['class_median']:.2f}", f"{new_stats['class_median']:.2f}"),
    ("Std Deviation", f"{original_stats['class_std_dev']:.2f}", f"{new_stats['class_std_dev']:.2f}"),
    ("Pass Percentage", f"{original_stats['pass_percentage']:.2f}%", f"{new_stats['pass_percentage']:.2f}%"),
    ("Students Above Average", f"{original_stats['above_average_count']}", f"{new_stats['above_average_count']}"),
    ("Highest Score", f"{original_stats['highest_score']:.2f} ({original_stats['highest_student']})", f"{new_stats['highest_score']:.2f} ({new_stats['highest_student']})"),
    ("Lowest Score", f"{original_stats['lowest_score']:.2f} ({original_stats['lowest_student']})", f"{new_stats['lowest_score']:.2f} ({new_stats['lowest_student']})"),
]

print(f"{'Metric':<30} {'Before (5 Students)':<30} {'After (6 Students)':<30}")
print("-" * 90)

for metric, before, after in comparison_data:
    print(f"{metric:<30} {before:<30} {after:<30}")


# ==================== Q6c: STATISTICAL CHANGES ANALYSIS ====================

print("\n" + "="*100)
print("Q6c: HOW KAVYA'S ADDITION CHANGED THE STATISTICS")
print("="*100 + "\n")

# Calculate changes
mean_change = new_stats['class_mean'] - original_stats['class_mean']
median_change = new_stats['class_median'] - original_stats['class_median']
std_dev_change = new_stats['class_std_dev'] - original_stats['class_std_dev']
pass_pct_change = new_stats['pass_percentage'] - original_stats['pass_percentage']
above_avg_change = new_stats['above_average_count'] - original_stats['above_average_count']

print("IMPACT ON KEY STATISTICS:")
print("-" * 100)
print(f"Class Average: {original_stats['class_mean']:.2f} → {new_stats['class_mean']:.2f} (Change: +{mean_change:.2f})")
print(f"Class Median:  {original_stats['class_median']:.2f} → {new_stats['class_median']:.2f} (Change: {median_change:+.2f})")
print(f"Std Deviation: {original_stats['class_std_dev']:.2f} → {new_stats['class_std_dev']:.2f} (Change: {std_dev_change:+.2f})")
print(f"Pass %:        {original_stats['pass_percentage']:.2f}% → {new_stats['pass_percentage']:.2f}% (Change: {pass_pct_change:+.2f}%)")
print(f"Above Average: {original_stats['above_average_count']} → {new_stats['above_average_count']} (Change: {above_avg_change:+d})")

print("\nKAVYA'S PROFILE:")
print("-" * 100)
print(f"Name:           Kavya")
print(f"Average:        {kavya_graded['average']:.2f}")
print(f"Grade:          {kavya_graded['grade']}")
print(f"Status:         {kavya_graded['status']}")
print(f"Scores:         Math: {int(kavya_graded['scores']['Math'])}, Science: {int(kavya_graded['scores']['Science'])}, English: {int(kavya_graded['scores']['English'])}, Social: {int(kavya_graded['scores']['Social'])}")

print("\nKEY INSIGHTS:")
print("-" * 100)
if mean_change > 0:
    print(f"✓ Class average INCREASED by {mean_change:.2f} points - Kavya is ABOVE the original class average ({original_stats['class_mean']:.2f})")
else:
    print(f"✗ Class average DECREASED by {abs(mean_change):.2f} points")

if std_dev_change > 0:
    print(f"✓ Standard deviation INCREASED by {std_dev_change:.2f} - Performance became MORE varied")
elif std_dev_change < 0:
    print(f"✓ Standard deviation DECREASED by {abs(std_dev_change):.2f} - Performance became MORE consistent")
else:
    print(f"○ Standard deviation remained the same")

if above_avg_change > 0:
    print(f"✓ Students above average INCREASED from {original_stats['above_average_count']} to {new_stats['above_average_count']}")

print(f"✓ All 6 students are PASSING (100% pass rate)")


# ==================== Q6b: UPDATED SUBJECT-WISE ANALYSIS ====================

print("\n" + "="*130)
print("UPDATED SUBJECT-WISE ANALYSIS: ALL 6 STUDENTS")
print("="*130 + "\n")

# Recalculate subject analysis with all 6 students
subject_analysis_updated = {}
subjects = ['Math', 'Science', 'English', 'Social']

for subject in subjects:
    subject_scores = []
    for student in graded_data:
        score = student['scores'][subject]
        subject_scores.append(score)
    
    avg_score = np.mean(subject_scores)
    max_score = np.max(subject_scores)
    min_score = np.min(subject_scores)
    max_index = np.argmax(subject_scores)
    min_index = np.argmin(subject_scores)
    highest_student = graded_data[max_index]['name']
    lowest_student = graded_data[min_index]['name']
    count_above_75 = len([s for s in subject_scores if s > 75])
    
    subject_analysis_updated[subject] = {
        'average': avg_score,
        'highest': max_score,
        'highest_student': highest_student,
        'lowest': min_score,
        'lowest_student': lowest_student,
        'above_75': count_above_75,
        'scores': subject_scores
    }

# Display updated subject analysis
print(f"{'Subject':<12} {'All Scores':<45} {'Average':<12} {'Highest':<20} {'Lowest':<20} {'Above 75':<12}")
print("-" * 130)

for subject in subjects:
    data = subject_analysis_updated[subject]
    scores_str = ", ".join([str(int(s)) for s in data['scores']])
    highest_info = f"{data['highest_student']} ({int(data['highest'])})"
    lowest_info = f"{data['lowest_student']} ({int(data['lowest'])})"
    
    row = f"{subject:<12} {scores_str:<45} {data['average']:<12.2f} {highest_info:<20} {lowest_info:<20} {data['above_75']:<12}"
    print(row)

print("\n" + "="*130 + "\n")



UPDATED ROSTER: ALL 6 STUDENTS WITH GRADES

Student      Math     Science    English    Social   Total    Average    Grade    Status  
----------------------------------------------------------------------------------------------------------------------------------
Rahul        85       90         78         88       341      85.25      A        PASS    
Priya        92       88         95         90       365      91.25      A+       PASS    
Amit         65       70         0          68       203      50.75      C        PASS    
Sneha        78       82         85         80       325      81.25      A        PASS    
Vikram       45       50         55         48       198      49.5       F        FAIL    
Kavya        88       92         85         90       355      88.75      A        PASS    
Kavya        88       92         85         90       355      88.75      A        PASS    
------------------------------------------------------------------------------------------------