In [5]:
from html import escape
from pathlib import Path

import pandas as pd

# 1. DATA LOADING AND PREPROCESSING

In [6]:
def load_data(path="ML_Dataset.csv"):
    """Load the marks sheet and split it into question rows, students and test codes.

    The CSV is read with a two-row header. The first three columns are treated
    as ``Subject``, ``Question`` and ``Topic``; every remaining column belongs
    to one student, whose name is the top-level header label of that column.
    The first two rows below the header are not question rows: row 1 is read
    as the per-student test codes, and both rows are removed from the returned
    frame.

    Args:
        path: Location of the CSV file (string or ``Path``). Defaults to
            ``"ML_Dataset.csv"`` in the current working directory, which is
            the file this function has always read.

    Returns:
        tuple: ``(df, students, test_codes)`` where ``df`` is the question-level
        frame with flat column names (``Subject``, ``Question``, ``Topic``,
        then one column per student), ``students`` is the list of student
        column names, and ``test_codes`` is an array with one entry per
        student, in the same order as ``students``.
    """
    raw = pd.read_csv(path, header=[0, 1])

    # Student names live in the top level of the two-row header, after the
    # three descriptive columns.
    students = raw.columns[3:].get_level_values(0).tolist()
    # Row label 1 is taken as the test code of each student.
    test_codes = raw.loc[1, students].values

    # Flatten the two-level header into plain column names.
    raw.columns = ['Subject', 'Question', 'Topic'] + students
    # Rows 0 and 1 are not question rows; drop them and renumber from zero.
    df = raw.drop([0, 1]).reset_index(drop=True)

    return df, students, test_codes


# 2. STUDENT ANALYSIS FUNCTIONS

In [7]:
def calculate_subject_scores(student_df, student):
    """Aggregate one student's marks per subject.

    Args:
        student_df: Frame with a ``Subject`` column and a numeric column named
            after the student, one row per question.
        student: Name of the column holding this student's marks.

    Returns:
        tuple: ``(subject_totals, max_marks)``. ``subject_totals`` is a frame
        with columns ``Subject``, ``Marks`` (sum of marks in the subject) and
        ``Percentage`` (``Marks`` divided by the subject's entry in
        ``max_marks``, times 100, rounded to two decimals). ``max_marks`` is a
        Series indexed by subject holding the largest single mark found in
        that subject.
    """
    marks_by_subject = student_df.groupby('Subject')[student]

    # NOTE(review): the denominator is the highest single mark in the subject,
    # not the total marks available - confirm this is the intended definition.
    max_marks = marks_by_subject.max()

    subject_totals = (
        marks_by_subject.sum()
        .reset_index()
        .set_axis(['Subject', 'Marks'], axis=1)
    )

    # Align the per-subject maxima with the row order of the totals table.
    denominators = max_marks.loc[subject_totals['Subject']].values
    percentages = (subject_totals['Marks'] / denominators) * 100
    subject_totals['Percentage'] = percentages.round(2)

    return subject_totals, max_marks

def determine_qualification(subject_totals, total_percentage, threshold=60):
    """Decide whether a student qualifies.

    A student qualifies only when the overall percentage AND every
    per-subject percentage are strictly greater than ``threshold``.

    Args:
        subject_totals: Frame with a ``Percentage`` column, one row per subject.
        total_percentage: The student's overall percentage.
        threshold: Pass mark that must be strictly exceeded. Defaults to 60,
            the value previously hard-coded here.

    Returns:
        bool: ``True`` if the student qualifies, otherwise ``False``. The
        result is always a builtin ``bool``, even when ``total_percentage``
        is a numpy scalar.
    """
    # Written as "not >" so a NaN overall percentage is rejected.
    if not total_percentage > threshold:
        return False
    return bool((subject_totals['Percentage'] > threshold).all())

# 3. REPORT GENERATION FUNCTIONS

In [9]:
def generate_student_html(student_data):
    """Render one student's report as a standalone HTML page.

    Args:
        student_data: Mapping with the keys ``student``, ``test_code``,
            ``total_marks``, ``total_percentage``, ``qualified`` (truthy means
            selected) and the frames ``subject_totals``, ``correct_topics``
            and ``incorrect_topics``, each rendered with ``to_html``.

    Returns:
        str: The complete HTML document.
    """
    # The name and test code are free text, so escape them before placing them
    # in markup; the tables are already escaped by ``to_html``.
    student_name = escape(str(student_data['student']))
    test_code = escape(str(student_data['test_code']))

    if student_data['qualified']:
        status_class, status_label = 'selected', 'SELECTED'
    else:
        status_class, status_label = 'rejected', 'REJECTED'

    page = f"""
    <html>
    <head>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 20px; }}
            h1 {{ color: #2c3e50; }}
            h2 {{ color: #3498db; margin-top: 30px; }}
            table {{ border-collapse: collapse; width: 100%; margin-bottom: 20px; }}
            th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
            th {{ background-color: #f2f2f2; }}
            .selected {{ color: green; font-weight: bold; }}
            .rejected {{ color: red; font-weight: bold; }}
        </style>
    </head>
    <body>
        <h1>Student Report: {student_name}</h1>
        <p>Test Code: {test_code}</p>
        
        <h2>Total Marks</h2>
        {student_data['subject_totals'].to_html(index=False)}
        <p>Total Marks: {student_data['total_marks']} ({student_data['total_percentage']}%)</p>
        <p>Status: <span class="{status_class}">
            {status_label}
        </span></p>
        
        <h2>Topics Answered Correctly</h2>
        {student_data['correct_topics'].to_html(index=False)}
        
        <h2>Topics Answered Incorrectly</h2>
        {student_data['incorrect_topics'].to_html(index=False)}
    </body>
    </html>
    """
    return page

def generate_summary_html(all_student_data):
    """Render the qualification summary for all students as an HTML page.

    Args:
        all_student_data: Iterable of mappings, each providing ``student``,
            ``test_code``, ``total_marks``, ``total_percentage`` and
            ``qualified`` (truthy means selected).

    Returns:
        str: The complete HTML document containing one table row per student.
    """
    # The table is rendered with ``escape=False`` so the status <span> survives,
    # which means the free-text cells have to be escaped here instead.
    summary_rows = [
        {
            'Student': escape(str(data['student'])),
            'Test Code': escape(str(data['test_code'])),
            'Total Marks': data['total_marks'],
            'Percentage': data['total_percentage'],
            'Status': 'SELECTED' if data['qualified'] else 'REJECTED',
        }
        for data in all_student_data
    ]
    # Explicit columns keep the header row intact even when there are no students.
    summary_df = pd.DataFrame(
        summary_rows,
        columns=['Student', 'Test Code', 'Total Marks', 'Percentage', 'Status'],
    )

    def _status_cell(status):
        # Wrap the status text in a span carrying the matching CSS class.
        css_class = 'selected' if status == 'SELECTED' else 'rejected'
        return f'<span class="{css_class}">{status}</span>'

    table_html = summary_df.to_html(
        index=False,
        classes='summary-table',
        formatters={'Status': _status_cell},
        escape=False,
    )

    return f"""
    <html>
    <head>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 20px; }}
            h1 {{ color: #2c3e50; text-align: center; }}
            table {{ border-collapse: collapse; width: 80%; margin: 20px auto; }}
            th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
            th {{ background-color: #f2f2f2; }}
            .selected {{ color: green; }}
            .rejected {{ color: red; }}
        </style>
    </head>
    <body>
        <h1>Machine Learning Course - Qualification Summary</h1>
        {table_html}
    </body>
    </html>
    """

# 4. MAIN EXECUTION FLOW

In [10]:
def main():
    """Build one HTML report per student plus a summary page.

    Loads the marks sheet via ``load_data``, scores every student, writes
    ``student_reports/<student>.html`` for each of them and finally writes
    ``student_reports/result.html`` with the overall summary. Progress and a
    closing message are printed to stdout.
    """
    output_dir = Path("student_reports")
    output_dir.mkdir(exist_ok=True)

    df, students, test_codes = load_data()
    question_columns = ['Subject', 'Question', 'Topic']
    all_student_data = []

    for student, test_code in zip(students, test_codes):
        print(f"Processing {student}...")

        # Keep only the questions this student has a mark for ('-' marks a
        # question without one) and make the marks numeric.
        attempted = df[question_columns + [student]].copy()
        attempted = attempted[attempted[student] != '-'].copy()
        attempted[student] = attempted[student].astype(float)

        subject_totals, max_marks = calculate_subject_scores(attempted, student)
        total_marks = subject_totals['Marks'].sum()
        total_percentage = (total_marks / max_marks.sum()) * 100

        topic_columns = ['Subject', 'Topic', student]
        marks = attempted[student]

        student_data = {
            'student': student,
            'test_code': test_code,
            'subject_totals': subject_totals,
            'total_marks': total_marks,
            'total_percentage': round(total_percentage, 2),
            'qualified': determine_qualification(subject_totals, total_percentage),
            'correct_topics': attempted.loc[marks > 0, topic_columns],
            'incorrect_topics': attempted.loc[marks == 0, topic_columns],
        }
        all_student_data.append(student_data)

        # NOTE(review): reports are written with the platform default text
        # encoding and the pages declare no charset - confirm that is intended.
        report_path = output_dir / f"{student}.html"
        report_path.write_text(generate_student_html(student_data))

    summary_path = output_dir / "result.html"
    summary_path.write_text(generate_summary_html(all_student_data))

    print("\nSuccessfully generated:")
    print(f"- Individual reports for {len(students)} students")
    print("- Summary report (result.html)")

# Run the whole report pipeline when this cell (or the exported script) is
# executed directly; importing the code as a module does not trigger it.
if __name__ == "__main__":
    main()

Processing ST1...
Processing ST2...
Processing ST3...
Processing ST4...
Processing ST5...
Processing ST6...
Processing ST7...
Processing ST8...

Successfully generated:
- Individual reports for 8 students
- Summary report (result.html)
