# In [None]:
import pandas as pd
import numpy as np

# -----------------------------
# Helper Functions
# -----------------------------

def compute_attendance(df):
    """
    Compute attendance % for each student.

    Expects multiple rows per student, with a 'student' column and an
    'attendance' column. A row counts as attended when its 'attendance'
    value equals 'present' after ignoring case and surrounding whitespace;
    any other value (including a missing one) counts as not attended.

    Returns a dict {student: attendance percentage as a float, 0-100}.
    A dataframe with no rows yields an empty dict.
    """
    # Cast to str first so missing or non-string cells cannot break the
    # .str accessor; such cells simply never compare equal to 'present'.
    is_present = (
        df['attendance'].astype(str).str.strip().str.lower().eq('present')
    )
    # The mean of a boolean series is the fraction of True values. Grouping
    # the series itself (instead of DataFrame.groupby(...).apply) keeps the
    # result a Series even when there are no rows, so the output is always
    # a flat {student: percent} mapping.
    percent = is_present.groupby(df['student']).mean() * 100
    return {student: float(value) for student, value in percent.items()}


def compute_hw_cw_score(df):
    """
    Compute homework and classwork scores (1-10) per student.

    Expects a 'student' column plus issue-flag columns 'HW_issue' and
    'CW_issue'. True = Issue (so lower score), False = Done (high score).

    For each student the share of rows WITHOUT an issue is mapped linearly
    onto 1-10 (0% done -> 1, 100% done -> 10) and rounded with round().

    Missing flag values are ignored. If a student has no recorded flag at
    all for a column, no issue was flagged, so the ratio is taken as 1.0.
    Non-missing values are interpreted by truthiness (so 0/1 work too);
    textual flags such as "False" are NOT parsed and would count as True.

    Returns a dict {student: {'homework': int, 'classwork': int}}.
    """
    def done_ratio(flags):
        # Drop missing entries, then force a real boolean dtype: on an
        # object or integer column `~` is a bitwise/Python-level inversion
        # (~True == -2, ~0 == -1), which would corrupt the ratio.
        recorded = flags.dropna()
        if recorded.empty:
            return 1.0
        return float((~recorded.astype(bool)).mean())

    scores = {}
    for student, group in df.groupby('student'):
        hw_done_ratio = done_ratio(group['HW_issue'])  # ratio of homework done
        cw_done_ratio = done_ratio(group['CW_issue'])  # ratio of classwork done
        scores[student] = {
            'homework': int(round(1 + hw_done_ratio * 9)),
            'classwork': int(round(1 + cw_done_ratio * 9)),
        }
    return scores


def compute_exam_score(df):
    """
    Compute one exam score per student from the daily exam marks.

    Reads the columns 'daily_exam1_mark' and 'daily_exam2_mark'. For each
    student the mean of each column is taken first, and the two column
    means are then averaged into a single value.

    Returns a dict {student: averaged mark}.
    """
    exam_columns = ['daily_exam1_mark', 'daily_exam2_mark']
    return {
        name: rows[exam_columns].mean().mean()
        for name, rows in df.groupby('student')
    }


def compute_class_focus(attendance_dict, hwcw_scores, exam_scores):
    """
    Combine the per-student metrics into a single class focus value.

    Weights: 45% exam, 25% attendance, 15% homework, 15% classwork.
    Homework and classwork scores are rescaled from a 0-10 scale to 0-100
    before weighting; attendance and exam values are used as given.

    The students processed are the keys of attendance_dict; each of them
    must also be present in hwcw_scores and exam_scores (KeyError if not).

    Returns a dict {student: class focus value}.
    """
    def to_percent(score_out_of_ten):
        return score_out_of_ten / 10 * 100

    focus_by_student = {}
    for name, attendance_pct in attendance_dict.items():
        marks = hwcw_scores[name]
        homework_pct = to_percent(marks['homework'])
        classwork_pct = to_percent(marks['classwork'])
        exam_value = exam_scores[name]
        focus_by_student[name] = (
            0.45 * exam_value
            + 0.25 * attendance_pct
            + 0.15 * homework_pct
            + 0.15 * classwork_pct
        )
    return focus_by_student


def infer_skills_from_comments(comments):
    """
    Derive skill scores from teacher comments by keyword spotting.

    comments: dict {student: comment}; each comment is converted with
    str() and lower-cased before matching.

    For every skill, the number of its keywords that occur in the comment
    (plain substring test, so "speak" also matches "speaker") is doubled
    and clamped to the range 1-10. No matching keyword gives a score of 1.

    Returns a dict {student: {'problem_solving': int,
                              'communication': int,
                              'discipline': int}}.
    """
    # Skill -> keywords that count as evidence for it.
    keyword_table = {
        'problem_solving': ("math", "science", "logical", "problem", "solve", "reason"),
        'communication': ("communication", "speak", "english", "bangla", "write", "express"),
        'discipline': ("regular", "punctual", "attendance", "disciplined", "homework"),
    }

    extracted = {}
    for name, remark in comments.items():
        lowered = str(remark).lower()
        ratings = {}
        for skill, keywords in keyword_table.items():
            hits = len([word for word in keywords if word in lowered])
            # Simple scale: two points per matched keyword, kept within 1-10.
            ratings[skill] = min(max(1, hits * 2), 10)
        extracted[name] = ratings
    return extracted


# -----------------------------
# Main Function
# -----------------------------

def process_student_csv(df, comment_dict=None):
    """
    Build the per-student report from the combined records.

    df: concatenated dataframe for all students (multiple rows per student)
    comment_dict: optional dict {student_name: comment_text}

    Returns a dict {student: report}, one entry per key produced by
    compute_attendance. Each report holds 'attendance', 'class_focus' and
    'exam' rounded to 2 decimals, the 'homework' and 'classwork' scores,
    and 'skills'. A student without a comment gets a neutral skills entry
    of 5 for every skill.
    """
    attendance = compute_attendance(df)
    hwcw = compute_hw_cw_score(df)
    exams = compute_exam_score(df)
    class_focus = compute_class_focus(attendance, hwcw, exams)
    skills = infer_skills_from_comments(comment_dict or {})

    def build_report(name):
        # Assemble the structured output for a single student.
        marks = hwcw[name]
        if name in skills:
            student_skills = skills[name]
        else:
            student_skills = {'problem_solving':5,'communication':5,'discipline':5}
        return {
            "attendance": round(attendance[name], 2),
            "homework": marks['homework'],
            "classwork": marks['classwork'],
            "class_focus": round(class_focus[name], 2),
            "exam": round(exams[name], 2),
            "skills": student_skills,
        }

    return {name: build_report(name) for name in attendance}


# -----------------------------
# Example Usage
# -----------------------------

# Example: load CSVs for 3 students and concatenate
# df_amin = pd.read_csv("Amin.csv")
# df_rina = pd.read_csv("Rina.csv")
# df_jamil = pd.read_csv("Jamil.csv")
# df_all = pd.concat([df_amin, df_rina, df_jamil], ignore_index=True)

# Optional teacher comments per student
# comments = {
#     "Amin": "Strong in math and logical problem solving",
#     "Rina": "Very good communication and English writing",
#     "Jamil": "Excellent in English and Bangla. Good speaker."
# }

# results = process_student_csv(df_all, comment_dict=comments)
# print(results)