In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the student attendance dataset (CSV file) and display its contents.
# The path is relative to the notebook's working directory.

df = pd.read_csv("datasets/student_9.csv")

# A bare last expression uses the notebook's rich (HTML) table rendering
# instead of the plain-text dump produced by print().
df

In [None]:
# Preview the first rows of the dataset (head() returns 5 rows by default)
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) — by default for the numeric columns
df.describe()

In [None]:
# Print the column names, non-null counts, dtypes and memory usage of the dataset
df.info()

In [None]:
# Students whose average attendance is at least 80%

df.loc[df["Average Attendance"].ge(80)]

In [None]:
# Students whose average attendance is under 60% (not allowed in the Semester Exam)

df.loc[df["Average Attendance"].lt(60)]

In [None]:
# Subject-wise deficiency: students whose attendance is under 20% in at least one subject

# Per-subject attendance percentage columns, reused by the subject-wise cells that follow
attendance_cols = [
    f"Attendance % in {subject}"
    for subject in (
        "Operating Systems",
        "Database Management System",
        "Artificial Intelligence",
        "Algorithm-II",
        "Soft-Skill Development-IV",
        "Design Thinking",
        "Human Values and Ethics",
    )
]

# Row mask: True when any subject column is below 20; show the name plus all subject columns
df.loc[(df[attendance_cols] < 20).any(axis="columns"), ["Student Name", *attendance_cols]]

In [None]:
# Students with at least 80% attendance in every subject

df.loc[(df[attendance_cols] >= 80).all(axis="columns"), ["Student Name", *attendance_cols]]

In [None]:
# Perfect record: students with exactly 100% attendance in every subject

df.loc[(df[attendance_cols] == 100).all(axis="columns"), ["Student Name", *attendance_cols]]

In [None]:
# Top 5 students ranked by average attendance (name and enrolment number only)

df.nlargest(n=5, columns="Average Attendance").loc[:, ["Student Name", "Enrolment No."]]

In [None]:
# Subject-wise attendance report: mean attendance % per subject, rounded to 2 decimals

print("Avg. Attendance (%) per Subject:\n")
df.loc[:, attendance_cols].mean(axis="index").round(decimals=2)

In [None]:
# Total number of classes attended by each student, summed across all subjects

# Per-subject "Present in ..." columns that are added up row by row
present_in = [
    f"Present in {subject}"
    for subject in (
        "Operating Systems",
        "Database Management System",
        "Artificial Intelligence",
        "Algorithm-II",
        "Soft-Skill Development-IV",
        "Design Thinking",
        "Human Values and Ethics",
    )
]

# Store the row-wise sum on the frame, then show it next to the identifying columns
df["Total Present"] = df.loc[:, present_in].sum(axis="columns")
df.loc[:, ["Student Name", "Enrolment No.", "Registration No.", "Total Present"]]

In [None]:
# Group students into different categories according to average attendance


def category(x):
    """Map an average-attendance percentage to a category label.

    Excellent: x >= 85
    Good:      70 <= x < 85
    Poor:      50 <= x < 70
    Defaulter: x < 50

    Any value that fails all three ``>=`` checks (NaN included) falls
    through to "Defaulter".
    """
    # Bands are checked from the highest lower bound down; the first match wins.
    for lower_bound, label in ((85, "Excellent"), (70, "Good"), (50, "Poor")):
        if x >= lower_bound:
            return label
    return "Defaulter"


# Label every student with an attendance category, then count the students in each category
df["Category"] = df["Average Attendance"].map(category)
df["Category"].value_counts()

In [None]:
# List the students categorised as "Defaulter" (average attendance < 50%)

df.loc[df["Category"].eq("Defaulter"), ["Student Name", "Enrolment No.", "Average Attendance"]]

In [None]:
# Report for the Head of the Department: number of students whose attendance
# is below the 40% threshold, counted separately for each subject.

# lt(40) marks each below-threshold cell as True; summing a boolean column
# counts its True values, which yields one student count per subject.
df[attendance_cols].lt(40).sum().rename("Students below 40%")

In [None]:
# Students who were fully absent in one particular subject (here: Design Thinking),
# i.e. whose "Present in Design Thinking" value is 0

df.loc[df["Present in Design Thinking"].eq(0), ["Student Name", "Enrolment No."]]

In [None]:
# Students with 0% attendance in ALL subjects
df.loc[(df[attendance_cols] == 0).all(axis="columns"), ["Student Name", *attendance_cols]]

In [None]:
# Summary table: mean attendance per subject, rounded to 2 decimals

subjects = ["OS", "DBMS", "AI", "Algo-II", "SSD", "DT", "HVE"]  # short labels, same order as attendance_cols
summary = df.loc[:, attendance_cols].mean().round(decimals=2)
summary_table = pd.DataFrame({"Subject": subjects, "Avg. Attendance": summary.to_numpy()})

# Heading, one blank line, then the table
print("Avg. Attendance (%) per Subject:\n", summary_table, sep="\n")

In [None]:
# Bar chart of the 10 students with the highest average attendance

top10 = df.nlargest(n=10, columns="Average Attendance")

fig, ax = plt.subplots()
ax.bar(
    top10["Student Name"],
    top10["Average Attendance"],
    color="lightgreen",
    edgecolor="black",
)
ax.set_title("Top 10 Students by Avg Attendance")
ax.set_xlabel("Student Name")
ax.set_ylabel("Average Attendance %")
ax.tick_params(axis="x", labelrotation=90)  # draw the student names vertically
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
# Pie chart of the attendance category distribution

# Reuse the "Category" column built by the grouping cell earlier in the notebook
# rather than re-declaring an identical copy of category() in this cell.
count = df["Category"].value_counts()

# One wedge per category, labelled with its share to one decimal place
plt.pie(count.values, labels=count.index, autopct="%1.1f%%")
plt.title("Attendance Categories")
plt.show()

In [None]:
# Line chart of subject-wise attendance for one specific student

student_name = "SHREYA CHOWDHURY"  # change this name to plot a different student
student = df[df["Student Name"] == student_name]

# Fail with a clear message instead of matplotlib's shape-mismatch error
# when the name is not present in the dataset.
if student.empty:
    raise ValueError(f"No student named {student_name!r} found in the dataset")

# attendance_cols is the per-subject column list defined in the subject-wise
# deficiency cell earlier in the notebook. Only the first matching row is
# plotted, so a duplicated name cannot yield more values than subject labels.
data = student[attendance_cols].iloc[0].to_numpy(dtype=float)

subjects = ["OS", "DBMS", "AI", "Algo-II", "SSD", "DT", "HVE"]  # short labels, same order as attendance_cols

plt.plot(subjects, data, marker="o")
plt.title(f"Subject-wise Attendance for {student_name}")
plt.ylabel("Attendance %")
plt.grid(linestyle="--", alpha=0.7)
plt.show()

In [None]:
# Histogram of average attendance across the whole class

wholeclass = df["Average Attendance"]
bins = [0, 50, 70, 85, 100]  # bin edges at the same cut-offs used by category()

fig, ax = plt.subplots()
ax.hist(wholeclass, bins=bins, color="skyblue", edgecolor="black")
ax.set(
    title="Histogram of Average Attendance",
    xlabel="Average Attendance %",
    ylabel="Number of Students",
)
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
# Horizontal bar chart of the 10 students with the lowest average attendance

bottom10 = df.nsmallest(n=10, columns="Average Attendance")

fig, ax = plt.subplots()
ax.barh(
    bottom10["Student Name"],
    bottom10["Average Attendance"],
    color="orange",
    edgecolor="black",
)
ax.set(
    title="Bottom 10 Students by Avg Attendance",
    xlabel="Average Attendance %",
    ylabel="Student Name",
)
ax.grid(axis="x", linestyle="--", alpha=0.7)
plt.show()