In [None]:
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "# Attendance Percentage Analyzer\n",
        "# Minor Project â€“ 2\n",
        "# Dataset Source: UCI Student Performance Dataset (student-mat.csv)\n",
        "\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt"
      ],
      "metadata": {
        "id": "4BFUWaXUG6Ar"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# Read the semicolon-delimited student dataset from the Colab sample_data folder\n",
        "dataset_path = \"/content/sample_data/student-mat.csv\"\n",
        "df = pd.read_csv(dataset_path, sep=\";\")\n",
        "\n",
        "# Confirm the load and preview the first rows\n",
        "print(\"Dataset Loaded Successfully\\n\")\n",
        "print(df.head())"
      ],
      "metadata": {
        "id": "ZxZHBECYPBeW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# -------------------------------\n",
        "# Attendance Computation Logic\n",
        "# -------------------------------\n",
        "\n",
        "# Assumed number of school days in the term (hard-coded; adjust to the real term length)\n",
        "total_days = 100\n",
        "\n",
        "# Days present = term length minus recorded absences.\n",
        "# Clamp to [0, total_days] so an absence count above the assumed term length\n",
        "# (or a negative one) cannot yield an attendance figure outside 0-100 %.\n",
        "df[\"Days_Present\"] = (total_days - df[\"absences\"]).clip(lower=0, upper=total_days)\n",
        "\n",
        "# Attendance as a percentage of the assumed term length\n",
        "df[\"Attendance_Percentage\"] = (df[\"Days_Present\"] / total_days) * 100\n",
        "\n",
        "# Mean and standard deviation of attendance; the categorization step builds\n",
        "# its Good / Poor thresholds from these two values\n",
        "mean_attendance = df[\"Attendance_Percentage\"].mean()\n",
        "std_attendance = df[\"Attendance_Percentage\"].std()\n",
        "\n",
        "print(\"\\nAttendance Statistics\")\n",
        "print(\"Mean Attendance:\", round(mean_attendance, 2))\n",
        "print(\"Standard Deviation:\", round(std_attendance, 2))"
      ],
      "metadata": {
        "id": "Fzp4I-_xG9BN"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# -------------------------------\n",
        "# Data-Driven Categorization Logic\n",
        "# -------------------------------\n",
        "\n",
        "# Rules (thresholds sit half a standard deviation either side of the mean):\n",
        "# Good     -> at or above (mean + 0.5 * std)\n",
        "# Poor     -> strictly below (mean - 0.5 * std)\n",
        "# Average  -> everything else\n",
        "\n",
        "half_std = 0.5 * std_attendance\n",
        "upper_threshold = mean_attendance + half_std\n",
        "lower_threshold = mean_attendance - half_std\n",
        "\n",
        "# Boolean masks for the two outer bands\n",
        "is_good = df[\"Attendance_Percentage\"] >= upper_threshold\n",
        "is_poor = df[\"Attendance_Percentage\"] < lower_threshold\n",
        "\n",
        "# Start every row as Average, then overwrite the rows in the outer bands\n",
        "df[\"Attendance_Category\"] = \"Average\"\n",
        "df.loc[is_good, \"Attendance_Category\"] = \"Good\"\n",
        "df.loc[is_poor, \"Attendance_Category\"] = \"Poor\"\n",
        "\n",
        "print(\"\\nAttendance Category Thresholds\")\n",
        "print(\"Good  >= \", round(upper_threshold, 2))\n",
        "print(\"Poor  <  \", round(lower_threshold, 2))"
      ],
      "metadata": {
        "id": "r8094XE6PHCq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# -------------------------------\n",
        "# Category Distribution Analysis\n",
        "# -------------------------------\n",
        "\n",
        "# Count how many students fall into each attendance category\n",
        "attendance_labels = df[\"Attendance_Category\"]\n",
        "category_counts = attendance_labels.value_counts()\n",
        "\n",
        "# Heading first, then the counts on the following lines\n",
        "print(\"\\nCategory Distribution\", category_counts, sep=\"\\n\")\n",
        "\n"
      ],
      "metadata": {
        "id": "qnbTIq23PR4U"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# -------------------------------\n",
        "# Visualization Section\n",
        "# -------------------------------\n",
        "\n",
        "# Bar chart: number of students in each attendance category\n",
        "bar_fig, bar_ax = plt.subplots()\n",
        "category_counts.plot.bar(ax=bar_ax)\n",
        "bar_ax.set_title(\"Student Attendance Category Distribution\")\n",
        "bar_ax.set_xlabel(\"Attendance Category\")\n",
        "bar_ax.set_ylabel(\"Number of Students\")\n",
        "plt.show()\n",
        "\n",
        "# Pie chart: each category's share of all students\n",
        "pie_fig, pie_ax = plt.subplots()\n",
        "category_counts.plot.pie(ax=pie_ax, autopct=\"%1.1f%%\")\n",
        "pie_ax.set_title(\"Attendance Category Percentage Share\")\n",
        "pie_ax.set_ylabel(\"\")  # clear any y-axis label left on the pie axes\n",
        "plt.show()"
      ],
      "metadata": {
        "id": "JSl-lz-WPU09"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# -------------------------------\n",
        "# Final Analytical Output\n",
        "# -------------------------------\n",
        "\n",
        "# Columns shown in the report, in display order\n",
        "report_columns = [\n",
        "    \"absences\",\n",
        "    \"Days_Present\",\n",
        "    \"Attendance_Percentage\",\n",
        "    \"Attendance_Category\",\n",
        "]\n",
        "\n",
        "# Highest attendance first\n",
        "attendance_report = df[report_columns]\n",
        "final_view = attendance_report.sort_values(\"Attendance_Percentage\", ascending=False)\n",
        "\n",
        "# Show the ten rows with the highest attendance\n",
        "print(\"\\nFinal Attendance Analysis (Top Records)\")\n",
        "print(final_view.head(10))\n"
      ],
      "metadata": {
        "id": "nLJequFBPWtS"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}