<a href="https://colab.research.google.com/github/kratoskrat818-alt/shayan/blob/main/Student_Semester_Attendance_vs_Performance_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# ==========================================
# Student Semester Attendance vs Performance Analysis
# Subject: Statistics (Minor 1)
# Libraries: Pandas, NumPy, Matplotlib
# ==========================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# ------------------------------------------
# 1. DATA GENERATION
# ------------------------------------------

# Seed NumPy's global generator so the synthetic cohort is identical on every run.
np.random.seed(42)

# Number of synthetic student records.
students = 100

# Whole-number attendance percentages; the upper bound of randint is
# exclusive, so 101 makes 100% attainable.
attendance = np.random.randint(low=50, high=101, size=students)

# Score model: 60% of the attendance figure plus Gaussian noise
# (mean 0, standard deviation 10), limited to the 0-100 range.
exam_scores = np.clip(
    0.6 * attendance + np.random.normal(loc=0, scale=10, size=students),
    0,
    100,
)

# One row per student; IDs are numbered from 1.
df = pd.DataFrame(
    dict(
        Student_ID=range(1, students + 1),
        Attendance_Percentage=attendance,
        Exam_Score=exam_scores,
    )
)

# Preview the first rows under a heading.
print("\nSample Data:", df.head(), sep="\n")

# ------------------------------------------
# 2. STATISTICAL SUMMARY
# ------------------------------------------

# Descriptive statistics for every numeric column, printed under a heading.
print("\nStatistical Summary:", df.describe(), sep="\n")

# ------------------------------------------
# 3. CORRELATION ANALYSIS
# ------------------------------------------

# Correlation between attendance and exam score, using the default method
# of Series.corr.
correlation = df["Attendance_Percentage"].corr(df["Exam_Score"])

# Report the coefficient rounded to two decimals.
print(
    "\nCorrelation Analysis:",
    f"Correlation Coefficient: {correlation:.2f}",
    sep="\n",
)

# ------------------------------------------
# 4. SCATTER PLOT (Attendance vs Performance)
# ------------------------------------------

# One marker per student: attendance on the x-axis, exam score on the y-axis.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(df["Attendance_Percentage"], df["Exam_Score"])
ax.set_title("Attendance vs Exam Performance")
ax.set_xlabel("Attendance Percentage")
ax.set_ylabel("Exam Score")
ax.grid(True)
plt.show()

# ------------------------------------------
# 5. LINE OF BEST FIT
# ------------------------------------------

# Least-squares fit of a degree-1 polynomial: Exam_Score ~ m * Attendance + b.
m, b = np.polyfit(df["Attendance_Percentage"], df["Exam_Score"], 1)

# A straight line is fully described by its two end points, so evaluate the
# fit only at the smallest and largest observed attendance rather than at
# every (unsorted) data point.
trend_x = np.array([
    df["Attendance_Percentage"].min(),
    df["Attendance_Percentage"].max(),
])
trend_y = m * trend_x + b

# Render the intercept with an explicit sign so the legend reads naturally
# for both positive and negative values.
trend_label = f"Best fit: y = {m:.2f}x {'+' if b >= 0 else '-'} {abs(b):.2f}"

plt.figure(figsize=(8, 6))
plt.scatter(df["Attendance_Percentage"], df["Exam_Score"], label="Students")
# Draw the line in an explicit contrasting colour so it stands out from the
# scatter markers instead of blending into them.
plt.plot(trend_x, trend_y, color="red", linewidth=2, label=trend_label)
plt.xlabel("Attendance Percentage")
plt.ylabel("Exam Score")
plt.title("Attendance vs Exam Performance (With Trend Line)")
plt.grid(True)
plt.legend()
plt.show()

# ------------------------------------------
# 6. PERFORMANCE CATEGORY ANALYSIS
# ------------------------------------------

def performance_category(score, high_threshold=75, medium_threshold=50):
    """Classify an exam score as "High", "Medium" or "Low".

    Args:
        score: Numeric exam score to classify.
        high_threshold: Minimum score (inclusive) for the "High" band.
            Defaults to 75.
        medium_threshold: Minimum score (inclusive) for the "Medium" band.
            Defaults to 50.

    Returns:
        "High" if ``score >= high_threshold``, "Medium" if
        ``score >= medium_threshold``, otherwise "Low". A score for which
        both comparisons are false (for example NaN) therefore falls into
        "Low".

    Raises:
        ValueError: If ``medium_threshold`` is greater than
            ``high_threshold``, which would make the "Medium" band
            unreachable.
    """
    if medium_threshold > high_threshold:
        raise ValueError(
            "medium_threshold must not exceed high_threshold"
        )

    if score >= high_threshold:
        return "High"
    if score >= medium_threshold:
        return "Medium"
    return "Low"

# Label every student with the band returned by performance_category.
df["Performance_Level"] = df["Exam_Score"].apply(performance_category)

# Number of students in each band.
category_counts = df["Performance_Level"].value_counts()

# Pie chart of the band sizes; each wedge shows its share to one decimal place.
fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    category_counts.values,
    labels=category_counts.index,
    startangle=90,
    autopct="%1.1f%%",
)
ax.set_title("Student Performance Distribution")
plt.show()

# ------------------------------------------
# 7. FINAL INSIGHTS
# ------------------------------------------

# Closing summary: a heading followed by one bullet line per takeaway.
print("\nFinal Insights:")
for insight in (
    "- Attendance and exam scores show a positive correlation.",
    "- Higher attendance generally leads to better academic performance.",
    "- Scatter plot confirms a rising trend between attendance and scores.",
    "- Correlation coefficient quantifies the relationship strength.",
):
    print(insight)

# ------------------------------------------
# END OF PROJECT
# ------------------------------------------

ModuleNotFoundError: No module named 'pandas'