# Project 2: Student Performance Analytics Dashboard
Analyze student data to highlight performance trends and risk areas.

## Step 1: Import Libraries
Import required libraries for analysis and visualization.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Step 2: Load the Dataset
Load the student performance dataset.

In [None]:
# Location of the student performance CSV; a relative path is resolved against
# the current working directory.
data_path = "student_data.csv"  # Replace with your actual filename

# Read the file into the DataFrame that the later steps operate on.
df = pd.read_csv(data_path)

## Step 3: View Column Names
Check the actual columns to avoid errors.

In [None]:
# Show the column names exactly as they were read from the file so that later
# steps can reference them verbatim.
column_names = df.columns.tolist()
print(column_names)

## Step 4: Clean the Data
Strip leading and trailing whitespace from the column names, then drop the unneeded columns that are present in the data.

In [None]:
# Normalise the headers first, so the drop below also matches names that carry
# stray leading/trailing whitespace in the file.
df.columns = df.columns.str.strip()

# Columns the analysis does not need.
# NOTE(review): 'D' looks like it may be a truncated column name (e.g. an ID
# field) — confirm against the column list printed in the previous step.
columns_to_drop = ['D', 'First_Name', 'Last_Name', 'Email']

# errors='ignore' skips any listed column that is absent instead of raising.
df = df.drop(columns=columns_to_drop, errors='ignore')

## Step 5: Convert Categorical Columns
Optimize memory usage by converting to category types.

In [None]:
# Columns to store with pandas' categorical dtype.
categorical_columns = [
    'Gender',
    'Department',
    'Grade',
    'Internet_Access_at_Home',
    'Extracurricular_Activities',
]

# Convert one column at a time, in the order listed; a missing column raises
# KeyError.
for column in categorical_columns:
    df[column] = df[column].astype('category')

## Step 6: Feature Engineering
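Calculate a composite academic score (the mean of the five assessment scores) and label performance: students at or below the 30th percentile are 'Struggling'; those above it are 'Top'.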

In [None]:
# Composite score: row-wise mean of the five assessment columns. DataFrame.mean
# skips missing values by default, so the result is NaN only when a row has no
# usable score at all.
score_columns = ['Midterm_Score', 'Final_Score', 'Assignments_Avg', 'Quizzes_Avg', 'Projects_Score']
df['Academic_Score'] = df[score_columns].mean(axis=1)

# Cut-off at the 30th percentile of the composite score.
threshold = df['Academic_Score'].quantile(0.3)

# Rows at or below the cut-off are 'Struggling'; rows above it are 'Top'.
academic_score = df['Academic_Score']
df['Performance_Category'] = np.where(academic_score <= threshold, 'Struggling', 'Top')

# A comparison against NaN is False, so np.where alone would label rows with a
# missing composite score as 'Top'. Leave those rows unlabeled instead.
df.loc[academic_score.isna(), 'Performance_Category'] = np.nan

## Step 7: Average Metrics and Correlation
Compute average scores and correlation between features.

In [None]:
# The same six columns feed both the averages and the correlation matrix.
metric_columns = [
    'Academic_Score',
    'Attendance (%)',
    'Participation_Score',
    'Study_Hours_per_Week',
    'Stress_Level (1-10)',
    'Sleep_Hours_per_Night',
]
metrics = df[metric_columns]

# Column-wise mean of each metric.
avg_scores = metrics.mean()

# Pairwise correlation matrix of the metrics, using DataFrame.corr defaults.
corr = metrics.corr()

# Bare expression: displayed as the cell output when run in a notebook.
corr

## Step 8: Impact of Attendance on Academic Score
Use a scatter plot to see how attendance relates to academic performance.

In [None]:
# Scatter of attendance against the composite score, coloured by performance
# label, drawn on an explicitly created 8x5 figure.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='Attendance (%)',
    y='Academic_Score',
    hue='Performance_Category',
    ax=ax,
)
ax.set_title("Impact of Attendance on Academic Score")
ax.grid(True)
fig.tight_layout()
plt.show()

## Step 9: Correlation Heatmap
Visualize relationships among important features.

In [None]:
# Annotated heatmap of the correlation matrix `corr`, with each cell value
# printed to two decimals.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    corr,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    ax=ax,
)
ax.set_title("Feature Correlation Heatmap")
fig.tight_layout()
plt.show()

## Step 10: Department-wise Performance
Compare academic performance across departments.

In [None]:
# Box plot of the composite score per department, split by performance label.
fig, ax = plt.subplots(figsize=(10, 5))
sns.boxplot(
    data=df,
    x='Department',
    y='Academic_Score',
    hue='Performance_Category',
    ax=ax,
)
ax.set_title("Academic Score Distribution by Department")
fig.tight_layout()
plt.show()

## Step 11: Study Hours vs Participation
Understand student engagement through study and participation.

In [None]:
# Scatter of weekly study hours against participation score, coloured by
# performance label.
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(
    data=df,
    x='Study_Hours_per_Week',
    y='Participation_Score',
    hue='Performance_Category',
    ax=ax,
)
ax.set_title("Study Hours vs Participation Score")
ax.grid(True)
fig.tight_layout()
plt.show()

## Step 12: Sleep vs Stress Level
Analyze the relation between sleep and student stress.

In [None]:
# Scatter of nightly sleep hours against stress level, coloured by performance
# label.
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(
    data=df,
    x='Sleep_Hours_per_Night',
    y='Stress_Level (1-10)',
    hue='Performance_Category',
    ax=ax,
)
ax.set_title("Sleep vs Stress Level")
ax.grid(True)
fig.tight_layout()
plt.show()