# Task 1: Student Performance Analysis using Python

This notebook contains data loading, exploration, cleaning, analysis, and visualization of the Student Performance dataset. The objective is to gain insights into how factors like study time and gender affect students' final grades (G3).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Data Loading

In [None]:
# Load the dataset. The path is relative to the notebook's working directory.
DATA_PATH = 'student-mat.csv'
df = pd.read_csv(DATA_PATH)

# NOTE(review): some distributions of this file are reportedly
# semicolon-delimited -- confirm which copy is in use. Read with the default
# comma separator, such a file collapses into one column whose name holds all
# headers joined by ';', and later lookups like df['G3'] raise KeyError.
# Detect that case and re-read; comma-delimited files are unaffected.
if df.shape[1] == 1 and ';' in str(df.columns[0]):
    df = pd.read_csv(DATA_PATH, sep=';')

# Preview the first rows as the cell output.
df.head()

## Data Exploration

In [None]:
# Count the missing (NaN/None) entries in each column
df.isna().sum()

In [None]:
# Check data types: show the dtype pandas assigned to each column
df.dtypes

In [None]:
# Dataset shape as a (rows, columns) tuple; this cell sits before the
# cleaning step, so the row count is taken before duplicates are dropped
df.shape

## Data Cleaning

In [None]:
# Remove exact duplicate rows, then fill missing numeric values with the
# median of their column. Written as one chain that returns a new frame
# instead of mutating with inplace=True.
# Only numeric columns are filled: median(numeric_only=True) yields fill
# values for numeric columns alone, so missing values in non-numeric columns
# (if any) are left as they are.
df = (
    df
    .drop_duplicates()
    .pipe(lambda frame: frame.fillna(frame.median(numeric_only=True)))
)

## Data Analysis Questions

In [None]:
# 1. Mean of the final-grade column (G3) over all rows
g3_scores = df['G3']
avg_g3 = g3_scores.mean()
print(f'Average G3 Score: {avg_g3:.2f}')

In [None]:
# 2. Number of students whose G3 is strictly greater than 15
# Summing the boolean mask counts the True entries; int() keeps the result a
# plain Python integer.
scored_above_15 = df['G3'] > 15
above_15 = int(scored_above_15.sum())
print(f'Students scoring above 15: {above_15}')

In [None]:
# 3. Pearson correlation between the studytime and G3 columns
study_time = df['studytime']
final_grade = df['G3']
correlation = study_time.corr(final_grade, method='pearson')
print(f'Correlation between study time and G3: {correlation:.2f}')

In [None]:
# 4. Mean G3 for each value of the 'sex' column; the larger of the printed
# values identifies the gender with the higher average
g3_by_sex = df.groupby('sex')['G3']
gender_avg = g3_by_sex.mean()
print(gender_avg)

## Data Visualization

In [None]:
# Histogram of G3 (10 bins) with a kernel-density curve overlaid
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['G3'], bins=10, kde=True, color='skyblue', ax=ax)
ax.set_title('Distribution of Final Grades (G3)')
ax.set_xlabel('G3 Score')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Scatter plot of studytime (x) against G3 (y), points coloured by 'sex'
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='studytime', y='G3', hue='sex', ax=ax)
ax.set_title('Study Time vs Final Grade')
ax.set_xlabel('Study Time')
ax.set_ylabel('G3 Score')
plt.show()

In [None]:
# Bar chart: Average G3 by Gender (one bar per entry of gender_avg)
fig, ax = plt.subplots(figsize=(6, 4))

# Recent seaborn releases deprecate passing `palette` without `hue`, so the
# category is also assigned to `hue` to keep one Set2 colour per bar.
# dodge=False keeps each bar centred on its tick at full width when hue
# duplicates x.
sns.barplot(
    x=gender_avg.index,
    y=gender_avg.values,
    hue=gender_avg.index,
    palette='Set2',
    dodge=False,
    ax=ax,
)

# The hue assignment may add a legend that merely repeats the x tick labels;
# drop it if one was created.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

ax.set_title('Average G3 by Gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Average G3')
plt.show()

## 📊 Summary of Findings:
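- Average G3 Score: ~X *(TODO: replace X with the value printed by analysis question 1)*
- Students with G3 > 15: Y students *(TODO: replace Y with the count printed by analysis question 2)*
- Correlation between Study Time and G3: Z *(TODO: replace Z with the coefficient printed by analysis question 3)*
- Gender with higher average G3: Male/Female *(TODO: keep only the gender with the larger mean from analysis question 4)*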

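**Conclusion:** This analysis provides insights into how study habits and gender relate to student academic performance. The results are descriptive: a correlation or a difference in group averages does not by itself show that either factor causes higher or lower final grades.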