In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Use Seaborn's "whitegrid" look for the plots drawn after this point
sns.set_style(style="whitegrid")


In [None]:
# Load the student scores table (point the path at your actual CSV file)
df = pd.read_csv("student_scores.csv")

# Print the first five rows as a quick sanity check of what was loaded
print(df.head(n=5))


In [None]:
# Report the number of missing values per column before any rows are removed
print(df.isnull().sum())

# Drop every row that has at least one missing value. Reassigning (instead of
# inplace=True) keeps this step an explicit transformation of df.
df = df.dropna()

# Remove unrealistic scores: keep a row only when all three subject scores lie
# within [0, 100]. Checking just the upper bound would let negative scores
# through. NOTE(review): assumes scores are on a 0-100 scale -- confirm.
score_columns = ['Math', 'Science', 'English']
scores_in_range = df[score_columns].ge(0) & df[score_columns].le(100)
df = df[scores_in_range.all(axis=1)]


In [None]:
# Score columns that get one figure each in the plotting loops
subjects = ['Math', 'Science', 'English']

# Plain count histogram (20 bins, no KDE curve), one figure per subject
for subject in subjects:
    ax = sns.histplot(data=df, x=subject, bins=20, kde=False)
    ax.set(
        title=f'{subject} Score Distribution',
        xlabel=f'{subject} Score',
        ylabel='Number of Students',
    )
    plt.show()


In [None]:
# 20-bin histogram with a KDE curve overlaid, one figure per subject
for subject in subjects:
    fig, ax = plt.subplots()
    sns.histplot(df[subject], bins=20, kde=True, ax=ax)
    ax.set_title(f'{subject} Score Distribution with KDE')
    ax.set_xlabel(f'{subject} Score')
    ax.set_ylabel('Number of Students')
    plt.show()


In [None]:
# Finer-grained view: 40 bins with a KDE overlay, one figure per subject
for subject in subjects:
    axis_text = {
        'title': f'{subject} Score Distribution (Custom Bins)',
        'xlabel': f'{subject} Score',
        'ylabel': 'Frequency',
    }
    sns.histplot(data=df, x=subject, bins=40, kde=True).set(**axis_text)
    plt.show()


In [None]:
# Split each subject's histogram by the 'Gender' column (30 bins, KDE overlay)
for subject in subjects:
    ax = sns.histplot(data=df, x=subject, hue='Gender', kde=True, bins=30)
    ax.set_title(f'{subject} Score Distribution by Gender')
    ax.set_xlabel(f'{subject} Score')
    ax.set_ylabel('Number of Students')
    plt.show()


In [None]:
# Figure-level variant: displot builds its own figure (height 5, aspect 1.5)
# and returns a FacetGrid, which is used here to label the axes.
for subject in subjects:
    grid = sns.displot(
        data=df,
        x=subject,
        hue='Gender',
        kind='hist',
        kde=True,
        bins=30,
        height=5,
        aspect=1.5,
    )
    grid.set_axis_labels(f'{subject} Score', 'Frequency')
    # y=1.02 places the figure title slightly above the top of the axes
    plt.suptitle(f'{subject} Score Distribution with KDE and Gender Split', y=1.02)
    plt.show()


In [None]:
# Overlay one KDE curve per gender on the same axes for each subject.
# Each pair is (value matched in the 'Gender' column, line colour).
kde_groups = [('Male', 'blue'), ('Female', 'red')]

for subject in subjects:
    for gender_label, line_color in kde_groups:
        in_group = df['Gender'] == gender_label
        sns.kdeplot(df.loc[in_group, subject], label=gender_label, color=line_color)
    plt.title(f'{subject} Score KDE Comparison by Gender')
    plt.xlabel(f'{subject} Score')
    plt.ylabel('Density')
    plt.legend()
    plt.show()


In [None]:
# Explicit colour for each 'Gender' value, passed to histplot as the hue palette
palette = {
    'Male': 'skyblue',
    'Female': 'salmon',
}

# Gender-split histograms (30 bins, KDE overlay) drawn with the custom colours
for subject in subjects:
    ax = sns.histplot(data=df, x=subject, hue='Gender', bins=30, kde=True, palette=palette)
    ax.set(
        title=f'{subject} Score Distribution (Custom Colors)',
        xlabel=f'{subject} Score',
        ylabel='Number of Students',
    )
    plt.show()


In [None]:
# Example of exporting a figure: draw the Math histogram split by gender on an
# explicit 8x5 inch figure, write it to a 300-dpi PNG, then close the figure
# instead of showing it.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=df, x='Math', hue='Gender', bins=30, kde=True, palette=palette, ax=ax)
ax.set_title('Math Score Distribution by Gender')
ax.set_xlabel('Math Score')
ax.set_ylabel('Number of Students')
fig.tight_layout()
fig.savefig('math_score_distribution_by_gender.png', dpi=300)
plt.close(fig)
