# In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Load the student records. The path is relative, so it is resolved against
# the current working directory.
df = pd.read_csv("StudentData.csv")

# Quick sanity check of what was loaded: heading plus the first five rows.
print("First 5 entries:", df.head(), sep="\n")

# Apply seaborn's "whitegrid" style to every figure drawn afterwards.
sns.set(style="whitegrid")


# Central-tendency statistics for the four numeric features.
numeric_features = df[["CGPA", "CIE_SCORE", "SEE_SCORE", "HOURS_OF_STUDY"]]

mean_values = numeric_features.mean()
median_values = numeric_features.median()
# mode() returns a table because a column can have several modes; only the
# first row (the first listed mode of each column) is kept.
mode_values = numeric_features.mode().iloc[0]

# Print each statistic under its own banner.
for banner, statistic in (
    ("\n=== MEAN VALUES ===\n", mean_values),
    ("\n=== MEDIAN VALUES ===\n", median_values),
    ("\n=== MODE VALUES ===\n", mode_values),
):
    print(banner, statistic)


# Combine the three statistics into one table: one row per feature, one
# column per statistic.
stats_by_name = {
    "Mean": mean_values,
    "Median": median_values,
    "Mode": mode_values,
}
stats_df = pd.DataFrame(stats_by_name)

# Grouped bar chart: one group per feature, one bar per statistic.
stats_ax = stats_df.plot(kind="bar", figsize=(10, 5), colormap="Set1")
stats_ax.set_title("Mean vs Median vs Mode for Numeric Features")
stats_ax.set_ylabel("Value")
# Keep the feature names horizontal so they stay readable.
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

# ================== VISUALIZATIONS ==================

# 1. CIE vs SEE Score
# Scatter of the two exam scores with a fitted regression line on top.
cie_see_fig, cie_see_ax = plt.subplots(figsize=(6, 4))
sns.regplot(
    data=df,
    x="CIE_SCORE",
    y="SEE_SCORE",
    ax=cie_see_ax,
    scatter_kws=dict(s=40, alpha=0.7),
    line_kws=dict(color="red"),
)
cie_see_ax.set_title("CIE SCORE vs SEE SCORE")
cie_see_ax.set_xlabel("CIE SCORE")
cie_see_ax.set_ylabel("SEE SCORE")
cie_see_fig.tight_layout()
plt.show()

# 2. Boxplot of CGPA by GENDER
plt.figure(figsize=(6, 4))
# Passing `palette` without `hue` is deprecated in seaborn 0.13 and scheduled
# for removal, so the colouring is made explicit by mapping the x variable to
# `hue` as well. dodge=False keeps one full-width box centred on each gender
# tick (older seaborn versions would otherwise shift the boxes sideways).
box_ax = sns.boxplot(
    x="GENDER",
    y="CGPA",
    hue="GENDER",
    data=df,
    palette="Set2",
    dodge=False,
)
# The hue legend would only repeat the x tick labels; remove it when seaborn
# created one so the figure looks the same as before.
box_legend = box_ax.get_legend()
if box_legend is not None:
    box_legend.remove()
plt.title("CGPA Distribution by Gender")
plt.tight_layout()
plt.show()

# 3. Violin plot CGPA vs GENDER
plt.figure(figsize=(6, 4))
# Passing `palette` without `hue` is deprecated in seaborn 0.13 and scheduled
# for removal, so the x variable is also assigned to `hue`. dodge=False keeps
# one full-width violin centred on each gender tick.
violin_ax = sns.violinplot(
    x="GENDER",
    y="CGPA",
    hue="GENDER",
    data=df,
    palette="Set3",
    dodge=False,
)
# The hue legend would only duplicate the x tick labels; drop it if present.
violin_legend = violin_ax.get_legend()
if violin_legend is not None:
    violin_legend.remove()
plt.title("CGPA vs Gender (Violin Plot)")
plt.tight_layout()
plt.show()

# 4. Bar plot: Average CGPA by GENDER
# One row per gender holding that group's mean CGPA.
avg_cgpa_gender = df.groupby("GENDER")["CGPA"].mean().reset_index()
plt.figure(figsize=(5, 4))
# Passing `palette` without `hue` is deprecated in seaborn 0.13 and scheduled
# for removal, so the x variable is also assigned to `hue`. dodge=False keeps
# one full-width bar centred on each gender tick.
bar_ax = sns.barplot(
    x="GENDER",
    y="CGPA",
    hue="GENDER",
    data=avg_cgpa_gender,
    palette="pastel",
    dodge=False,
)
# The hue legend would only duplicate the x tick labels; drop it if present.
bar_legend = bar_ax.get_legend()
if bar_legend is not None:
    bar_legend.remove()
plt.title("Average CGPA by Gender")
plt.ylabel("Average CGPA")
plt.tight_layout()
plt.show()

# 5. Histograms for CIE and SEE Scores
# Two side-by-side panels, each a 10-bin histogram with a KDE curve overlaid.
hist_fig, hist_axes = plt.subplots(1, 2, figsize=(12, 4))

# (column, bar colour, panel title) for the left and right panel respectively.
hist_panels = (
    ("CIE_SCORE", "skyblue", "Distribution of CIE SCORE"),
    ("SEE_SCORE", "salmon", "Distribution of SEE SCORE"),
)
for hist_ax, (score_column, bar_color, panel_title) in zip(hist_axes, hist_panels):
    sns.histplot(df[score_column], bins=10, kde=True, color=bar_color, ax=hist_ax)
    hist_ax.set_title(panel_title)

plt.tight_layout()
plt.show()

# 6. Pairplot with HOURS_OF_STUDY
# Pairwise scatter plots of the four numeric features, coloured by gender,
# with KDE curves on the diagonal.
pairplot_columns = ["CGPA", "CIE_SCORE", "SEE_SCORE", "HOURS_OF_STUDY"]
sns.pairplot(
    df,
    vars=pairplot_columns,
    hue="GENDER",
    diag_kind="kde",
    palette="bright",
)
# y=1.02 places the title just above the top edge of the grid.
plt.suptitle("Pairplot of CGPA, CIE_SCORE, SEE_SCORE, HOURS_OF_STUDY by Gender", y=1.02)
plt.show()

# 7. Correlation heatmap
# Pairwise correlations between the numeric features (pandas' default method).
correlation_matrix = df[["CGPA", "CIE_SCORE", "SEE_SCORE", "HOURS_OF_STUDY"]].corr()

plt.figure(figsize=(6, 5))
# annot + fmt print each coefficient inside its cell with two decimals.
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm")
plt.title("Correlation Heatmap")
plt.tight_layout()
plt.show()

# 8. Hours of Study vs Scores
# Three panels in one row: study hours against each outcome, each with its
# own regression-line colour.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# (y column, regression line colour, panel title, y-axis label) per panel.
study_panels = (
    ("CIE_SCORE", "green", "Hours of Study vs CIE SCORE", "CIE SCORE"),
    ("SEE_SCORE", "blue", "Hours of Study vs SEE SCORE", "SEE SCORE"),
    ("CGPA", "purple", "Hours of Study vs CGPA", "CGPA"),
)
for panel_ax, (outcome, line_color, panel_title, y_label) in zip(axes, study_panels):
    sns.regplot(
        x="HOURS_OF_STUDY",
        y=outcome,
        data=df,
        ax=panel_ax,
        scatter_kws={"s": 40, "alpha": 0.7},
        line_kws={"color": line_color},
    )
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel("Hours of Study")
    panel_ax.set_ylabel(y_label)

plt.tight_layout()
plt.show()