# In [None]:

# Step 1: Import Libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

# Step 2: Load Dataset
data = pd.read_csv("student_performance_dataset_cleaned.csv")

# Step 3: Select Numeric Features for Clustering
numeric_features = [
    'Study_Hours_per_Week',
    'Attendance_Rate',
    'Past_Exam_Scores',
    'Final_Exam_Score',
]
X = data[numeric_features]

# Step 4: Standardize Data (important for clustering)
# K-Means is distance-based, so every feature is rescaled to zero mean and
# unit variance to keep any single column from dominating the distances.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 5: Apply K-Means Clustering
# n_init is passed explicitly: the library default differs between
# scikit-learn releases (10 vs. 'auto'), so pinning it keeps the clustering
# reproducible across versions and avoids the related FutureWarning.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# Add cluster labels to dataset
data['Cluster'] = clusters

# Step 6: Apply PCA for 2D Visualization
# The projection is used only for plotting; the clustering above was fitted
# on all standardized features, not on the two components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

data['PCA1'] = X_pca[:, 0]
data['PCA2'] = X_pca[:, 1]

# Step 7: Visualize Clusters in 2D
plt.figure(figsize=(8, 6))
sns.scatterplot(
    x='PCA1',
    y='PCA2',
    hue='Cluster',
    data=data,
    palette='viridis',
    s=80,
)
plt.title("K-Means Clustering Results (Visualized in 2D using PCA)")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.legend(title='Cluster')
plt.show()

# Step 8: Display Cluster Summary
# Per-cluster means of the original (unscaled) features, rounded to 2 decimals.
cluster_summary = data.groupby('Cluster')[numeric_features].mean().round(2)
# The heading previously contained mojibake (UTF-8 bytes of the bar-chart
# emoji decoded as cp1252). The escape keeps the source ASCII-only so the
# character cannot be garbled again by a wrong file encoding.
print("\U0001F4CA Cluster Summary (Mean Values):")
print(cluster_summary)
