# Cognitive Skills & Student Performance Analysis

This notebook analyzes the correlation between cognitive skills and student performance, develops an ML model to predict assessment scores, and clusters students into learning personas.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import json
import warnings
# Silence all warnings for the remainder of the notebook session.
warnings.filterwarnings('ignore')

# Set style for better visualizations.
# The bundled seaborn style sheets were renamed to 'seaborn-v0_8*' in
# matplotlib 3.6; older releases only register the legacy name 'seaborn'.
# Pick whichever name this installation knows so the cell does not fail
# with OSError on an older matplotlib.
plt.style.use(
    'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
)
sns.set_palette("husl")


In [None]:
# Load the synthetic student dataset.
# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying
# on the platform's locale default, which is not UTF-8 on every system.
with open('../data/synthetic_students.json', 'r', encoding='utf-8') as f:
    students_data = json.load(f)

# Convert to DataFrame
df = pd.DataFrame(students_data)
print(f"Dataset shape: {df.shape}")
print("\nFirst few rows:")
# Keep this as the final expression so the notebook renders the preview.
df.head()


In [None]:
# Print a high-level summary of the loaded DataFrame: row count, the distinct
# values of the 'class' column, and the output of DataFrame.describe().
print("Dataset Overview:", "=" * 50, sep="\n")
print(f"Total students: {df.shape[0]}")
print(f"Classes: {df['class'].unique()}")
print("", "Basic Statistics:", sep="\n")
print(df.describe())
