In [1]:
import json
import os
import warnings

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
warnings.filterwarnings('ignore')

# Pipeline banner: title framed by two 70-character rules.
print("=" * 70, "ELEVATR ML PIPELINE - STUDENT PERFORMANCE PREDICTION", "=" * 70, sep="\n")

# ============================================================================
# STEP 1: LOAD DATA
# ============================================================================
print("\n[STEP 1] Loading Student Performance Dataset", "-" * 70, sep="\n")

# Read the raw student table and report its dimensions plus a short preview.
df = pd.read_csv('../data/student_data.csv')
n_students, n_columns = df.shape
print(f"✓ Dataset loaded: {n_students} students, {n_columns} features")
print("\nFirst 3 rows:")
print(df.head(3))

# Null counts per column; only columns that actually contain nulls are listed.
print("\n✓ Missing values check:")
missing = df.isnull().sum()
if missing.sum() != 0:
    print(missing[missing > 0])
else:
    print("  No missing values found!")

# Class balance of the prediction target, ordered by grade label.
print("\n✓ Target variable (final_grade) distribution:")
grade_dist = df['final_grade'].value_counts().sort_index()
print(grade_dist)
print(f"\n  Total classes: {df['final_grade'].nunique()}")

ELEVATR ML PIPELINE - STUDENT PERFORMANCE PREDICTION

[STEP 1] Loading Student Performance Dataset
----------------------------------------------------------------------
✓ Dataset loaded: 1000 students, 11 features

First 3 rows:
  student_id  age gender  study_hours_weekly  attendance_percent  \
0       S001   24      F                 5.0                71.3   
1       S002   21      F                36.0                95.1   
2       S003   22      M                10.3                75.0   

   previous_gpa  assignments_completed  participation_score  midterm_score  \
0          7.27                      0                   63             39   
1          6.25                     56                   78             81   
2          8.18                     45                   50             51   

   hours_on_platform final_grade  
0                 42           B  
1                 90           S  
2                 51           S  

✓ Missing values check:
  No missing values

In [2]:

# ============================================================================
# STEP 2: DATA PREPROCESSING
# ============================================================================
print("\n[STEP 2] Data Preprocessing", "-" * 70, sep="\n")

# The label is `final_grade`; every other column except the identifier is a feature.
y = df['final_grade']
X = df.drop(columns=['student_id', 'final_grade'])

print(f"✓ Features (X): {X.shape}")
print(f"✓ Target (y): {y.shape}")
print(f"\nFeature columns: {X.columns.tolist()}")

# Replace the textual `gender` column with an integer-coded `gender_encoded`
# column appended at the end of the feature frame.
print("\n✓ Encoding categorical variables...")
le_gender = LabelEncoder()
gender_codes = le_gender.fit_transform(X['gender'])
X = X.drop(columns='gender').assign(gender_encoded=gender_codes)
gender_mapping = dict(zip(le_gender.classes_, le_gender.transform(le_gender.classes_)))
print(f"  Gender encoding: {gender_mapping}")

# Integer-code the grade labels and show the label -> code table.
le_grade = LabelEncoder()
y_encoded = le_grade.fit_transform(y)
print("\n✓ Target variable encoding:")
grade_codes = le_grade.transform(le_grade.classes_)
for position in range(len(le_grade.classes_)):
    print(f"  {le_grade.classes_[position]} → {grade_codes[position]}")

# 80/20 hold-out split, stratified on the encoded grade so that both parts
# keep the same class proportions.
print("\n✓ Performing train-test split (80-20, stratified)...")
split_parts = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)
X_train, X_test, y_train, y_test = split_parts

print(f"  Training set: {X_train.shape[0]} samples")
print(f"  Test set: {X_test.shape[0]} samples")
print("\n  Training grade distribution:")
train_dist = pd.Series(y_train).value_counts().sort_index()
for class_code, n_samples in zip(train_dist.index, train_dist.to_numpy()):
    print(f"    {le_grade.classes_[class_code]}: {n_samples}")

# Standardise features; statistics come from the training part only and are
# then applied unchanged to the test part.
print("\n✓ Applying StandardScaler to features...")
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("  Scaler fitted on training data")
print("  Features scaled: mean=0, std=1")
print("\n  Sample scaling (first feature 'age'):")
age_before = X_train['age']
age_after = X_train_scaled[:, 0]
print(f"    Before: mean={age_before.mean():.2f}, std={age_before.std():.2f}")
print(f"    After: mean={age_after.mean():.4f}, std={age_after.std():.4f}")

# Column order of the final feature frame, kept for later cells.
feature_names = X.columns.tolist()
print(f"\n✓ Feature names stored: {feature_names}")


[STEP 2] Data Preprocessing
----------------------------------------------------------------------
✓ Features (X): (1000, 9)
✓ Target (y): (1000,)

Feature columns: ['age', 'gender', 'study_hours_weekly', 'attendance_percent', 'previous_gpa', 'assignments_completed', 'participation_score', 'midterm_score', 'hours_on_platform']

✓ Encoding categorical variables...
  Gender encoding: {'F': np.int64(0), 'M': np.int64(1)}

✓ Target variable encoding:
  A → 0
  B → 1
  C → 2
  D → 3
  F → 4
  S → 5

✓ Performing train-test split (80-20, stratified)...
  Training set: 800 samples
  Test set: 200 samples

  Training grade distribution:
    A: 119
    B: 44
    C: 22
    D: 27
    F: 10
    S: 578

✓ Applying StandardScaler to features...
  Scaler fitted on training data
  Features scaled: mean=0, std=1

  Sample scaling (first feature 'age'):
    Before: mean=21.56, std=2.32
    After: mean=-0.0000, std=1.0000

✓ Feature names stored: ['age', 'study_hours_weekly', 'attendance_percent', 'prev

In [3]:
# ============================================================================
# STEP 3: TRAIN MULTIPLE MODELS
# ============================================================================
print("\n[STEP 3] Training and Comparing Models")
print("-" * 70)

# Candidate classifiers keyed by the display name used in the printed report.
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42, max_depth=10),
    # `multi_class` is deprecated since scikit-learn 1.5 and is therefore not
    # passed: with the default lbfgs solver a multiclass target is already fit
    # as a multinomial model, so the resulting estimator is the same.
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
}

# XGBoost is optional (install with: pip install xgboost); it is only added to
# the comparison when the package can be imported.
try:
    from xgboost import XGBClassifier
    models['XGBoost'] = XGBClassifier(n_estimators=100, random_state=42, max_depth=6, learning_rate=0.1)
except ImportError:
    print("⚠ XGBoost not available, skipping...")

# name -> {'model', 'train_accuracy', 'test_accuracy', 'predictions'}
results = {}

for name, model in models.items():
    print(f"\n→ Training {name}...")
    model.fit(X_train_scaled, y_train)

    # Predictions on both splits so over-fitting is visible in the report
    y_pred_train = model.predict(X_train_scaled)
    y_pred_test = model.predict(X_test_scaled)

    # Metrics
    train_acc = accuracy_score(y_train, y_pred_train)
    test_acc = accuracy_score(y_test, y_pred_test)

    results[name] = {
        'model': model,
        'train_accuracy': train_acc,
        'test_accuracy': test_acc,
        'predictions': y_pred_test
    }

    print(f"  Training Accuracy: {train_acc:.4f}")
    print(f"  Test Accuracy: {test_acc:.4f}")

# Pick the model with the highest hold-out accuracy (first one wins on a tie).
# NOTE(review): the winner is chosen on the same test split whose accuracy is
# then reported, so that figure is optimistically biased; consider selecting
# via cross-validation on the training split instead.
best_model_name = max(results, key=lambda x: results[x]['test_accuracy'])
best_model = results[best_model_name]['model']
print(f"\n✓ Best Model: {best_model_name} (Test Accuracy: {results[best_model_name]['test_accuracy']:.4f})")


[STEP 3] Training and Comparing Models
----------------------------------------------------------------------

→ Training Random Forest...
  Training Accuracy: 0.9912
  Test Accuracy: 0.7850

→ Training Logistic Regression...
  Training Accuracy: 0.7812
  Test Accuracy: 0.7700

→ Training XGBoost...
  Training Accuracy: 1.0000
  Test Accuracy: 0.7800

✓ Best Model: Random Forest (Test Accuracy: 0.7850)


In [4]:
# ============================================================================
# STEP 4: DETAILED EVALUATION
# ============================================================================
print("\n[STEP 4] Detailed Model Evaluation")
print("-" * 70)

best_predictions = results[best_model_name]['predictions']

# Pin the label order to the encoder's codes so that report rows, matrix
# rows/columns and `target_names` stay aligned even when a grade happens to be
# absent from the test split or from the predictions.
grade_labels = list(range(len(le_grade.classes_)))

# Classification report
print(f"\nClassification Report for {best_model_name}:")
print(classification_report(
    y_test,
    best_predictions,
    labels=grade_labels,
    target_names=le_grade.classes_,
    zero_division=0,
))

# Per-class scores computed once (average=None returns one value per label)
# instead of three metric calls per grade.
metric_kwargs = dict(labels=grade_labels, average=None, zero_division=0)
precision_by_grade = precision_score(y_test, best_predictions, **metric_kwargs)
recall_by_grade = recall_score(y_test, best_predictions, **metric_kwargs)
f1_by_grade = f1_score(y_test, best_predictions, **metric_kwargs)

print("\nPer-Grade Metrics:")
for i, grade in enumerate(le_grade.classes_):
    # Grades with no test samples are skipped: their scores carry no information.
    if (y_test == i).sum() > 0:
        print(
            f"  Grade {grade}: Precision={precision_by_grade[i]:.3f}, "
            f"Recall={recall_by_grade[i]:.3f}, F1={f1_by_grade[i]:.3f}"
        )

# Confusion matrix (rows = actual grade, columns = predicted grade)
cm = confusion_matrix(y_test, best_predictions, labels=grade_labels)
print(f"\nConfusion Matrix:\n{cm}")

# Feature importance (only estimators exposing `feature_importances_`)
if hasattr(best_model, 'feature_importances_'):
    print(f"\nFeature Importance:")
    importances = best_model.feature_importances_
    feature_importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importances
    }).sort_values('importance', ascending=False)
    print(feature_importance_df)


[STEP 4] Detailed Model Evaluation
----------------------------------------------------------------------

Classification Report for Random Forest:
              precision    recall  f1-score   support

           A       0.62      0.33      0.43        30
           B       0.33      0.18      0.24        11
           C       0.50      0.33      0.40         6
           D       0.67      0.57      0.62         7
           F       0.00      0.00      0.00         2
           S       0.83      0.97      0.89       144

    accuracy                           0.79       200
   macro avg       0.49      0.40      0.43       200
weighted avg       0.75      0.79      0.75       200


Per-Grade Metrics:
  Grade A: Precision=0.625, Recall=0.333, F1=0.435
  Grade B: Precision=0.333, Recall=0.182, F1=0.235
  Grade C: Precision=0.500, Recall=0.333, F1=0.400
  Grade D: Precision=0.667, Recall=0.571, F1=0.615
  Grade F: Precision=0.000, Recall=0.000, F1=0.000
  Grade S: Precision=0.827, Recal

In [5]:
# ============================================================================
# STEP 5: SAVE BEST MODEL AND ARTIFACTS
# ============================================================================
print("\n[STEP 5] Saving Model and Artifacts")
print("-" * 70)

# Make sure the output directory exists so a fresh checkout can run this cell.
os.makedirs('../models', exist_ok=True)

# Save model
joblib.dump(best_model, '../models/elevatr_model.pkl')
print("✓ Model saved: models/elevatr_model.pkl")

# Save scaler
joblib.dump(scaler, '../models/scaler.pkl')
print("✓ Scaler saved: models/scaler.pkl")

# Save feature names and encoders.
# The gender mapping is taken from the fitted encoder rather than hard-coded,
# so the saved artifact always matches what the model was trained on. Codes are
# cast to plain int because numpy integers are not JSON-serialisable.
gender_encoding = {
    str(label): int(code)
    for label, code in zip(le_gender.classes_, le_gender.transform(le_gender.classes_))
}
artifacts = {
    'feature_names': feature_names,
    'grade_classes': le_grade.classes_.tolist(),
    'gender_encoding': gender_encoding,
    'model_name': best_model_name,
    'test_accuracy': float(results[best_model_name]['test_accuracy'])
}

with open('../models/feature_names.json', 'w') as f:
    json.dump(artifacts, f, indent=2)
print("✓ Artifacts saved: models/feature_names.json")

# Save the grade label encoder (needed to turn predicted codes back into grades)
joblib.dump(le_grade, '../models/label_encoder.pkl')
print("✓ Label encoder saved: models/label_encoder.pkl")


[STEP 5] Saving Model and Artifacts
----------------------------------------------------------------------
✓ Model saved: models/elevatr_model.pkl
✓ Scaler saved: models/scaler.pkl
✓ Artifacts saved: models/feature_names.json
✓ Label encoder saved: models/label_encoder.pkl


In [6]:
# ============================================================================
# STEP 6: CREATE PREDICTION FUNCTION
# ============================================================================
print("\n[STEP 6] Creating Prediction Function")
print("-" * 70)

def predict_grade(student_data):
    """
    Predict a student's final grade with a confidence score and risk level.

    The fitted model, scaler, grade label encoder and the JSON metadata are
    re-loaded from ``../models/`` on every call.

    Args:
        student_data: dict of feature values for one student, or a DataFrame.
            It must contain a ``gender`` entry plus every column listed under
            ``feature_names`` in ``../models/feature_names.json`` other than
            ``gender_encoded`` (which is derived here). If a DataFrame with
            several rows is passed, only the first row is scored. The caller's
            object is not modified.

    Returns:
        dict with:
            predicted_grade: decoded grade label.
            confidence: predicted probability of that grade, rounded to three
                decimals (a fixed 0.85 placeholder when the model has no
                ``predict_proba``).
            risk_level: 'HIGH' for D/F, 'MEDIUM' for B/C, 'LOW' otherwise.

    Raises:
        ValueError: if a ``gender`` value has no entry in the saved encoding.
        KeyError: if ``gender`` or a required feature column is missing.
    """
    # Load saved artifacts.
    # NOTE: joblib files are pickles - only load them from a trusted location.
    model = joblib.load('../models/elevatr_model.pkl')
    scaler = joblib.load('../models/scaler.pkl')
    le = joblib.load('../models/label_encoder.pkl')

    with open('../models/feature_names.json', 'r') as f:
        artifacts = json.load(f)

    # Always work on a private frame so the caller's data is left untouched.
    if isinstance(student_data, dict):
        student_data = pd.DataFrame([student_data])
    else:
        student_data = student_data.copy()

    # Encode gender with the mapping saved at training time; fall back to the
    # historical hard-coded mapping for artifact files that lack the key.
    gender_map = artifacts.get('gender_encoding', {'F': 0, 'M': 1})
    gender_encoded = student_data['gender'].map(gender_map)
    unmapped = gender_encoded.isna()
    if unmapped.any():
        # Fail loudly: a NaN feature would otherwise flow into the model.
        bad_values = student_data.loc[unmapped, 'gender'].unique().tolist()
        raise ValueError(
            f"Unrecognised gender value(s) {bad_values}; "
            f"expected one of {sorted(gender_map)}"
        )
    student_data['gender_encoded'] = gender_encoded

    # Select features in the exact column order used for training
    X_pred = student_data[artifacts['feature_names']]

    # Scale features
    X_pred_scaled = scaler.transform(X_pred)

    # Predict (first row only)
    prediction = model.predict(X_pred_scaled)[0]
    predicted_grade = le.inverse_transform([prediction])[0]

    # Get confidence (probability of the predicted class)
    if hasattr(model, 'predict_proba'):
        proba = model.predict_proba(X_pred_scaled)[0]
        # Probability columns follow `model.classes_`, so look the position up
        # instead of assuming the label value equals the column index.
        class_position = list(model.classes_).index(prediction)
        confidence = float(proba[class_position])
    else:
        # NOTE(review): fixed placeholder, not a calibrated probability.
        confidence = 0.85  # Default for models without probability

    # Assess risk level from the predicted grade
    if predicted_grade in ['D', 'F']:
        risk_level = 'HIGH'
    elif predicted_grade in ['C', 'B']:
        risk_level = 'MEDIUM'
    else:
        risk_level = 'LOW'

    return {
        'predicted_grade': predicted_grade,
        'confidence': round(confidence, 3),
        'risk_level': risk_level
    }

print("✓ Prediction function created successfully")


[STEP 6] Creating Prediction Function
----------------------------------------------------------------------
✓ Prediction function created successfully


In [7]:
# ============================================================================
# STEP 7: TEST WITH TEST SAMPLE
# ============================================================================
print("\n[STEP 7] Testing with Test Sample Data", "-" * 70, sep="\n")

# Labelled sample students used as an end-to-end check of predict_grade().
test_df = pd.read_csv('../data/test_sample.csv')
print(f"✓ Loaded {len(test_df)} test students\n")

for _, student in test_df.iterrows():
    # Everything except the identifier and the true grade is passed to the model.
    features = student.drop('student_id').to_dict()
    actual_grade = features.pop('final_grade')

    result = predict_grade(features)

    if result['predicted_grade'] == actual_grade:
        marker = "✓"
    else:
        marker = "✗"

    # Four report lines followed by one blank separator line.
    report_lines = [
        f"{marker} Student {student['student_id']}:",
        f"  Actual: {actual_grade} | Predicted: {result['predicted_grade']}",
        f"  Confidence: {result['confidence']:.1%} | Risk: {result['risk_level']}",
        f"  Profile: {student['study_hours_weekly']}hrs/week, {student['attendance_percent']:.0f}% attendance",
        "",
    ]
    print("\n".join(report_lines))

print("=" * 70, "PIPELINE COMPLETE - MODEL READY FOR DEPLOYMENT", "=" * 70, sep="\n")


[STEP 7] Testing with Test Sample Data
----------------------------------------------------------------------
✓ Loaded 5 test students

✗ Student T001:
  Actual: A | Predicted: S
  Confidence: 65.3% | Risk: LOW
  Profile: 35.0hrs/week, 92% attendance

✓ Student T002:
  Actual: S | Predicted: S
  Confidence: 56.7% | Risk: LOW
  Profile: 8.0hrs/week, 68% attendance

✗ Student T003:
  Actual: B | Predicted: S
  Confidence: 96.4% | Risk: LOW
  Profile: 22.0hrs/week, 88% attendance

✗ Student T004:
  Actual: D | Predicted: S
  Confidence: 42.9% | Risk: LOW
  Profile: 12.0hrs/week, 95% attendance

✗ Student T005:
  Actual: A | Predicted: S
  Confidence: 98.8% | Risk: LOW
  Profile: 28.0hrs/week, 75% attendance

PIPELINE COMPLETE - MODEL READY FOR DEPLOYMENT
