# Student Performance Prediction using Machine Learning

This notebook reproduces the project used for the Berea College application.

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Read the dataset CSV (path is relative to the current working directory)
# into a DataFrame, then preview its first rows as the cell output.
dataset_path = 'student_performance_dataset.csv'
df = pd.read_csv(dataset_path)
df.head()

In [None]:
# Predictor columns; the 'Pass' column is the label the classifier learns.
feature_columns = ['StudyHours', 'Attendance', 'AssignmentMarks', 'MidtermMarks']
X = df[feature_columns]
y = df['Pass']

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Fit a 100-tree random forest (seeded) on the training portion.
# `fit` returns the estimator itself, so `model` is the fitted classifier.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows and print the summary metrics.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
report = classification_report(y_test, y_pred)
print('\nClassification Report:\n', report)

# Keep the confusion matrix in `cm` (the plotting cell reads it) and show it
# as this cell's output.
cm = confusion_matrix(y_test, y_pred)
cm

In [None]:
# Render the confusion matrix `cm` (computed in the training cell) as an
# annotated heatmap: rows are actual labels, columns are predicted labels.
fig, ax = plt.subplots(figsize=(4, 4))
ax.imshow(cm, interpolation='nearest')
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')

# One tick per class, derived from the matrix shape instead of assuming a
# 2x2 (binary) matrix.
n_rows, n_cols = cm.shape
ax.set_xticks(list(range(n_cols)))
ax.set_yticks(list(range(n_rows)))

# imshow scales colours between cm.min() and cm.max(). With matplotlib's
# default colormap (viridis unless reconfigured) the largest counts are drawn
# in a light colour, where white text is hard to read. Use dark text on cells
# above the midpoint and white text on the rest.
midpoint = (cm.max() + cm.min()) / 2
for (i, j), count in np.ndenumerate(cm):
    text_color = 'black' if count > midpoint else 'white'
    # x is the column index and y is the row index in image coordinates.
    ax.text(j, i, count, ha='center', va='center', color=text_color, fontsize=14)

plt.tight_layout()
plt.show()

## Notes
- The dataset is synthetic; it is meant to look realistic for demonstration purposes only.
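- Replace the dataset with real data if available, keeping the column names the notebook reads (`StudyHours`, `Attendance`, `AssignmentMarks`, `MidtermMarks`, `Pass`).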
- Submit both the PDF report and this GitHub repository/notebook to show reproducibility.