# 🎓 Student Performance Analysis
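An end-to-end data science project on student performance: load and explore `student-por.csv`, derive a pass/fail label from the `G3` column, and train a logistic regression classifier to predict it.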

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Load dataset
# The file is read relative to the working directory and uses ';' (not ',')
# as its field separator.
csv_path = 'student-por.csv'
data = pd.read_csv(csv_path, sep=';')
# Preview the first five rows to sanity-check the parse.
data.head(n=5)

In [None]:
# Dataset Info
# Print a concise summary of the frame: column names, non-null counts,
# dtypes and memory usage. Useful for spotting text columns that will need
# encoding and columns with missing entries.
data.info()

In [None]:
# Describe dataset
# Summary statistics (count, mean, std, min, quartiles, max) per column.
# For a frame with mixed dtypes, describe() reports numeric columns only by
# default, so any text columns are omitted from this table.
data.describe()

In [None]:
# Check for missing values
# Tally the missing (NaN/None) entries column by column; a column showing 0
# has no gaps.
missing_per_column = data.isna().sum()
missing_per_column

In [None]:
# Feature engineering - create pass/fail column
# Label a row 1 (pass) when G3 is at least 10 and 0 (fail) otherwise.
# The comparison runs vectorised over the whole column rather than calling a
# Python function once per row. A missing (NaN) G3 compares False and
# therefore maps to 0. The explicit 'int64' keeps the label dtype the same
# on every platform.
data['pass'] = (data['G3'] >= 10).astype('int64')

In [None]:
# Encode categorical variables
# One-hot encode the non-numeric columns. With no `columns` argument,
# get_dummies converts the object/string/category columns and passes the
# remaining columns through unchanged. drop_first=True drops the first level
# of each encoded column (k-1 indicators for k levels), which removes the
# redundant, perfectly collinear indicator.
# NOTE: this rebinds `data`, so the original text columns are no longer
# available after this cell runs.
data = pd.get_dummies(data, drop_first=True)

In [None]:
# Correlation heatmap
# Draw the pairwise column correlations on an explicit 12x10 inch axes,
# without per-cell annotations.
fig, ax = plt.subplots(figsize=(12, 10))
corr_matrix = data.corr()
sns.heatmap(corr_matrix, cmap='coolwarm', annot=False, ax=ax)
ax.set_title('Feature Correlations')
plt.show()

In [None]:
# Split features and labels
# Features are every column except G3 and the 'pass' label derived from it;
# keeping G3 would hand the model the answer directly.
X = data.drop(columns=['G3', 'pass'])
y = data['pass']
# Hold out 30% of the rows for evaluation, with a fixed seed for
# reproducibility. Stratifying on the label keeps the pass/fail ratio the
# same in the train and test sets, so the per-class metrics are not skewed
# by an unlucky draw of the smaller class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [None]:
# Train logistic regression
# fit() returns the fitted estimator, so construction and training are
# chained. max_iter is raised to 1000 to give the solver more iterations to
# converge.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
# Predicted pass/fail labels for the held-out rows.
y_pred = model.predict(X_test)

In [None]:
# Evaluate model
# Overall fraction of held-out rows predicted correctly.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
# Per-class precision, recall, F1 and support.
report = classification_report(y_test, y_pred)
print(report)