In [2]:
# ========================================
# Student Data - Linear & Logistic Models
# ========================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score,
    confusion_matrix, accuracy_score, precision_score,
    recall_score, f1_score
)

df = pd.read_csv("student_data.csv")

print("First 5 rows of dataset:")
print(df.head(), "\n")

X = df[["hours_studied", "attendance"]]
y_score = df["score"]
y_passed = df["passed"]

# -----------------------------
# 1. Linear Regression (Score)
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y_score, test_size=0.2, random_state=42)

lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

y_pred = lin_reg.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("=== Linear Regression (Score Prediction) ===")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R² Score: {r2:.2f}")
print("Coefficients:", dict(zip(X.columns, lin_reg.coef_)))
print("Intercept:", lin_reg.intercept_, "\n")

# ----------------------
# 2. Logistic Regression
# ----------------------
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y_passed, test_size=0.2, random_state=42)

log_reg = LogisticRegression()
log_reg.fit(X_train2, y_train2)

y_pred_class = log_reg.predict(X_test2)

cm = confusion_matrix(y_test2, y_pred_class)
accuracy = accuracy_score(y_test2, y_pred_class)
precision = precision_score(y_test2, y_pred_class)
recall = recall_score(y_test2, y_pred_class)
f1 = f1_score(y_test2, y_pred_class)

print("=== Logistic Regression (Pass/Fail Prediction) ===")
print("Confusion Matrix:\n", cm)
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")
print("Coefficients:", dict(zip(X.columns, log_reg.coef_[0])))
print("Intercept:", log_reg.intercept_[0])


First 5 rows of dataset:
   hours_studied  attendance  passed  score
0            1.5          60       0     45
1            2.0          65       0     50
2            2.5          70       0     52
3            3.0          72       0     55
4            3.5          75       0     58 

=== Linear Regression (Score Prediction) ===
Mean Absolute Error (MAE): 3.13
Mean Squared Error (MSE): 18.24
R² Score: 0.96
Coefficients: {'hours_studied': np.float64(2.1416565574875315), 'attendance': np.float64(1.089914408949994)}
Intercept: -30.352156624882113 

=== Logistic Regression (Pass/Fail Prediction) ===
Confusion Matrix:
 [[2 0]
 [0 2]]
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-score: 1.00
Coefficients: {'hours_studied': np.float64(0.1566625548274987), 'attendance': np.float64(0.8824361330611007)}
Intercept: -68.02215390651409
