In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Train and evaluate a logistic-regression admission classifier on two mark
# columns after deliberately corrupting the features with Gaussian noise.

# Experiment settings, gathered here so they can be tuned in one place.
DATA_PATH = './dataset/student_data_synthetic.csv'
FEATURE_COLUMNS = ['Marks 1', 'Marks 2']
TARGET_COLUMN = 'Admission'
RANDOM_SEED = 42
NOISE_STD = 20      # standard deviation of the zero-mean noise added to each mark
TEST_FRACTION = 0.2
CV_FOLDS = 5

# Load the dataset
data = pd.read_csv(DATA_PATH)

# Split data into features (X) and target (y).
# Take an explicit copy of the feature columns: the noise below is assigned
# into X, and writing into a bare selection of `data` is the chained-assignment
# pattern that triggers pandas' SettingWithCopyWarning. The copy also makes it
# unambiguous that `data` itself keeps the original, noise-free marks.
X = data[FEATURE_COLUMNS].copy()
y = data[TARGET_COLUMN]

# Step 1: add heavy zero-mean Gaussian noise to every feature column.
# The global NumPy seed is fixed so the noise is reproducible; columns are
# processed in FEATURE_COLUMNS order so the sequence of random draws is stable.
np.random.seed(RANDOM_SEED)
for column in FEATURE_COLUMNS:
    X[column] = X[column] + np.random.normal(0, NOISE_STD, len(X))

# Step 2: hold out a fixed test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=RANDOM_SEED
)

# Step 3: logistic regression with an L2 penalty; class_weight='balanced'
# asks scikit-learn to reweight classes to compensate for class imbalance.
model = LogisticRegression(
    class_weight='balanced',
    penalty='l2',
    solver='lbfgs',
    max_iter=1000,
)
model.fit(X_train, y_train)

# Step 4: predict on the held-out test set.
y_pred = model.predict(X_test)

# Step 5: cross-validated score with the estimator's default scorer.
# NOTE: this runs on the full (noisy) X and y, not only the training split,
# so it is a separate estimate rather than a validation of the fit above.
# cross_val_score fits clones of `model`; the fitted `model` is not modified.
cv_scores = cross_val_score(model, X, y, cv=CV_FOLDS)
cv_accuracy = np.mean(cv_scores)

# Step 6: confusion matrix and accuracy on the held-out test set.
conf_matrix = confusion_matrix(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)

# Step 7: report results (accuracies shown as percentages).
print("Confusion Matrix:\n", conf_matrix)
print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")
print(f"Cross-Validation Accuracy: {cv_accuracy * 100:.2f}%")
