# University Admission Prediction (Binary Classification)

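This notebook loads the provided dataset (`data/dataset.csv`), prints basic dataset information, derives a binary admission target by thresholding `Chance of Admit`, trains a logistic-regression model on standardized features, and reports evaluation metrics on a held-out test split.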


In [None]:
import pandas as pd
import numpy as np

# Read the dataset that ships with the repo (data/dataset.csv) and strip stray
# leading/trailing whitespace from every column header in the same step.
df = pd.read_csv('data/dataset.csv').rename(columns=str.strip)

# Preview the first rows
df.head()

In [None]:
# Dataset dimensions, followed by pandas' structural summary of the frame
print(f'Shape: {df.shape}')
df.info()

In [None]:
# Summary statistics
# include='all' asks describe() to cover every column, not only the numeric ones.
df.describe(include='all')

In [None]:
# Prepare target
# The dataset has a probability-like column: 'Chance of Admit'.
# Convert it into a binary label:
#   1 = Admitted (>= threshold)
#   0 = Not Admitted (< threshold)

THRESHOLD = 0.75

chance_of_admit = df['Chance of Admit']

# NaN >= THRESHOLD evaluates to False, so a missing value would silently be
# labelled 0 ("Not Admitted"). Fail loudly instead of training on invented labels.
n_missing = int(chance_of_admit.isna().sum())
if n_missing:
    raise ValueError(
        f"'Chance of Admit' has {n_missing} missing value(s); "
        "drop or impute them before building the binary target."
    )

# Note: this adds the label column to df itself (kept so that any later cell
# reading df['Admitted'] continues to work).
df['Admitted'] = (chance_of_admit >= THRESHOLD).astype(int)

# Features = everything except the label, the column it was derived from, and
# the row identifier. errors='ignore' because 'Serial No.' may not be present;
# the other two columns are known to exist at this point.
X = df.drop(columns=['Admitted', 'Chance of Admit', 'Serial No.'], errors='ignore')
y = df['Admitted']

# Preview the features and the class balance
X.head(), y.value_counts()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Hold out 20% of the rows for evaluation. stratify=y requests the same class
# proportions in both splits; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardize the features, then fit a logistic-regression classifier.
# Pipeline.fit returns the fitted pipeline, so it can be bound directly.
model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('clf', LogisticRegression(max_iter=1000)),
    ]
).fit(X_train, y_train)

pred = model.predict(X_test)

# Report performance on the held-out split
print(f'Accuracy: {accuracy_score(y_test, pred)}')
print(f'\nConfusion matrix:\n {confusion_matrix(y_test, pred)}')
print(f'\nClassification report:\n {classification_report(y_test, pred)}')