In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [5]:
file_path =r"C:\Users\TEJASWINI CHANDARGI\ML\ML18dataset\placement_data_30.csv"
df = pd.read_csv(file_path)

In [6]:
print("First 5 rows of the dataset:")
display(df.head())

First 5 rows of the dataset:


Unnamed: 0,Name,Tenth_Percent,Twelfth_Percent,FE_Percent,SE_Percent,TE_Percent,Certifications,Projects_Completed,Internships,Placed
0,Aarav Joshi,88.5,82.3,70.2,72.1,75.4,2,3,1,Yes
1,Diya Mehta,92.1,85.4,78.5,80.1,82.6,4,4,2,Yes
2,Kabir Shah,75.0,70.2,65.3,66.7,69.5,1,2,1,No
3,Ishaan Verma,81.2,79.3,69.4,71.0,73.2,2,2,1,Yes
4,Anaya Rao,89.8,84.1,76.8,78.9,80.3,3,3,2,Yes


In [7]:
# ---------------- DATA PREPROCESSING ----------------

# 1. Remove Duplicate Values
df = df.drop_duplicates()

# 2. Handle Missing Values
for col in df.columns:
    if df[col].dtype != 'object':      # numeric columns
        df[col] = df[col].fillna(df[col].mean())
    else:                              # categorical columns
        df[col] = df[col].fillna(df[col].mode()[0])

In [8]:
# 3. Convert Placed column into binary format
df['Placed'] = df['Placed'].map({'Yes': 1, 'No': 0})

# 4. Drop Name column (Not useful for ML)
df_model = df.drop(columns=['Name'])

# 5. Handle Outliers (IQR Method)
numeric_cols = df_model.select_dtypes(include=['float64','int64']).columns
for col in numeric_cols:
    Q1 = df_model[col].quantile(0.25)
    Q3 = df_model[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    df_model[col] = np.clip(df_model[col], lower, upper)

In [9]:
# ---------------- MODEL BUILDING ----------------

X = df_model.drop('Placed', axis=1)
y = df_model['Placed']

# Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Standardization / Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Train Model (Logistic Regression)
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Predictions
y_pred = model.predict(X_test_scaled)

# Model Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("\n‚úÖ Model Accuracy: ", accuracy)

# Detailed Classification Metrics
print("\nüìä Classification Report:\n", classification_report(y_test, y_pred))


‚úÖ Model Accuracy:  1.0

üìä Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         5

    accuracy                           1.00         7
   macro avg       1.00      1.00      1.00         7
weighted avg       1.00      1.00      1.00         7



In [11]:
# ---------- TEST WITH YOUR OWN SAMPLE STUDENT DATA ----------

# Example student values (Change these to test different cases)

sample = {
    'Tenth_Percent': 85,
    'Twelfth_Percent': 80,
    'FE_Percent': 74,
    'SE_Percent': 76,
    'TE_Percent': 78,
    'Certifications': 3,
    'Projects_Completed': 3,
    'Internships': 1
}

# Convert sample to DataFrame
sample_df = pd.DataFrame([sample])

# Scale sample input
sample_scaled = scaler.transform(sample_df)

# Predict
prediction = model.predict(sample_scaled)

if prediction[0] == 1:
    print("üéâ The Student is **LIKELY TO BE PLACED** ‚úÖ")
else:
    print("‚ö† The Student is **NOT LIKELY TO BE PLACED** ‚ùå")


üéâ The Student is **LIKELY TO BE PLACED** ‚úÖ
