# Importing

## Import Library

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Import CSV and Convert to DataFrame

In [None]:
# Load the placement dataset CSV from the read-only Kaggle input directory into a DataFrame.
df = pd.read_csv('/kaggle/input/student-academic-placement-performance-dataset/student_academic_placement_performance_dataset.csv')

# Preprocessing

## First five rows

In [None]:
# Show the first five rows as a quick sanity check of the loaded data.
df.head(n=5)

## Last five rows

In [None]:
# Show the last five rows of the loaded data.
df.tail(n=5)

## Shape of our dataset

In [None]:
# (number of rows, number of columns) of the dataset.
df.shape

## List out all columns

In [None]:
# Column labels of the dataset.
df.columns

## Data type of each column

In [None]:
# Per-column dtypes as inferred by pandas when the CSV was read.
df.dtypes

## Information of all Columns

In [None]:
# Print a concise summary: index range, columns with non-null counts and dtypes, memory usage.
df.info()

## Check Null Values

In [None]:
# Count missing values in each column.
df.isna().sum()

## Check Duplicate Values

In [None]:
# Number of rows that are exact duplicates of an earlier row (all columns compared).
df.duplicated().sum()

## Summary

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

# EDA

In [None]:
def show_fig() -> None:
    """Tighten the layout of the current figure and display it.

    Shared by the plotting cells so each one ends with a single call.
    """
    # tight_layout runs before show so the adjusted spacing is what gets rendered.
    plt.tight_layout()
    plt.show()

# Running figure counter: each plotting cell prefixes its title with this value and then increments it.
plot_no = 1

In [None]:
# Histogram of the `cgpa` column with a kernel density estimate overlaid.
fig = plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='cgpa', kde=True)
plt.gca().set_title(f'{plot_no}. Distribution of CGPA Showing Overall Academic Performance Spread')
show_fig()
plot_no += 1


In [None]:
# Box plot of `cgpa`, one box per `placement_status` value.
fig = plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='placement_status', y='cgpa')
plt.gca().set_title(f'{plot_no}. CGPA Comparison Between Placed and Non-Placed Students')
show_fig()
plot_no += 1


In [None]:
# Box plot of `entrance_exam_score`, one box per `placement_status` value.
fig = plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='placement_status', y='entrance_exam_score')
plt.gca().set_title(f'{plot_no}. Entrance Exam Score Impact on Placement Status')
show_fig()
plot_no += 1


In [None]:
# Row counts per `placement_status`, with bars split by `gender`.
fig = plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='placement_status', hue='gender')
plt.gca().set_title(f'{plot_no}. Gender-wise Placement Distribution')
show_fig()
plot_no += 1


In [None]:
# Box plot of `cgpa`, one box per distinct `backlogs` value.
fig = plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='backlogs', y='cgpa')
plt.gca().set_title(f'{plot_no}. Effect of Academic Backlogs on CGPA')
show_fig()
plot_no += 1


In [None]:
# Box plot of `cgpa`, one box per `extracurricular_activities` value.
fig = plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='extracurricular_activities', y='cgpa')
plt.gca().set_title(f'{plot_no}. Impact of Extracurricular Activities on CGPA')
show_fig()
plot_no += 1


In [None]:
# Row counts per `placement_status` value (overall class balance of the target).
fig = plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='placement_status')
plt.gca().set_title(f'{plot_no}. Overall Placement Success Ratio')
show_fig()
plot_no += 1


In [None]:
# Box plot of `attendance_percentage`, one box per `placement_status` value.
fig = plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='placement_status', y='attendance_percentage')
plt.gca().set_title(f'{plot_no}. Attendance Pattern Comparison by Placement Status')
show_fig()
plot_no += 1


In [None]:
# Scatter plot of `cgpa` against `technical_skill_score`, one point per row.
fig = plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='technical_skill_score', y='cgpa')
plt.gca().set_title(f'{plot_no}. Technical Skill Score vs CGPA Academic Strength')
show_fig()
plot_no += 1


In [None]:
# Violin plot of `soft_skill_score`, one violin per `placement_status` value.
fig = plt.figure(figsize=(10, 6))
sns.violinplot(data=df, x='placement_status', y='soft_skill_score')
plt.gca().set_title(f'{plot_no}. Soft Skill Distribution Across Placement Status')
show_fig()
plot_no += 1


In [None]:
# Annotated heatmap of pairwise correlations between the numeric columns only
# (non-numeric columns are filtered out before calling corr()).
fig = plt.figure(figsize=(15, 10))
sns.heatmap(
    df.select_dtypes(include='number').corr(),
    annot=True,
    cmap='coolwarm',
)
plt.gca().set_title(f'{plot_no}. Correlation Heatmap of Academic, Skill, and Career Metrics')
show_fig()
plot_no += 1


# Model Training

## feature and target selection

In [None]:
# Target: the placement outcome column.
y = df['placement_status']
# Features: every column except the row identifier, the target itself and
# salary_package_lpa (presumably only known after placement, so it would leak
# the target -- confirm against the dataset description).
X = df.drop(['student_id', 'placement_status', 'salary_package_lpa'], axis=1)

## encoding categorical variables

In [None]:
# One-hot encode the categorical feature columns; drop_first removes one level
# per category so the dummy columns are not perfectly collinear.
X = pd.get_dummies(data=X, drop_first=True)

## train test split

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

## feature scaling

In [None]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits so no test-set information is used.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## model training

In [None]:
# Logistic regression classifier; max_iter=1000 gives the solver a higher
# iteration cap than the library default so it has room to converge.
model = LogisticRegression(max_iter=1000)
# Fit on the scaled training features only.
model.fit(X_train_scaled, y_train)

## prediction

In [None]:
# Predicted class labels for the held-out test rows (scaled with the training-fitted scaler).
y_pred = model.predict(X_test_scaled)

## model evaluation

In [None]:
# Overall accuracy on the test split, expressed as a percentage.
print("Accuracy:", 100 * accuracy_score(y_test, y_pred))
# Per-class precision, recall, F1 and support.
print(classification_report(y_true=y_test, y_pred=y_pred))

## confusion matrix

In [None]:
# Pin the class order explicitly: the matrix rows/columns and the heatmap tick
# labels then always agree, and every class the model knows gets a row/column
# even if it happens to be absent from the test split.
class_labels = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig = plt.figure(figsize=(10,6))
# Label the ticks with the actual class names instead of bare 0/1 indices so
# the reader can tell which cell is which outcome.
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix for Placement Status Prediction')
# Reuse the notebook's shared helper (tight_layout + show) like every other plot cell.
show_fig()