# WEEK-7 ASSIGNMENT

### Develop a web application using Streamlit to deploy a trained machine learning model. The app should allow users to input data, receive predictions, and understand model outputs through visualizations. This task will help you learn how to make your models accessible and interactive.

### We will use the **College Student Placement Factors** dataset

### Importing necessary libraries and dataset

In [22]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [7]:
# Locate the dataset: prefer a copy placed next to the notebook so the analysis is
# portable, and fall back to the original Desktop location when no local copy exists.
DATA_FILE = "college_student_placement_dataset.csv"
data_path = Path(DATA_FILE)
if not data_path.exists():
    data_path = Path("C:\\Users\\lavan\\OneDrive\\Desktop") / DATA_FILE

data = pd.read_csv(data_path)
data

Unnamed: 0,College_ID,IQ,Prev_Sem_Result,CGPA,Academic_Performance,Internship_Experience,Extra_Curricular_Score,Communication_Skills,Projects_Completed,Placement
0,CLG0030,107,6.61,6.28,8,No,8,8,4,No
1,CLG0061,97,5.52,5.37,8,No,7,8,0,No
2,CLG0036,109,5.36,5.83,9,No,3,1,1,No
3,CLG0055,122,5.47,5.75,6,Yes,1,6,1,No
4,CLG0004,96,7.91,7.69,7,No,8,10,2,No
...,...,...,...,...,...,...,...,...,...,...
9995,CLG0021,119,8.41,8.29,4,No,1,8,0,Yes
9996,CLG0098,70,9.25,9.34,7,No,0,7,2,No
9997,CLG0066,89,6.08,6.25,3,Yes,3,9,5,No
9998,CLG0045,107,8.77,8.92,3,No,7,5,1,No


In [9]:
# Dimensions of the loaded frame as (rows, columns).
data.shape

(10000, 10)

In [11]:
# Summarise column names, non-null counts and dtypes of the raw frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   College_ID              10000 non-null  object 
 1   IQ                      10000 non-null  int64  
 2   Prev_Sem_Result         10000 non-null  float64
 3   CGPA                    10000 non-null  float64
 4   Academic_Performance    10000 non-null  int64  
 5   Internship_Experience   10000 non-null  object 
 6   Extra_Curricular_Score  10000 non-null  int64  
 7   Communication_Skills    10000 non-null  int64  
 8   Projects_Completed      10000 non-null  int64  
 9   Placement               10000 non-null  object 
dtypes: float64(2), int64(5), object(3)
memory usage: 781.4+ KB


### Checking for missing values

In [14]:
# Count missing entries per column (isna is the canonical name for isnull).
data.isna().sum()

College_ID                0
IQ                        0
Prev_Sem_Result           0
CGPA                      0
Academic_Performance      0
Internship_Experience     0
Extra_Curricular_Score    0
Communication_Skills      0
Projects_Completed        0
Placement                 0
dtype: int64

In [16]:
# The dataset has no missing values in any column.

In [18]:
# College_ID is an identifier, not a predictor, so it is removed before modelling.
# A non-mutating drop with errors='ignore' keeps this cell idempotent: re-running it
# after the column is already gone no longer raises a KeyError.
data = data.drop(columns='College_ID', errors='ignore')

In [20]:
# Re-inspect the schema to confirm which columns remain.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   IQ                      10000 non-null  int64  
 1   Prev_Sem_Result         10000 non-null  float64
 2   CGPA                    10000 non-null  float64
 3   Academic_Performance    10000 non-null  int64  
 4   Internship_Experience   10000 non-null  object 
 5   Extra_Curricular_Score  10000 non-null  int64  
 6   Communication_Skills    10000 non-null  int64  
 7   Projects_Completed      10000 non-null  int64  
 8   Placement               10000 non-null  object 
dtypes: float64(2), int64(5), object(2)
memory usage: 703.3+ KB


### Convert target column 'Placement' to numerical using Label Encoding where 1 means yes and 0 means no


In [27]:
# Encode the target labels as integers; LabelEncoder assigns codes in sorted label
# order, so 'No' becomes 0 and 'Yes' becomes 1.
le = LabelEncoder()
le.fit(data['Placement'])
data['Placement'] = le.transform(data['Placement'])

### One Hot Encoding

In [30]:
# One-hot encode every remaining text column. The first level of each column is
# dropped, and the resulting integer indicator columns are appended on the right.
object_columns = data.select_dtypes(include='object').columns
for col in object_columns:
    dummies = pd.get_dummies(data[col], prefix=col, drop_first=True).astype(int)
    data = pd.concat([data.drop(columns=col), dummies], axis=1)

### Splitting into training and testing datasets

In [33]:
# Separate the target column from the predictor columns.
y = data['Placement']                   # Target
x = data.drop(columns='Placement')      # Features

In [35]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [37]:
# Preview the training feature matrix.
x_train

Unnamed: 0,IQ,Prev_Sem_Result,CGPA,Academic_Performance,Extra_Curricular_Score,Communication_Skills,Projects_Completed,Internship_Experience_Yes
9254,103,7.30,6.96,10,6,6,1,0
1561,88,5.49,5.02,3,2,4,1,1
1670,96,6.86,6.99,4,9,5,4,0
6087,76,6.09,6.22,2,6,7,1,1
6669,110,8.19,8.06,2,1,1,2,1
...,...,...,...,...,...,...,...,...
5734,91,9.93,9.72,6,8,8,1,0
5191,99,7.74,7.36,9,7,10,0,0
5390,89,7.07,6.66,1,10,1,5,1
860,103,7.74,7.57,4,8,3,2,0


In [39]:
# Preview the held-out feature matrix.
x_test

Unnamed: 0,IQ,Prev_Sem_Result,CGPA,Academic_Performance,Extra_Curricular_Score,Communication_Skills,Projects_Completed,Internship_Experience_Yes
6252,139,7.29,7.24,8,8,4,4,0
4684,84,8.28,8.49,8,3,3,5,1
1731,94,5.26,5.66,8,7,10,4,0
4742,100,6.22,6.29,6,8,6,3,1
4521,116,5.99,6.39,6,2,9,5,0
...,...,...,...,...,...,...,...,...
6412,72,7.53,7.40,3,6,8,3,0
8285,114,8.15,7.95,8,6,3,2,1
7853,108,8.85,8.81,3,6,9,1,1
1095,98,9.89,9.63,1,3,1,0,0


In [41]:
# Preview the training labels.
y_train

9254    0
1561    0
1670    0
6087    0
6669    0
       ..
5734    0
5191    0
5390    0
860     0
7270    0
Name: Placement, Length: 8000, dtype: int32

In [43]:
# Preview the held-out labels.
y_test

6252    0
4684    0
1731    0
4742    0
4521    1
       ..
6412    0
8285    0
7853    0
1095    0
6929    0
Name: Placement, Length: 2000, dtype: int32

### Feature Scaling

In [46]:
# Standardise the features with StandardScaler.
# The scaler is fitted on the training split only, and the test split is transformed
# with those training statistics. Calling fit_transform on x_test would re-fit the
# scaler on test data: that leaks test information into the evaluation and leaves `sc`
# (which is saved to scaler.pkl for the app) holding statistics that do not match the
# ones the model was trained with.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

### Model selection
#### We choose a Random Forest Classifier to predict placement and report its accuracy on the test set

In [48]:
# Models to evaluate, keyed by display name; only a Random Forest is used here.
# A fixed random_state makes the fitted forest, and therefore the reported accuracy
# and the saved model, reproducible from run to run.
models = {
    "Random Forest": RandomForestClassifier(random_state=42)
}

# Fit each model on the scaled training data and report accuracy on the test split.
for name, model in models.items():
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.4f}")

Random Forest Accuracy: 0.9985


### Create a pickle file using serialization

In [58]:
pip install streamlit




In [60]:
import joblib

# Persist the fitted classifier and the fitted scaler so the Streamlit app can load them.
rf_model = models["Random Forest"]
joblib.dump(rf_model, 'placement_model.pkl')
joblib.dump(sc, 'scaler.pkl')

['scaler.pkl']

In [62]:
# Streamlit app for placement prediction.
# This code is meant to live in its own script (e.g. app.py) and be started from a
# terminal with `streamlit run app.py`; when executed directly in the notebook kernel,
# Streamlit only logs a hint to use `streamlit run`.
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st

# Feature names in the exact column order of the training matrix. The same list labels
# the scaler input and the feature-importance chart so the two cannot drift apart.
FEATURES = ['IQ', 'Prev_Sem_Result', 'CGPA', 'Academic_Performance',
            'Extra_Curricular_Score', 'Communication_Skills',
            'Projects_Completed', 'Internship_Experience_Yes']

# Load trained model and scaler
# NOTE(review): joblib files are pickles; only load artifacts produced by this notebook.
model = joblib.load('placement_model.pkl')
scaler = joblib.load('scaler.pkl')

st.title("Student Placement Prediction App")

st.markdown("Fill in the student details to predict if they will be placed.")

# Collect user inputs
iq = st.slider("IQ", 40, 160, 100)
prev_sem_result = st.slider("Previous Semester Result (out of 10)", 5.0, 10.0, 7.5)
# The dataset contains CGPA values below 6.0 (e.g. 5.02 and 5.37 in the displayed rows),
# so the lower bound is widened; TODO confirm the bounds against data['CGPA'].min()/max().
cgpa = st.slider("CGPA", 4.0, 10.0, 7.5)
academic_perf = st.slider("Academic Performance (out of 10)", 1, 10, 5)
internship = st.selectbox("Internship Experience", ['Yes', 'No'])
extra_score = st.slider("Extra Curricular Score (out of 10)", 0, 10, 5)
comm_skills = st.slider("Communication Skills (out of 10)", 1, 10, 5)
projects = st.slider("Projects Completed", 0, 5, 2)

# Preprocess input: encode the internship answer the same way as the
# Internship_Experience_Yes dummy column (1 for 'Yes', 0 for 'No').
internship_encoded = 1 if internship == 'Yes' else 0

# A named single-row frame lets the scaler match columns by name, as it was fitted on a
# DataFrame; the scaled result is a plain array like the one the model was trained on.
input_data = pd.DataFrame(
    [[iq, prev_sem_result, cgpa, academic_perf, extra_score, comm_skills, projects, internship_encoded]],
    columns=FEATURES,
)
scaled_input = scaler.transform(input_data)

# Predict button
if st.button("Predict Placement"):
    prediction = model.predict(scaled_input)[0]
    result = "Placed" if prediction == 1 else "Not Placed"
    st.success(f"The student is likely to be: **{result}**")

# Optional: Show feature importance
if st.checkbox("Show Feature Importance"):
    importances = model.feature_importances_
    sorted_idx = np.argsort(importances)[::-1]  # most important first

    st.subheader("Feature Importance")
    fig, ax = plt.subplots()
    ax.barh(np.array(FEATURES)[sorted_idx], importances[sorted_idx])
    ax.set_xlabel("Importance")
    ax.invert_yaxis()  # put the most important feature at the top of the chart
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so open
    # matplotlib figures do not accumulate across reruns.
    plt.close(fig)


2025-07-17 22:05:35.681 
  command:

    streamlit run C:\Users\lavan\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
