#### Importing Libraries

In [26]:
import pandas as pd
import numpy as np

In [27]:
data = pd.read_csv("../Dataset/Placement_Data_Full_Class.csv")

In [28]:
data.head()

Unnamed: 0,sl_no,gender,ssc_p,ssc_b,hsc_p,hsc_b,hsc_s,degree_p,degree_t,workex,etest_p,specialisation,mba_p,status,salary
0,1,M,67.0,Others,91.0,Others,Commerce,58.0,Sci&Tech,No,55.0,Mkt&HR,58.8,Placed,270000.0
1,2,M,79.33,Central,78.33,Others,Science,77.48,Sci&Tech,Yes,86.5,Mkt&Fin,66.28,Placed,200000.0
2,3,M,65.0,Central,68.0,Central,Arts,64.0,Comm&Mgmt,No,75.0,Mkt&Fin,57.8,Placed,250000.0
3,4,M,56.0,Central,52.0,Central,Science,52.0,Sci&Tech,No,66.0,Mkt&HR,59.43,Not Placed,
4,5,M,85.8,Central,73.6,Central,Commerce,73.3,Comm&Mgmt,No,96.8,Mkt&Fin,55.5,Placed,425000.0


In [29]:
data.drop(['ssc_b','hsc_b', 'hsc_s', 'degree_t', 'salary'], axis=1, inplace=True)

In [30]:
data.head()

Unnamed: 0,sl_no,gender,ssc_p,hsc_p,degree_p,workex,etest_p,specialisation,mba_p,status
0,1,M,67.0,91.0,58.0,No,55.0,Mkt&HR,58.8,Placed
1,2,M,79.33,78.33,77.48,Yes,86.5,Mkt&Fin,66.28,Placed
2,3,M,65.0,68.0,64.0,No,75.0,Mkt&Fin,57.8,Placed
3,4,M,56.0,52.0,52.0,No,66.0,Mkt&HR,59.43,Not Placed
4,5,M,85.8,73.6,73.3,No,96.8,Mkt&Fin,55.5,Placed


#### Encoding

In [31]:
data["gender"] = data.gender.map({"M":0,"F":1})
data["workex"] = data.workex.map({"No":0, "Yes":1})
data["status"] = data.status.map({"Not Placed":0, "Placed":1})
data["specialisation"] = data.specialisation.map({"Mkt&HR":0, "Mkt&Fin":1})

#### Balance Dataset

In [32]:
data['status'].value_counts()

status
1    148
0     67
Name: count, dtype: int64

In [33]:
from sklearn.utils import resample

# Separate the majority and minority classes
data_majority = data[data['status'] == 1]
data_minority = data[data['status'] == 0]

# Upsample minority class
data_minority_upsampled = resample(data_minority, 
                                   replace=True,     # sample with replacement
                                   n_samples=len(data_majority),    # to match majority class
                                   random_state=42) # reproducible results

# Combine majority class with upsampled minority class
balanced_data = pd.concat([data_majority, data_minority_upsampled])

In [34]:
balanced_data['status'].value_counts()

status
1    148
0    148
Name: count, dtype: int64

#### Train Test Split

In [35]:
# Seperating Features and Target
X = balanced_data.copy().drop('status', axis=1)
y = balanced_data['status']

In [36]:
#Train Test Split
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3)
X_train.shape, Y_train.shape, X_test.shape,Y_test.shape

((207, 9), (207,), (89, 9), (89,))

### Feature Selection

In [37]:
# scale each features
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#### K-Nearest Neighbour

In [38]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X_train, Y_train)
knn_Y_pred = knn.predict(X_test)
knn_accuracy = knn.score(X_test, Y_test)
knn_accuracy

0.8876404494382022

In [45]:
import plotly.graph_objects as go
import numpy as np
from sklearn.metrics import confusion_matrix

# Assuming you already have Y_test and knn_Y_pred defined
# Replace this with your actual data

# Generate confusion matrix
conf_mat = confusion_matrix(Y_test, knn_Y_pred)

# Create 3D surface plot for confusion matrix
x = ['True Neg', 'False Pos']
y = ['False Neg', 'True Pos']

fig = go.Figure(data=[go.Surface(z=conf_mat)])

# Set axes labels
fig.update_layout(
    title='Confusion Matrix',
    scene = dict(
        xaxis_title='Predicted Label',
        yaxis_title='True Label',
        zaxis_title='Count',
    )
)

# Save plot as HTML file
fig.write_html("confusion_matrix.html")

print("Confusion matrix saved as '../images/iconfusion_matrix.png'.")
fig.show()


Confusion matrix saved as '../images/iconfusion_matrix.png'.


#### Decision Tree

In [40]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier

decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, Y_train)
decision_tree_Y_pred = decision_tree.predict(X_test)
decision_tree_accuracy = decision_tree.score(X_test, Y_test)
decision_tree_accuracy

0.8876404494382022

In [44]:
import plotly.graph_objects as go
import numpy as np
from sklearn.metrics import confusion_matrix

# Generate confusion matrix
conf_mat = confusion_matrix(Y_test, decision_tree_Y_pred)

# Create 3D surface plot for confusion matrix
x = ['True Neg', 'False Pos']
y = ['False Neg', 'True Pos']

fig = go.Figure(data=[go.Surface(z=conf_mat, x=x, y=y)])

# Set axes labels
fig.update_layout(
    title='Confusion Matrix',
    scene=dict(
        xaxis_title='Predicted Label',
        yaxis_title='True Label',
        zaxis_title='Count',
    )
)

# Save plot as HTML file
fig.write_html("confusion_matrix_3d.html")
fig.show()

print("Confusion matrix 3D surface plot saved as 'confusion_matrix_3d.html'.")

Confusion matrix 3D surface plot saved as 'confusion_matrix_3d.html'.


#### SVM

In [46]:
# Support Vector Machine
from sklearn.svm import SVC
svc = SVC()
svc.fit(X_train, Y_train)
svm_Y_pred = svc.predict(X_test)
svc_accuracy = svc.score(X_test, Y_test)
svc_accuracy

0.9438202247191011

In [49]:
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix
import numpy as np

# Generate confusion matrix
conf_mat = confusion_matrix(Y_test, svm_Y_pred)

# Define labels for x and y axes
x = ['True Neg', 'False Pos']
y = ['False Neg', 'True Pos']

# Create heatmap
fig = go.Figure(data=go.Heatmap(
                   z=conf_mat,
                   x=x,
                   y=y,
                   colorscale='Viridis'))

# Set title and labels
fig.update_layout(
    title='Confusion Matrix Heatmap',
    xaxis_title='Predicted Label',
    yaxis_title='True Label')

# Save plot as HTML file
fig.write_html("confusion_matrix_heatmap.html")
fig.show()

print("Confusion matrix heatmap saved as 'confusion_matrix_heatmap.html'.")

Confusion matrix heatmap saved as 'confusion_matrix_heatmap.html'.


#### Random Forest

In [51]:
from sklearn.ensemble import RandomForestClassifier

random_forest = RandomForestClassifier(n_estimators=1000)
random_forest.fit(X_train, Y_train)
random_forest_Y_pred = random_forest.predict(X_test)
random_forest.score(X_train, Y_train)
random_forest_accuracy = random_forest.score(X_test, Y_test)
random_forest_accuracy

0.9775280898876404

In [52]:
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix
import numpy as np

# Generate confusion matrix
conf_mat = confusion_matrix(Y_test, random_forest_Y_pred)

# Define labels for x and y axes
x = ['True Neg', 'False Pos']
y = ['False Neg', 'True Pos']

# Create heatmap
fig = go.Figure(data=go.Heatmap(
                   z=conf_mat,
                   x=x,
                   y=y,
                   colorscale='Viridis'))

# Set title and labels
fig.update_layout(
    title='Confusion Matrix Heatmap',
    xaxis_title='Predicted Label',
    yaxis_title='True Label')

# Save plot as HTML file
fig.write_html("confusion_matrix_heatmap3.html")
fig.show()

print("Confusion matrix heatmap saved as 'confusion_matrix_heatmap3.html'.")

Confusion matrix heatmap saved as 'confusion_matrix_heatmap3.html'.


#### Gaussian Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)
gaussian_Y_pred = gaussian.predict(X_test)
gaussian_accuracy = gaussian.score(X_test, Y_test)
gaussian_accuracy

0.7865168539325843

#### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
lg = LogisticRegression()
lg.fit(X_train, Y_train)
lg_Y_pred = lg.predict(X_test)
lg_accuracy = lg.score(X_test, Y_test)
lg_accuracy

0.8314606741573034

#### Input Predictions

In [None]:
print("Predicted Class :",random_forest.predict(X_train[10].reshape(1,-1))[0])
print("Actual Class:", Y_test.iloc[10])

Predicted Class : 1
Actual Class: 1


In [None]:
print("Predicted Class :",random_forest.predict(X_train[50].reshape(1,-1))[0])
print("Actual Class:", Y_test.iloc[50])

Predicted Class : 0
Actual Class: 1


#### Predictive System

In [None]:
def prediction(sl_no, gender, ssc_p, hsc_p, degree_p, workex, etest_p, specialisation, mba_p):
    data = {
    'sl_no': [sl_no],
    'gender': [gender],
    'ssc_p': [ssc_p],
    'hsc_p': [hsc_p],
    'degree_p': [degree_p],
    'workex': [workex],
    'etest_p': [etest_p],
    'specialisation': [specialisation],
    'mba_p': [mba_p]
    }
    data = pd.DataFrame(data)
    data['gender'] = data['gender'].map({'M':1,"F":0})
    data['workex'] = data['workex'].map({"Yes":1,"No":0})
    data['specialisation'] = data['specialisation'].map({"Mkt&HR":1,"Mkt&Fin":0})
    scaled_df = scaler.fit_transform(data)
    result = random_forest.predict(scaled_df).reshape(1, -1)
    return result[0]

In [None]:
sl_no =11
gender = "F"
ssc_p =58.
hsc_p = 61.
degree_p = 60.
workex = "Yes"
etest_p = 62.
specialisation = "Mkt&Fin"
mba_p = 60.85

result = prediction(sl_no, gender, ssc_p, hsc_p, degree_p, workex, etest_p, specialisation, mba_p)

if result == 1:
    print('place')

else:
    print('not place')

not place


#### Saving Models

In [None]:
import pickle
pickle.dump(random_forest,open('../Models/model.pkl','wb'))
pickle.dump(scaler,open("../Models/scaler.pkl",'wb'))