In [71]:
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier  
from sklearn.metrics import confusion_matrix

In [72]:
model = DecisionTreeClassifier() 

In [73]:
# reading data from file
df = pd.read_csv(r"data\Placement_Data.csv")
df = df.drop(["sl_no", "salary"], axis=1)

# converting categorical output data to numerical
df["status"].replace(["Placed", "Not Placed"],[1, 0], inplace=True)

df = df.dropna()

df.head()

Unnamed: 0,gender,ssc_p,ssc_b,hsc_p,hsc_b,hsc_s,degree_p,degree_t,workex,etest_p,specialisation,mba_p,status
0,M,67.0,Others,91.0,Others,Commerce,58.0,Sci&Tech,No,55.0,Mkt&HR,58.8,1
1,M,79.33,Central,78.33,Others,Science,77.48,Sci&Tech,Yes,86.5,Mkt&Fin,66.28,1
2,M,65.0,Central,68.0,Central,Arts,64.0,Comm&Mgmt,No,75.0,Mkt&Fin,57.8,1
3,M,56.0,Central,52.0,Central,Science,52.0,Sci&Tech,No,66.0,Mkt&HR,59.43,0
4,M,85.8,Central,73.6,Central,Commerce,73.3,Comm&Mgmt,No,96.8,Mkt&Fin,55.5,1


In [74]:
# setting input and output variables
x = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [75]:
x.head()

Unnamed: 0,gender,ssc_p,ssc_b,hsc_p,hsc_b,hsc_s,degree_p,degree_t,workex,etest_p,specialisation,mba_p
0,M,67.0,Others,91.0,Others,Commerce,58.0,Sci&Tech,No,55.0,Mkt&HR,58.8
1,M,79.33,Central,78.33,Others,Science,77.48,Sci&Tech,Yes,86.5,Mkt&Fin,66.28
2,M,65.0,Central,68.0,Central,Arts,64.0,Comm&Mgmt,No,75.0,Mkt&Fin,57.8
3,M,56.0,Central,52.0,Central,Science,52.0,Sci&Tech,No,66.0,Mkt&HR,59.43
4,M,85.8,Central,73.6,Central,Commerce,73.3,Comm&Mgmt,No,96.8,Mkt&Fin,55.5


In [76]:
y.head()

0    1
1    1
2    1
3    0
4    1
Name: status, dtype: int64

In [77]:
# encoding categorical feature data
gender_encoder = LabelEncoder()
ssc_b_encoder = LabelEncoder()
hsc_b_encoder = LabelEncoder()
hsc_s_encoder = LabelEncoder()
degree_t_encoder = LabelEncoder()
workex_encoder = LabelEncoder()
specialization_encoder = LabelEncoder()

x["sex"] = gender_encoder.fit_transform(x["gender"])
x["ssc_board"] = ssc_b_encoder.fit_transform(x["ssc_b"])
x["hsc_board"] = hsc_b_encoder.fit_transform(x["hsc_b"])
x["hsc_stream"] = hsc_s_encoder.fit_transform(x["hsc_s"])
x["degree_stream"] = degree_t_encoder.fit_transform(x["degree_t"])
x["work_experience"] = workex_encoder.fit_transform(x["workex"])
x["specialization"] = specialization_encoder.fit_transform(x["specialisation"])

x = x.drop(["gender", "ssc_b", "hsc_b", "hsc_s", "degree_t", "workex", "specialisation"], axis=1)
x.head()

Unnamed: 0,ssc_p,hsc_p,degree_p,etest_p,mba_p,sex,ssc_board,hsc_board,hsc_stream,degree_stream,work_experience,specialization
0,67.0,91.0,58.0,55.0,58.8,1,1,1,1,2,0,1
1,79.33,78.33,77.48,86.5,66.28,1,0,1,2,2,1,0
2,65.0,68.0,64.0,75.0,57.8,1,0,0,0,0,0,0
3,56.0,52.0,52.0,66.0,59.43,1,0,0,2,2,0,1
4,85.8,73.6,73.3,96.8,55.5,1,0,0,1,0,0,0


In [78]:
# feature scaling
scaler = StandardScaler()
x = scaler.fit_transform(x)
x[0]

array([-0.02808697,  2.2688123 , -1.14010225, -1.29109087, -0.59764672,
        0.73943397,  1.08245885,  0.80076299, -0.64195452,  1.57628354,
       -0.72444647,  1.12390297])

In [79]:
# splitting data into training and testing sets
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.10)

In [80]:
model.fit(x_train, y_train)

In [81]:
model.score(x_test, y_test)

0.8636363636363636

In [82]:
predictions = model.predict(x_test)

In [83]:
# creating confusion matrix
cm = confusion_matrix(y_test, predictions)
cm

array([[ 4,  1],
       [ 2, 15]], dtype=int64)

In [84]:
# performance, result evaluation
correct = 0
incorrect = 0
total = 0

for actual, predicted in zip(y_test, predictions):
    total += 1
    if actual == predicted:
        correct += 1
    else:
        incorrect += 1
        
print(f"Model type: {type(model).__name__}")
print(f"Correct predictions: {correct}")
print(f"Incorrect predictions: {incorrect}")
print(f"Model accuracy: {(correct/total)*100:.2f}%")

Model type: DecisionTreeClassifier
Correct predictions: 19
Incorrect predictions: 3
Model accuracy: 86.36%
