# Admission prediction using DecisionTreeClassifier

In [1]:
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier


In [2]:
# Load the admission dataset from a CSV file; the path is relative to the
# notebook's working directory.
dataFrame = pd.read_csv('./Admission_Predict.csv')


In [3]:
# With a negative n, head() returns every row except the last |n| rows, so
# this previews all rows but the final one (the rendered table elides the
# middle rows).
dataFrame.head(-1)

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.00,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.80
4,5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
394,395,329,111,4,4.5,4.0,9.23,1,0.89
395,396,324,110,3,3.5,3.5,9.04,1,0.82
396,397,325,107,3,3.0,3.5,9.11,1,0.84
397,398,330,116,4,5.0,4.5,9.45,1,0.91


---
* Data preprocessing

In [4]:
dataFrame.isnull().sum()  # count the null values in each column


Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

In [5]:
dataFrame.dtypes  # inspect the data type of each column


Serial No.             int64
GRE Score              int64
TOEFL Score            int64
University Rating      int64
SOP                  float64
LOR                  float64
CGPA                 float64
Research               int64
Chance of Admit      float64
dtype: object

In [6]:
# List the column labels. The output shows that 'LOR ' and 'Chance of Admit '
# end with a trailing space, so that exact spelling is needed whenever these
# columns are selected by name.
dataFrame.columns


Index(['Serial No.', 'GRE Score', 'TOEFL Score', 'University Rating', 'SOP',
       'LOR ', 'CGPA', 'Research', 'Chance of Admit '],
      dtype='object')

In [7]:
# Replace the 'Chance of Admit ' score with a binary class label:
# 1 when the score is at least 0.8, otherwise 0.
is_likely_admit = dataFrame['Chance of Admit '] >= 0.8
dataFrame['Chance of Admit '] = is_likely_admit.astype(int)

---
* Splitting data

In [8]:
# Predictors: every column except the label and the row identifier.
features = dataFrame.drop(columns=['Chance of Admit ', 'Serial No.'])
# Label: the binary admission class.
target = dataFrame['Chance of Admit ']

# Hold out 30% of the rows for testing. shuffle=True randomizes the row order
# before splitting, and random_state=0 makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    features, target, test_size=0.3, random_state=0, shuffle=True)
x_train.head()


Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
92,298,98,2,4.0,3.0,8.03,0
223,308,109,2,3.0,4.0,8.45,0
234,330,113,5,5.0,4.0,9.31,1
232,312,107,2,2.5,3.5,8.27,0
377,290,100,1,1.5,2.0,7.56,0


---
* Using DecisionTree as the classification model

In [9]:
# Build a decision tree limited to depth 2 (seeded for repeatability) and
# train it on the training split in a single chained call.
clf = DecisionTreeClassifier(max_depth=2, random_state=0).fit(x_train, y_train)
clf  # display the fitted estimator as the cell output


In [10]:
# Predict a class label for every row of the held-out test features.
y_pred = clf.predict(x_test)


---
* Evaluating the model by comparing its predictions with the actual test labels

In [11]:
# Confusion matrix of the true test labels (y_test) against the predictions
# (y_pred): rows are the actual class, columns are the predicted class.
print("Confusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))


Confusion Matrix:
 [[79  7]
 [ 2 32]]


In [12]:
# Report the classification scores for the test-set predictions. Each entry
# pairs the printed label with the scoring function that produces its value.
score_report = [
    ("1. Accuracy Score:", metrics.accuracy_score),
    ("2. Precision Score:", metrics.precision_score),
    ("3. Recall Score:", metrics.recall_score),
    ("4. f1 Score:", metrics.f1_score),
]
for label, scorer in score_report:
    print(label, scorer(y_test, y_pred))


1. Accuracy Score: 0.925
2. Precision Score: 0.8205128205128205
3. Recall Score: 0.9411764705882353
4. f1 Score: 0.8767123287671232


---
* Testing trained model with user inputs

In [14]:
# Ask for one applicant's features and classify them with the fitted tree.
#
# input() always returns text, so every answer is parsed with float() right
# away. Without this, np.array would build an array of strings and the model
# would receive a text DataFrame instead of numbers. A non-numeric answer now
# fails immediately with a ValueError naming the offending value.
#
# The GRE prompt states an upper bound of 340: the dataset itself contains
# scores above 330 (e.g. 337), so the previous "<=330" hint was misleading.
gre = float(input('Enter GRE score (<=340): '))
toefl = float(input('Enter TOEFL score (<=120): '))
university_rating = float(input('Enter University rating (1-5): '))
sop = float(input('Enter SOP rating (1-5): '))
lor = float(input('Enter LOR rating (1-5): '))
cgpa = float(input('Enter CGPA (<=10.0): '))
research = float(input('Have you done Research work (0/1) : '))

# Single-row numeric array, in the same column order as the training features.
user_input = np.array(
    [[gre, toefl, university_rating, sop, lor, cgpa, research]])

# Column labels must match the training frame exactly, including the trailing
# space in 'LOR '.
user_df = pd.DataFrame(user_input, columns=[
                       'GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'LOR ', 'CGPA', 'Research'], index=[1])

user_answer = clf.predict(user_df)
# The label is 1 for "likely admitted" and 0 otherwise.
if user_answer[0] == 1:
    print('Student have positive chances of getting admission!')
else:
    print('Student has less chance of getting admission!')


Student has less chance of getting admission!


---