In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow	import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.metrics import r2_score
from sklearn.compose import ColumnTransformer

In [3]:
data = pd.read_csv('admissions_data.csv')

In [4]:
data.describe()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,250.5,316.472,107.192,3.114,3.374,3.484,8.57644,0.56,0.72174
std,144.481833,11.295148,6.081868,1.143512,0.991004,0.92545,0.604813,0.496884,0.14114
min,1.0,290.0,92.0,1.0,1.0,1.0,6.8,0.0,0.34
25%,125.75,308.0,103.0,2.0,2.5,3.0,8.1275,0.0,0.63
50%,250.5,317.0,107.0,3.0,3.5,3.5,8.56,1.0,0.72
75%,375.25,325.0,112.0,4.0,4.0,4.0,9.04,1.0,0.82
max,500.0,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


In [5]:
data.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [9]:
features = data.iloc[:, 0:8]
labels = data.iloc[:, -1]

In [18]:
features.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,1,337,118,4,4.5,4.5,9.65,1
1,2,324,107,4,4.0,4.5,8.87,1
2,3,316,104,3,3.0,3.5,8.0,1
3,4,322,110,3,3.5,2.5,8.67,1
4,5,314,103,2,2.0,3.0,8.21,0


In [11]:
labels.head()

0    0.92
1    0.76
2    0.72
3    0.80
4    0.65
Name: Chance of Admit , dtype: float64

In [19]:
features = features.drop(columns=['Serial No.'])

In [20]:
features.shape

(500, 7)

In [21]:
features.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,337,118,4,4.5,4.5,9.65,1
1,324,107,4,4.0,4.5,8.87,1
2,316,104,3,3.0,3.5,8.0,1
3,322,110,3,3.5,2.5,8.67,1
4,314,103,2,2.0,3.0,8.21,0


In [24]:
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, 
                                                                            test_size=0.33, random_state=42) 

In [25]:
features_train.shape

(335, 7)

In [74]:
ct = ColumnTransformer([('scale', StandardScaler() , ['GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'CGPA'])], remainder='passthrough')

In [75]:
features_train_scale = ct.fit_transform(features_train)

In [76]:
features_test_scale = ct.transform(features_test)

In [77]:
print(type(features_train_scale))

<class 'numpy.ndarray'>


In [78]:
features_train_scale = pd.DataFrame(features_train_scale, columns = features_train.columns)

In [79]:
features_test_scale = pd.DataFrame(features_test_scale, columns = features_test.columns)

In [80]:
features_train_scale

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,-0.525175,-0.706986,-0.082885,-1.385311,-0.835766,4.0,0.0
1,0.477036,0.276434,1.652517,1.123225,0.363045,3.5,0.0
2,-0.798505,-1.034792,-0.950586,-0.381897,-1.156574,4.0,0.0
3,0.203706,-0.706986,-0.082885,-0.883604,0.295507,4.0,1.0
4,1.297027,1.423757,1.652517,1.624933,1.342356,5.0,1.0
...,...,...,...,...,...,...,...
330,1.114806,0.604240,0.784816,1.123225,1.004663,4.5,1.0
331,-0.980725,-0.379179,-0.950586,-0.883604,-0.616265,3.0,1.0
332,-1.345165,-1.362599,-1.818287,-1.385311,-2.254077,2.0,0.0
333,-0.707395,-0.379179,-0.950586,-0.883604,-1.528037,4.0,0.0


In [81]:
features_test_scale

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,1.570357,1.423757,0.784816,0.621518,1.612511,3.5,1.0
1,-0.251845,0.112531,0.784816,1.123225,0.768277,4.0,1.0
2,-0.160734,-0.379179,-0.950586,-1.385311,-1.578691,2.5,0.0
3,-0.434065,0.276434,-0.082885,-0.381897,0.177314,3.0,0.0
4,0.841476,0.768144,-0.082885,0.119811,0.785162,3.0,1.0
...,...,...,...,...,...,...,...
160,-1.071835,0.112531,1.652517,-0.381897,-0.177264,3.0,0.0
161,0.659256,0.604240,-0.082885,-0.883604,0.363045,2.0,1.0
162,-0.069624,-0.706986,-0.950586,-1.385311,0.261737,4.5,0.0
163,-0.616285,-1.362599,-0.950586,-1.887019,-2.169654,2.0,0.0
