In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow	import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.metrics import r2_score
from sklearn.compose import ColumnTransformer

In [50]:
data = pd.read_csv('admissions_data.csv')

In [51]:
data.describe()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,250.5,316.472,107.192,3.114,3.374,3.484,8.57644,0.56,0.72174
std,144.481833,11.295148,6.081868,1.143512,0.991004,0.92545,0.604813,0.496884,0.14114
min,1.0,290.0,92.0,1.0,1.0,1.0,6.8,0.0,0.34
25%,125.75,308.0,103.0,2.0,2.5,3.0,8.1275,0.0,0.63
50%,250.5,317.0,107.0,3.0,3.5,3.5,8.56,1.0,0.72
75%,375.25,325.0,112.0,4.0,4.0,4.0,9.04,1.0,0.82
max,500.0,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


In [52]:
data.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [53]:
features = data.iloc[:, 0:8]
labels = data.iloc[:, -1]

In [54]:
features.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,1,337,118,4,4.5,4.5,9.65,1
1,2,324,107,4,4.0,4.5,8.87,1
2,3,316,104,3,3.0,3.5,8.0,1
3,4,322,110,3,3.5,2.5,8.67,1
4,5,314,103,2,2.0,3.0,8.21,0


In [55]:
labels.head()

0    0.92
1    0.76
2    0.72
3    0.80
4    0.65
Name: Chance of Admit , dtype: float64

In [56]:
features = features.drop(columns=['Serial No.'])

In [57]:
features.shape

(500, 7)

In [58]:
features.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,337,118,4,4.5,4.5,9.65,1
1,324,107,4,4.0,4.5,8.87,1
2,316,104,3,3.0,3.5,8.0,1
3,322,110,3,3.5,2.5,8.67,1
4,314,103,2,2.0,3.0,8.21,0


In [59]:
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, 
                                                                            test_size=0.33, random_state=42) 

In [60]:
features_train

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
471,311,103,3,2.0,4.0,8.09,0
26,322,109,5,4.5,3.5,8.80,0
7,308,101,2,3.0,4.0,7.90,0
453,319,103,3,2.5,4.0,8.76,1
108,331,116,5,5.0,5.0,9.38,1
...,...,...,...,...,...,...,...
106,329,111,4,4.5,4.5,9.18,1
270,306,105,2,2.5,3.0,8.22,1
348,302,99,1,2.0,2.0,7.25,0
435,309,105,2,2.5,4.0,7.68,0


In [61]:
ct = ColumnTransformer([('scale', StandardScaler() , ['GRE Score', 'TOEFL Score'])], remainder='passthrough')

In [62]:
features_train_scale = ct.fit_transform(features_train)

In [63]:
features_test_scale = ct.transform(features_test)

In [65]:
print(features_train.columns)
print(type(features_train_scale))

Index(['GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'LOR ', 'CGPA',
       'Research'],
      dtype='object')
<class 'numpy.ndarray'>


In [66]:
features_train = pd.DataFrame(features_train_scale, columns = features_train.columns)

In [67]:
features_test = pd.DataFrame(features_test_scale, columns = features_test.columns)

In [68]:
features_train

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,-0.525175,-0.706986,3.0,2.0,4.0,8.09,0.0
1,0.477036,0.276434,5.0,4.5,3.5,8.80,0.0
2,-0.798505,-1.034792,2.0,3.0,4.0,7.90,0.0
3,0.203706,-0.706986,3.0,2.5,4.0,8.76,1.0
4,1.297027,1.423757,5.0,5.0,5.0,9.38,1.0
...,...,...,...,...,...,...,...
330,1.114806,0.604240,4.0,4.5,4.5,9.18,1.0
331,-0.980725,-0.379179,2.0,2.5,3.0,8.22,1.0
332,-1.345165,-1.362599,1.0,2.0,2.0,7.25,0.0
333,-0.707395,-0.379179,2.0,2.5,4.0,7.68,0.0


In [69]:
features_test

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,1.570357,1.423757,4.0,4.0,3.5,9.54,1.0
1,-0.251845,0.112531,4.0,4.5,4.0,9.04,1.0
2,-0.160734,-0.379179,2.0,2.0,2.5,7.65,0.0
3,-0.434065,0.276434,3.0,3.0,3.0,8.69,0.0
4,0.841476,0.768144,3.0,3.5,3.0,9.05,1.0
...,...,...,...,...,...,...,...
160,-1.071835,0.112531,5.0,3.0,3.0,8.48,0.0
161,0.659256,0.604240,3.0,2.5,2.0,8.80,1.0
162,-0.069624,-0.706986,2.0,2.0,4.5,8.74,0.0
163,-0.616285,-1.362599,2.0,1.5,2.0,7.30,0.0


In [94]:
def model_design(features):
    model = Sequential(name="regression_model")
    num_features = features.shape[1]
    input = layers.InputLayer(input_shape=(num_features,))
    model.add(input)
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(1)) 
    opt = Adam(learning_rate=0.01)
    model.compile(loss='mse', metrics=['mae'], optimizer=opt)
    return model

In [95]:
model = model_design(features_train)
print(model.summary())

Model: "regression_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 128)               1024      
                                                                 
 dense_11 (Dense)            (None, 1)                 129       
                                                                 
Total params: 1,153
Trainable params: 1,153
Non-trainable params: 0
_________________________________________________________________
None


In [96]:
model.fit(features_train, labels_train, epochs=40, batch_size=1, verbose=1)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7faa77c346d0>

In [97]:
val_mse, val_mae = model.evaluate(features_test, labels_test, verbose=0)

In [98]:
print(f'Validation MSE: {val_mse}, Validation MAE: {val_mae}')

Validation MSE: 0.0073916371911764145, Validation MAE: 0.0721481516957283


Hyperparameter Adjustment:

In [99]:
model.fit(features_train, labels_train, epochs=5, batch_size=20, verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7faa77c59dd0>

In [100]:
print(f'Validation MSE: {val_mse}, Validation MAE: {val_mae}')

Validation MSE: 0.0073916371911764145, Validation MAE: 0.0721481516957283
