# College Graduate Admission using ANN
The goal of this project is to predict Graduate Admissions from an Indian perspective.

### Loading the dataset

In [2]:
# Lets load the dataset
import pandas as pd
df = pd.read_csv("../input/graduate-admissions/Admission_Predict.csv")

In [3]:
# Lets see the first five rows
df.head()

### Data Preprocessing

In [4]:
# Lets see the shape of the dataset
df.shape

In [5]:
# Lets check for null values
df.isnull().sum()

There are no null values.

In [6]:
# Lets drop the "Serial No." as it is not that useful
df.drop("Serial No.", axis = 1, inplace = True)

In [7]:
# Lets see the first five rows
df.head()

In [8]:
# Independent and Dependent features
X = df.iloc[:,0:-1].values
y = df.iloc[:,-1].values

In [9]:
# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

In [10]:
# Train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 0)

### Model Building

In [11]:
# Lets import tensorflow==2.5.0
!pip install tensorflow==2.5.0

In [12]:
# Lets see the tensorflow version
import tensorflow
tensorflow.__version__

In [13]:
# Importing the dependencies
from tensorflow.keras.layers import Dense
from tensorflow.keras import Sequential

In [14]:
# Lets create an object of the Sequential class
model = Sequential()

In [15]:
# Model Building
model.add(Dense(3, activation = "relu", input_dim = X_train.shape[1]))
model.add(Dense(1, activation = "linear"))

In [16]:
# Lets see the model summary
model.summary()

In [17]:
# Model compile
model.compile(optimizer = "Adam", loss = "mean_squared_error")

In [18]:
# Model Training
model.fit(X_train, y_train, epochs = 10, batch_size = 10, verbose = 1)

### Prediction

In [19]:
# Lets predict on the test data
y_pred = model.predict(X_test)

In [20]:
# R2 score
from sklearn.metrics import r2_score
r2_score(y_pred, y_test)

The model performance is very poor, giving a negative value of R2. Now, lets try to improve the model. We can improve the model performance by:
1. Increasing the number of epochs.
2. Increasing the nodes in hidden layers.
3. Adding more layers.

In [21]:
# Lets try with increasing the number of epochs and see the effects on R2 score
model.add(Dense(3, activation = "relu", input_dim = X_train.shape[1]))
model.add(Dense(1, activation = "linear"))

# Model compile
model.compile(optimizer = "Adam", loss = "mean_squared_error")

# Model Training
model.fit(X_train, y_train, epochs = 100, batch_size = 10, verbose = 1)

In [22]:
# Lets predict on the test data
y_pred = model.predict(X_test)

In [23]:
# R2 score
from sklearn.metrics import r2_score
r2_score(y_pred, y_test)

The model performance increased. R2 score increased from -0.28 to 0.61

In [24]:
# Lets try with increasing the number of epochs, adding another layer and increasing neurons and see the effects on R2 score
model.add(Dense(7, activation = "relu", input_dim = X_train.shape[1]))
model.add(Dense(7, activation = "relu"))
model.add(Dense(1, activation = "linear"))

# Model compile
model.compile(optimizer = "Adam", loss = "mean_squared_error")

# Model Training
model.fit(X_train, y_train, epochs = 100, batch_size = 10, verbose = 1)

In [25]:
# Lets predict on the test data
y_pred = model.predict(X_test)

In [26]:
# R2 score
from sklearn.metrics import r2_score
r2_score(y_pred, y_test)

Model performance increased even further from R2 score of 0.61 to 0.66

In [27]:
# Introducing validation split inside our model
model.add(Dense(7, activation = "relu", input_dim = X_train.shape[1]))
model.add(Dense(7, activation = "relu"))
model.add(Dense(1, activation = "linear"))

# Model compile
model.compile(optimizer = "Adam", loss = "mean_squared_error")

# Model Training
history = model.fit(X_train, y_train, epochs = 100, batch_size = 10, verbose = 1, validation_split = 0.2)

In [30]:
# Lets predict on the test data
y_pred = model.predict(X_test)

In [31]:
# R2 score
from sklearn.metrics import r2_score
r2_score(y_pred, y_test)

Model performs even better, R2 incresed from 0.66 to 0.68

In [32]:
# Lets plot the training and the validation loss
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"]);