## Linear regression with Tensorflow v2
#### Thirumurugan Ramar

In [1]:
import tensorflow as tf

In [2]:
import pandas as pd
import numpy as np

In [3]:
data = pd.read_csv("Admission_Predict_Ver1.1.csv")
data.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [4]:
continuous_features = data[ ['GRE Score','TOEFL Score','University Rating','SOP','LOR ','CGPA'] ].values/100
categorical_research_features = data[ [ 'Research' ] ].values

In [5]:
continuous_features 

array([[3.37  , 1.18  , 0.04  , 0.045 , 0.045 , 0.0965],
       [3.24  , 1.07  , 0.04  , 0.04  , 0.045 , 0.0887],
       [3.16  , 1.04  , 0.03  , 0.03  , 0.035 , 0.08  ],
       ...,
       [3.3   , 1.2   , 0.05  , 0.045 , 0.05  , 0.0956],
       [3.12  , 1.03  , 0.04  , 0.04  , 0.05  , 0.0843],
       [3.27  , 1.13  , 0.04  , 0.045 , 0.045 , 0.0904]])

In [6]:
X = np.concatenate( [ continuous_features , categorical_research_features ] , axis=1 )
Y = data[ [ 'Chance of Admit ' ] ].values

In [7]:
X

array([[3.37  , 1.18  , 0.04  , ..., 0.045 , 0.0965, 1.    ],
       [3.24  , 1.07  , 0.04  , ..., 0.045 , 0.0887, 1.    ],
       [3.16  , 1.04  , 0.03  , ..., 0.035 , 0.08  , 1.    ],
       ...,
       [3.3   , 1.2   , 0.05  , ..., 0.05  , 0.0956, 1.    ],
       [3.12  , 1.03  , 0.04  , ..., 0.05  , 0.0843, 0.    ],
       [3.27  , 1.13  , 0.04  , ..., 0.045 , 0.0904, 0.    ]])

In [8]:
from sklearn.model_selection import train_test_split
train_features , test_features ,train_labels, test_labels = train_test_split( X , Y , test_size=0.25 )

In [9]:
X = tf.constant( train_features , dtype=tf.float32 )
Y = tf.constant( train_labels , dtype=tf.float32 ) 

In [11]:
train_features

array([[3.    , 1.05  , 0.01  , ..., 0.02  , 0.078 , 0.    ],
       [3.29  , 1.14  , 0.02  , ..., 0.04  , 0.0856, 1.    ],
       [3.28  , 1.12  , 0.04  , ..., 0.045 , 0.091 , 1.    ],
       ...,
       [3.12  , 1.08  , 0.03  , ..., 0.03  , 0.0853, 0.    ],
       [3.26  , 1.12  , 0.03  , ..., 0.03  , 0.091 , 1.    ],
       [3.21  , 1.09  , 0.04  , ..., 0.04  , 0.0913, 1.    ]])

In [14]:
train_labels

array([[0.58],
       [0.72],
       [0.78],
       [0.77],
       [0.71],
       [0.87],
       [0.73],
       [0.68],
       [0.7 ],
       [0.68],
       [0.74],
       [0.67],
       [0.49],
       [0.45],
       [0.75],
       [0.73],
       [0.52],
       [0.74],
       [0.75],
       [0.92],
       [0.63],
       [0.74],
       [0.62],
       [0.65],
       [0.96],
       [0.92],
       [0.52],
       [0.93],
       [0.56],
       [0.67],
       [0.7 ],
       [0.81],
       [0.71],
       [0.84],
       [0.96],
       [0.78],
       [0.67],
       [0.86],
       [0.92],
       [0.82],
       [0.64],
       [0.95],
       [0.36],
       [0.75],
       [0.93],
       [0.54],
       [0.89],
       [0.44],
       [0.91],
       [0.56],
       [0.89],
       [0.97],
       [0.87],
       [0.93],
       [0.72],
       [0.86],
       [0.93],
       [0.68],
       [0.62],
       [0.64],
       [0.73],
       [0.73],
       [0.53],
       [0.82],
       [0.79],
       [0.77],
       [0.

In [15]:
test_X = tf.constant( test_features , dtype=tf.float32 ) 
test_Y = tf.constant( test_labels , dtype=tf.float32 )  

In [16]:
def mean_squared_error(Y,y_pred):
  return tf.reduce_mean(tf.square(y_pred-Y))

def mean_squared_error_deriv(Y,y_pred):
  return tf.reshape(tf.reduce_mean(2*(y_pred - Y)),[1,1])

def h(X,weights,bias): 
  return tf.tensordot(X,weights,axes=1)+bias

In [17]:
num_epochs = 10
num_samples = X.shape[0]
batch_size = 10
learning_rate = 0.001

In [18]:
dataset = tf.data.Dataset.from_tensor_slices(( X , Y )) 
dataset = dataset.shuffle( 500 ).repeat( num_epochs ).batch( batch_size )
iterator = dataset.__iter__()

In [19]:
num_features = X.shape[1]
weights = tf.random.normal( ( num_features , 1 ) ) 
bias = 0

epochs_plot = list()
loss_plot = list()

for i in range( num_epochs ) :
    
    epoch_loss = list()
    for b in range( int(num_samples/batch_size) ):
        x_batch , y_batch = iterator.get_next()
   
        output = h( x_batch , weights , bias ) 
        loss = epoch_loss.append( mean_squared_error( y_batch , output ).numpy() )
    
        dJ_dH = mean_squared_error_deriv( y_batch , output)
        dH_dW = x_batch
        dJ_dW = tf.reduce_mean( dJ_dH * dH_dW )
        dJ_dB = tf.reduce_mean( dJ_dH )
    
        weights -= ( learning_rate * dJ_dW )
        bias -= ( learning_rate * dJ_dB ) 
        
    loss = np.array( epoch_loss ).mean()
    epochs_plot.append( i + 1 )
    loss_plot.append( loss ) 
    
    print( 'Loss is {}'.format( loss ) )

Loss is 0.2624738812446594
Loss is 0.21441861987113953
Loss is 0.19177424907684326
Loss is 0.17816539108753204
Loss is 0.17500090599060059
Loss is 0.17521145939826965
Loss is 0.16814042627811432
Loss is 0.17377960681915283
Loss is 0.17008259892463684
Loss is 0.17330804467201233
