# Implementing Gradient Descent Example

In [1]:
import pandas as pd

### Importing dataset downloaded from https://stats.idre.ucla.edu/stat/data/binary.csv 

In [2]:
admis=pd.read_csv('binary.csv')

## Data Cleaning

### One-Hot Encoding the rank using pandas' get_dummies method into 4 new columns and then concating these columns with the original dataset  

In [3]:
pd_dum=pd.concat([admis,pd.get_dummies(admis['rank'],prefix='rank')],axis=1)

### We then drop the original rank table. Dummy Variables are the one that takes value either 0 or 1 indicating prescence or abscense of something. For example,categories 'dog' or 'not dog',the two are mutually exclusive. Hence it is used to sort data into mutual exclusive categories avoiding dependency between classes.

In [4]:
data=pd_dum.drop('rank',axis=1)

### After that we standarize the GRE and GPA, i.e. scaling the values between  0 or 1.

In [5]:
for field in ['gre', 'gpa']:
    mean,std=data[field].mean(),data[field].std()
    data.loc[:,field]=(data[field]-mean)/std


In [6]:
import numpy as np

### Then we split the dataset into training and test data with 80:20 ratio

In [7]:
sampleset=np.random.choice(data.index,int(len(data)*0.8),replace=False)

In [8]:
data_train,data_test=data.ix[sampleset],data.drop(sampleset)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


### After that we define features and targets

In [9]:
features,targets=data_train.drop('admit',axis=1),data_train['admit']

In [10]:
features_test,targets_test=data_test.drop('admit',axis=1),data_test['admit']

## Implementing gradient descent and training the network on  data

## Algorithm to update weights
### - Set the weight step to 0.
### - For each record in training data:
        -Do Forward Pass,i.e. calculating the output formula
        -Calculating error term,i.e. = (target-output)*derivative of output function
        -Update the weight step += error_term*x # x at i th step
### - After that we update the weights w at i += alpha  *   weight_step at i  * x at i  / m
      #alpha is the learning rate and m is the number of records
### - Repeat above steps for e epochs.

In [11]:
# Activation function
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [12]:
np.random.seed(28)
n_records,n_features=features.shape
last_loss=None

In [13]:
weights=np.random.normal(scale=1/n_features**0.5,size=n_features)

In [14]:
epochs=2000
alpha=0.4

In [15]:
for e in range(epochs):
    del_weights=np.zeros(weights.shape)
    for x,y in zip(features.values,targets):
        h=np.dot(x,weights)
        output=sigmoid(h)
        error=y-output
        error_term=error*output*(1-output) #derivative of sigmoid function is sigmoidfunction*(1-sigmoidfunction)
        del_weights+=error_term*x
    weights += alpha*del_weights/n_records

    #After every 200 epoch printing meaning squared error and also displaying if the loss is increasing or decreasing
    if e%(epochs/10)==0:
        outz=sigmoid(np.dot(features,weights))
        errorz=np.mean((targets-outz)**2)
        if last_loss and last_loss<errorz:
            print('Loss Increasing! Loss=',errorz)
        else:
            print('Loss=',errorz)
        last_loss=errorz
        

Loss= 0.2668565052671138
Loss= 0.20730181092979488
Loss= 0.19998882485260605
Loss= 0.19838150365758972
Loss= 0.19788959908604448
Loss= 0.19770697756852962
Loss= 0.19763046384076208
Loss= 0.19759575420898717
Loss= 0.1975791442932227
Loss= 0.1975709018292245


### Since now we got our weights after training. Let's now check the accuracy on the test data we splited earlier.
### Student Admitted = 1 and Student Not Admited = 0, hence probabilities closer to 1 predicts student admitted.
### So all the probabilities > 0.5 convey student being admitted
### So we make all the probabilities > 0.5 as 1 while the others 0 and save it as our predictions. And then check the accuracy.

In [16]:
test_output=sigmoid(np.dot(features_test,weights))

predictions=test_output>0.5

accuracy=np.mean(predictions==targets_test)

In [17]:
print('Accuracy:',accuracy)

Accuracy: 0.7875
