In [1]:
import pandas as pd
import numpy as np

In [2]:
diabetes_df = pd.read_csv('../SupervisedML_13/diabetes.csv')
diabetes_df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


### Preprocessing

In [3]:
# checking nulls in our dataset
diabetes_df.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

#### Why using .describe()?

   * Are there any extreme values? (Outliers)
   * Does any features has 0 values?
   * Any features contains negative values?
    
* We would be able to answer those questions by using .describe() method for our entire dataframe.

In [4]:
diabetes_df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


* It seems like some features contains 0. There is a possibility for preganancy to be 0. But its not possible for other features(Glucose, BloodPressure, SkinThickness, Insulin, BMI). It might be, already our dataset has replaced Nans with 0s or kind of mistyping error.

In [5]:
# I am writing a function to check 0s in the columns. Keeping Columns which contains 0s in a list. 
zero_features = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']

def check_zeros(df,features):
    for i in features:
        print('%s : No. of 0s: %d' %(i,len(df.loc[df[i]==0,i])))
        
# calling zero_features function
check_zeros(diabetes_df,zero_features)

Glucose : No. of 0s: 5
BloodPressure : No. of 0s: 35
SkinThickness : No. of 0s: 227
Insulin : No. of 0s: 374
BMI : No. of 0s: 11


In [6]:
# Skin Thickness and Insulin columns contains more 0s. Using function We can impute these 0s with their average values.
# If we take mean for entire column, it will calculate including all 0s. So I am calculating only average of non zero
# values.

def impute_zeros(df, features):
    nonzero_vals = df.loc[df[features] != 0, features]
    avg = np.sum(nonzero_vals) / len(nonzero_vals)
    k = len(df.loc[ df[features] == 0, features])   # num of 0-entries
    df.loc[ df[features] == 0, features] = avg   # avg of non 0 values
    print('%s; Replaced %d entries with value: %.3f' % (features, k, avg))
    
for i in zero_features:
    impute_zeros(diabetes_df, i)


Glucose; Replaced 5 entries with value: 121.687
BloodPressure; Replaced 35 entries with value: 72.405
SkinThickness; Replaced 227 entries with value: 29.153
Insulin; Replaced 374 entries with value: 155.548
BMI; Replaced 11 entries with value: 32.457


In [7]:
# Let me check again, whether we replaced all 0s are not
diabetes_df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,121.686763,72.405184,29.15342,155.548223,32.457464,0.471876,33.240885,0.348958
std,3.369578,30.435949,12.096346,8.790942,85.021108,6.875151,0.331329,11.760232,0.476951
min,0.0,44.0,24.0,7.0,14.0,18.2,0.078,21.0,0.0
25%,1.0,99.75,64.0,25.0,121.5,27.5,0.24375,24.0,0.0
50%,3.0,117.0,72.202592,29.15342,155.548223,32.4,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,155.548223,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [None]:
# Other than pregnancies, all our zeros are replaced. 

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

X = diabetes_df.iloc[:,:-1].values
y = diabetes_df.iloc[:,-1].values

# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=6, stratify=y)

# Standardize
sc= StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

### Using ANN with PyTorch Frame work

In [9]:
# Libraries from pytorch

import torch
import torch.nn as nn  # helps you to create and train neural networks
import torch.nn.functional as F # F contains activation functions(sigmoid, relu, tanh, softmax, leaky relu etc..)

In [20]:
# creating tensors
# All input features need to be converted as floating tensors

X_train_tensor = torch.FloatTensor(X_train) 
X_test_tensor = torch.FloatTensor(X_test)

# No need to convert to float tensors in case of output feature
y_train_tensor = torch.LongTensor(y_train)
y_test_tensor = torch.LongTensor(y_test)

# artificial neural network class
class ANN_Model(nn.Module):
    def __init__(self,input_nodes = 8, hidden1 = 20, hidden2 = 20, output_nodes = 2):
        super().__init__()
        self.hidden1_connection = nn.Linear(input_nodes, hidden1) # first hidden layer
        self.hidden2_connection = nn.Linear(hidden1, hidden2) # second hidden layer
        self.output_layer = nn.Linear(hidden2, output_nodes) # output layer
    def forward(self,x):
        # applying activation function to hidden layers
        x = F.relu(self.hidden1_connection(x)) # F is torch.nn.functional
        x = F.relu(self.hidden2_connection(x))
        x = self.output_layer(x)
        return x
    
# for results reproducibility setting random seed
torch.manual_seed(6)

# creating an object for class ANN_Model
model = ANN_Model()

# creating an object for loss function 
cross_loss = nn.CrossEntropyLoss() # diabetes dataset is binary classification problemm so we can use cross entropyloss

# optimizer - is an algorithm that modifies attributes of nn such as weights and learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=0.01) # model.parameters() is a generator, so we can iterate and retirve all the parameter one by one

# run model through multiple epochs(iterations)
loss_list = []
n_epochs = 500
for i in range(n_epochs):
    y_pred = model.forward(X_train_tensor) # predicting y using our ann model
    loss = cross_loss(y_pred,y_train_tensor) # calculating the deviation between prediction and actual values
    loss_list.append(loss) # appending losses in the list, forward propagation ends here
    
    # after every 10 epochs printing loss
    if i % 15 == 1:
        print('Epoch number {} and Loss {}' .format(i,loss))
    
    optimizer.zero_grad() # clear the gradient before running backward propagation
    loss.backward() # backward propagation to find the derivative
    optimizer.step()  # perform one optimization step each epoch
    

Epoch number 1 and Loss 0.6573691368103027
Epoch number 16 and Loss 0.46483901143074036
Epoch number 31 and Loss 0.41151270270347595
Epoch number 46 and Loss 0.3737669587135315
Epoch number 61 and Loss 0.3302250802516937
Epoch number 76 and Loss 0.2801561951637268
Epoch number 91 and Loss 0.237721249461174
Epoch number 106 and Loss 0.19606539607048035
Epoch number 121 and Loss 0.1606629639863968
Epoch number 136 and Loss 0.1325983852148056
Epoch number 151 and Loss 0.10911095142364502
Epoch number 166 and Loss 0.08828233927488327
Epoch number 181 and Loss 0.07125198841094971
Epoch number 196 and Loss 0.05782382935285568
Epoch number 211 and Loss 0.047068022191524506
Epoch number 226 and Loss 0.03843929246068001
Epoch number 241 and Loss 0.03125106543302536
Epoch number 256 and Loss 0.025269214063882828
Epoch number 271 and Loss 0.02060263603925705
Epoch number 286 and Loss 0.016625309363007545
Epoch number 301 and Loss 0.013572918251156807
Epoch number 316 and Loss 0.011256532743573189

In [15]:
# Now that we have trained the network, we are going to use it in the testing set to make predictions.

prediction = []
with torch.no_grad(): # decreases memory consumption 
    for i, data in enumerate(X_test_tensor):
        pred = model(data)
        prediction.append(pred.argmax()) # returns index with max element in each prediction set
from sklearn.metrics import classification_report,precision_score, recall_score
print(classification_report(y_test_tensor,prediction))

              precision    recall  f1-score   support

           0       0.77      0.78      0.77       150
           1       0.58      0.57      0.58        81

    accuracy                           0.71       231
   macro avg       0.68      0.67      0.67       231
weighted avg       0.70      0.71      0.70       231



In [None]:
# I have used Adam optimizer and checked my precision and recall. Now I am going to try other optimizers in Pytorch.

## 1. Look up the Adam optimization functions in PyTorch. How does it work? Try at least one other optimization function with the diabetes dataset shown in class. How does the model perform with the new optimizer? Did it perform better or worse than Adam? Why do you think that is?

### How does Adam optimization work?

* The most important function of the optimizer is to update the weights of the learning algorithm to reach the least cost function. 

* ADAM (Adaptive Moment Estimation) algorithm works computing adaptive learning rates for each parameter at every iteration. It uses a combination of Gradient Descent with Momentum and RMSprop to determine the parameter values.

* Adam has been used most widely in Deep Learning models.

In [21]:
optimizer_list = [torch.optim.Adagrad, torch.optim.Adam,torch.optim.SGD]

for x in optimizer_list:
    optimizer = x(model.parameters(), lr=0.02) 
    loss_list = []
    n_epochs = 500
    for i in range(n_epochs):
        y_pred = model.forward(X_train_tensor) 
        loss = cross_loss(y_pred,y_train_tensor) 
        loss_list.append(loss) 
        if i % 15 == 1:
            print('Epoch number {} and Loss {}' .format(i,loss))
        optimizer.zero_grad() 
        loss.backward() 
        optimizer.step()  
        prediction = []
    with torch.no_grad(): # decreases memory consumption 
        for i, data in enumerate(X_test_tensor):
            pred = model(data)
            prediction.append(pred.argmax()) # returns index with max element in each prediction set
        print('Precision Score {} for {}'.format(precision_score(y_test_tensor,prediction).round(2),x))
        print('Recall Score {} for {}'.format(recall_score(y_test_tensor,prediction).round(2),x))

Epoch number 1 and Loss 0.24676987528800964
Epoch number 16 and Loss 0.005101545248180628
Epoch number 31 and Loss 0.004184851888567209
Epoch number 46 and Loss 0.0037477882578969
Epoch number 61 and Loss 0.0034690566826611757
Epoch number 76 and Loss 0.0032693410757929087
Epoch number 91 and Loss 0.003121255896985531
Epoch number 106 and Loss 0.0030049977358430624
Epoch number 121 and Loss 0.0029156634118407965
Epoch number 136 and Loss 0.0028457792941480875
Epoch number 151 and Loss 0.0027880542911589146
Epoch number 166 and Loss 0.002737445756793022
Epoch number 181 and Loss 0.0026917429640889168
Epoch number 196 and Loss 0.002650950103998184
Epoch number 211 and Loss 0.002614485565572977
Epoch number 226 and Loss 0.0025809193029999733
Epoch number 241 and Loss 0.002551501151174307
Epoch number 256 and Loss 0.002523834817111492
Epoch number 271 and Loss 0.0024979733861982822
Epoch number 286 and Loss 0.0024755410850048065
Epoch number 301 and Loss 0.0024549688678234816
Epoch number 

#### Comparing performance


|    Optimizer             |    precision score       |     Recall score     |
|:------------------------:|:------------------------:|:--------------------:|
|       Adam               |           0.58           |       0.57           | 
|       SGD                |           0.6            |       0.57           |  
|       Adagrad            |           0.61           |       0.59           |   



* I have used SGD and Adagrad optimizers to check the performance of our neural network. I got an improved precision and recall for **Adagrad optimizer**. Adagrad is an algorithm for gradient descent optimization where each parameter has its own learning rate. So that might be the reason why it gave improved performance.

## 2. Write a function that lists and counts the number of divisors for an input value.
Example 1:
Input: 5
Output: “There are 2 divisors: 1 and 5”

Example 2:
Input: 40
Output: “There are 8 divisors: 1, 2, 4, 5, 8, 10, 20, and 40"

In [32]:
def divisor():
    try:
        n = int(input("Input: "))
        divisors =""
        i=1
        count = 0
        while i <=n:
            if n%i == 0:
                count +=1
                if divisors=="":
                    divisors=str(i)
                else:
                    divisors=divisors +","+ str(i)
            i+=1 # I calculated all divisors here. I extended my code to print output in a desired way
            
        last_index=divisors.rfind(",") # rfind will give the index of last occurence
        New_string=divisors[:last_index]+" and "+divisors[last_index+1:] # slicling the last index to add "and"
        print("Output: There are",count,"divisors:",New_string)
    except Exception as e:
        return e

In [33]:
divisor()

Input: 5
Output: There are 2 divisors: 1 and 5


In [34]:
divisor()

Input: 40
Output: There are 8 divisors: 1,2,4,5,8,10,20 and 40


In [37]:
divisor()

Input: 989
Output: There are 4 divisors: 1,23,43 and 989
