In [None]:
from google.colab import files
# Opens Colab's file-upload dialog; the CSV read by the next cell must be among the uploaded files.
file = files.upload()

In [None]:
import pandas as pd
# Load the dataset from the working directory and preview its first rows.
df = pd.read_csv('Pima_Diabetes_Dataset.csv')
df.head()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
import seaborn as sns
import numpy as np

In [None]:
# Relabel the numeric target (1/0) as text so the pairplot treats it as discrete
# classes and shows a readable legend.
# This step is optional: it only matters for the plot of how the independent
# features (X) relate to the dependent target (y).
# The dtype guard makes the cell safe to re-run: once the column holds text,
# `== 1` is False everywhere and every row would be relabelled as non-diabetic.
if pd.api.types.is_numeric_dtype(df["Outcome"]):
  df['Outcome'] = np.where(df["Outcome"] == 1, "Diabetic", "Non-Diabetic")
df.head()

In [None]:
sns.pairplot(df, hue="Outcome") # hue --> column used to colour the points; here the dependent (target) values

In [None]:
# The text labels written into "Outcome" were only for plotting; reload the CSV to get the original numeric target back
df = pd.read_csv('Pima_Diabetes_Dataset.csv')
df.head()

In [None]:
# Feature matrix: every column except the target, as a NumPy array.
X = df.drop(columns=["Outcome"]).values
# Target vector: the "Outcome" column on its own.
y = df.loc[:, "Outcome"].values

In [None]:
from sklearn.model_selection import train_test_split
# test_size is the fraction of rows held out for evaluation: keep 20% for testing
# and train on the remaining 80% (0.8 here would train on only a fifth of the data).
# random_state fixes the shuffle so the split is reproducible.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2, random_state = 0)

In [None]:
### Libraries from Pytorch

import torch
import torch.nn as nn #Helps to create models
import torch.nn.functional as F

In [None]:
####### CREATING TENSORS

# Independent features are stored as 32-bit floats, the dtype nn.Linear weights use.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
# Dependent class labels stay integers (int64 / "long"), not floats.
y_train = torch.tensor(y_train, dtype=torch.int64)
y_test = torch.tensor(y_test, dtype=torch.int64)


### CREATING MODEL USING PYTORCH

In [None]:
df.shape  # (rows, columns); the model below assumes 8 feature columns, so 9 columns are expected here

In [None]:
# Network definition.
#   input_features --> number of independent input columns (8 of the 9 dataset columns)
#   hidden1, hidden2 --> neuron counts of the first and second hidden layers
#   out_features --> number of output classes (2 here: labels 0 and 1)

class ANN_Model(nn.Module):
  """Feed-forward classifier: two ReLU hidden layers followed by a linear output layer."""

  def __init__(self,input_features = 8, hidden1 = 20, hidden2 = 20, out_features=2):
    """Build the three fully connected layers with the given sizes."""
    # Initialise nn.Module first so the layers assigned below are registered as parameters.
    super().__init__()
    # input -> first hidden layer (8 -> 20 with the defaults)
    self.f_connected1 = nn.Linear(in_features=input_features, out_features=hidden1)
    # first hidden layer -> second hidden layer
    self.f_connected2 = nn.Linear(in_features=hidden1, out_features=hidden2)
    # second hidden layer -> one raw score per class
    self.out = nn.Linear(in_features=hidden2, out_features=out_features)

  def forward(self,x):
    """Return the raw class scores (logits) for x; no softmax is applied here."""
    first_hidden = F.relu(self.f_connected1(x))
    second_hidden = F.relu(self.f_connected2(first_hidden))
    return self.out(second_hidden)
    

In [None]:
## Instantiate ANN_Model
## Seed PyTorch's random number generator first so that the layers' random
## initial weights are the same on every run.

torch.manual_seed(20) # must run before ANN_Model() is constructed for the initial weights to be reproducible
model = ANN_Model()

In [None]:
# Inspect the model.

model.parameters # no parentheses: this displays the bound method itself; call model.parameters() to get an iterator over the weight and bias tensors

In [None]:
## Backward propagation setup --> choose the loss function and the optimizer
# loss_function --> measures how far the predicted class scores are from the true labels
# optimizer --> adjusts the weights to reduce that loss
# lr --> learning rate (step size); too small and training crawls, too large and it overshoots the minimum

loss_function = nn.CrossEntropyLoss() # classification loss over class scores; works for two or more classes
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [None]:
epochs = 20
final_losses = [] # one plain Python float per epoch, used later for the loss curve

# Full-batch training: each epoch passes the whole training set through the model once.
for epoch in range(1, epochs + 1):
  y_pred = model(X_train) # call the module rather than .forward() so nn.Module hooks run
  loss = loss_function(y_pred, y_train) # compare class scores with the true labels
  # Store only the scalar value. Appending the tensor itself keeps every epoch's
  # autograd graph alive and cannot be converted to NumPy when plotting.
  final_losses.append(loss.item())
  # Report the first epoch and every tenth one after it (1, 11, 21, ...)
  if epoch % 10 == 1:
    print("Epoch number is :{} and the loss is:{}".format(epoch, loss.item()))
  optimizer.zero_grad() # clear the gradients accumulated by the previous step
  loss.backward() # backpropagate to compute fresh gradients
  optimizer.step() # apply one optimizer update to the weights

In [None]:
#Plot the loss function

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Plot the training loss for every epoch.
# Convert each recorded loss to a plain float first: matplotlib cannot turn
# tensors that still track gradients into NumPy values, and float() accepts
# both one-element tensors and ordinary numbers.
loss_values = [float(loss_value) for loss_value in final_losses]
plt.plot(range(len(loss_values)), loss_values)
plt.ylabel("Loss")
plt.xlabel("Epoch")

In [None]:
## Prediction on the X_test data
# Score the whole test set in one batched forward pass instead of one row at a
# time; gradients are not needed for evaluation, so autograd is switched off.
with torch.no_grad():
  test_scores = model(X_test)
# argmax over the class dimension gives the index of the highest score per row;
# tolist() yields plain Python ints, as the later metric cells expect.
prediction = test_scores.argmax(dim=1).tolist()
# Preview a few predictions rather than printing one line per test row.
prediction[:10]

In [None]:
# Compare the predictions with the true test labels.
import sklearn
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=prediction)
cm

##### Diagonal entries are correct predictions; off-diagonal entries are mistakes.
##### Recorded run: 320 and 84 --> right results, 135 and 76 --> wrong results


In [None]:
# Draw the confusion matrix as an annotated heatmap.

plt.figure(figsize = (10,6))
sns.heatmap(cm,annot  = True, fmt = "d") # fmt="d" prints whole-number counts instead of scientific notation
# sklearn's confusion_matrix puts the true labels on the rows and the predicted
# labels on the columns; heatmap draws rows along y and columns along x.
plt.xlabel("Predicted Values")
plt.ylabel("Actual Values")

In [None]:
# Accuracy: the fraction of test rows whose predicted class matches the true class.

from sklearn.metrics import accuracy_score
score = accuracy_score(y_true=y_test, y_pred=prediction)
score

In [None]:
## SAVE THE MODEL

# NOTE(review): this pickles the whole model object, so loading it later requires the
# ANN_Model class definition to be available; saving model.state_dict() is the more portable option.
torch.save(model, "diabetes.pt") # ".pt" is the conventional extension for saved PyTorch files

In [None]:
## Prediction of new data point
# Show the first row's values without the last column (presumably the target) as a template for a new sample
list(df.iloc[0,:-1])

In [None]:
# Hand-written sample with eight values, intended to follow the feature order printed above

lst1 = [6.0, 130.0, 72.0, 40.0, 0.0, 25.6, 0.627, 45.0]

In [None]:
# Turn the sample into a 1-D float tensor so it can be passed to the model
new_data = torch.tensor(lst1)

In [None]:
# Score the new sample with the trained model; gradients are not needed here.

with torch.no_grad():
  new_scores = model(new_data) # one forward pass, reused for both printouts
  print(new_scores)
  print(new_scores.argmax().item())

# Predicted class 0 --> no diabetes, predicted class 1 --> diabetes


THANK YOU
---

