### Importing Libraries and Loading Data

In [1]:
import pandas as pd 
import numpy as np
data = pd.read_csv('diabetes.csv')

In [2]:
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


### Column Descriptions
1. Pregnancies - Number of times pregnant
2. Glucose - Plasma glucose concentration a 2 hours in an oral glucose tolerance test
3. BloodPressure - Diastolic blood pressure (mm Hg)
4. SkinThickess - Triceps skin fold thickness (mm)
5. Insulin - 2-Hour serum insulin (mu U/ml)
6. BMI - Body mass index (weight in kg/(height in m)^2)
7. DiabetesPedigreeFunction - Diabetes pedigree function
8. Age - Age (years)
9. Outcome - Class variable (0 or 1) 268 of 768 are 1, the others are 0

In [3]:
# checking for null values
data.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [7]:
X = data.iloc[:,0:8]

In [8]:
y = data.iloc[:,8]

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64


### Normalizing the data

In [10]:
from sklearn.preprocessing import normalize
X = normalize(X, norm='l2')

### Making the train test split

In [16]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.33)

In [17]:
y_train.shape

(514,)

### Training and Predicting using the model

In [18]:
# Calling the required Library
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0).fit(X_train, y_train)
# Making the prediction
training_prediction = clf.predict(X_train)
test_prediction = clf.predict(X_test)

### Checking the accuracy

In [20]:
from sklearn.metrics import accuracy_score
training_accuracy = accuracy_score(y_train, training_prediction)
test_accuracy = accuracy_score(y_test, test_prediction)

In [21]:
print(f"Training accuracy = {training_accuracy}")
print(f"Test accuracy = {test_accuracy}")

Training accuracy = 0.669260700389105
Test accuracy = 0.6023622047244095
