In [2]:
import pandas as pd
# Raw CSV of the Pima Indians diabetes dataset hosted on GitHub.
url = (
    "https://raw.githubusercontent.com/jbrownlee/Datasets/master/"
    "pima-indians-diabetes.data.csv"
)

In [3]:
# Column labels to apply to the CSV, in file order: eight features followed
# by the target column "Class".
cols = (
    "Pregnancies Glucose BloodPressure SkinThickness "
    "Insulin BMI DiabetesPedigreeFunction Age Class"
).split()

# `names` supplies the header; `header=None` spells out that the first line of
# the file is data (this is what pandas already does when `names` is passed).
data = pd.read_csv(url, header=None, names=cols)

In [20]:
# Preview the first five rows to confirm the column names were applied.
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Separate the frame positionally: every column except the last is a feature,
# the last column ("Class") is the target.
X, y = data.iloc[:, :-1], data.iloc[:, -1]
print("Original Data (features only):", X.head(), sep="\n")

Original Data (features only):
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  
0                     0.627   50  
1                     0.351   31  
2                     0.672   32  
3                     0.167   21  
4                     2.288   33  


In [7]:
# Show the last five target labels (the "Class" column).
print(y.tail())

763    0
764    0
765    0
766    1
767    0
Name: Class, dtype: int64


In [8]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scaling: fit on the feature frame, then map each column linearly
# onto the [0, 1] interval.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X)
X_scaled = scaler.transform(X)

# Wrap the resulting array in a DataFrame so the feature names are kept.
X_scaled_df = pd.DataFrame(data=X_scaled, columns=X.columns)
print("\nScaled Data (0–1 range):", X_scaled_df.head(), sep="\n")


Scaled Data (0–1 range):
   Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
0     0.352941  0.743719       0.590164       0.353535  0.000000  0.500745   
1     0.058824  0.427136       0.540984       0.292929  0.000000  0.396423   
2     0.470588  0.919598       0.524590       0.000000  0.000000  0.347243   
3     0.058824  0.447236       0.540984       0.232323  0.111111  0.418778   
4     0.000000  0.688442       0.327869       0.353535  0.198582  0.642325   

   DiabetesPedigreeFunction       Age  
0                  0.234415  0.483333  
1                  0.116567  0.166667  
2                  0.253629  0.183333  
3                  0.038002  0.000000  
4                  0.943638  0.200000  


In [11]:
from sklearn.preprocessing import Binarizer

# Binarize the min-max scaled features: values strictly greater than the
# threshold (0.0) become 1.0, everything else becomes 0.0.
binarizer = Binarizer(threshold=0.0)
X_binarized = binarizer.fit(X_scaled_df).transform(X_scaled_df)

# Re-attach the feature names to the binarized array.
X_binarized_df = pd.DataFrame(data=X_binarized, columns=X.columns)
print("\nBinarized Data (threshold = 0.0):", X_binarized_df.head(), sep="\n")


Binarized Data (threshold = 0.0):
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin  BMI  \
0          1.0      1.0            1.0            1.0      0.0  1.0   
1          1.0      1.0            1.0            1.0      0.0  1.0   
2          1.0      1.0            1.0            0.0      0.0  1.0   
3          1.0      1.0            1.0            1.0      1.0  1.0   
4          0.0      1.0            1.0            1.0      1.0  1.0   

   DiabetesPedigreeFunction  Age  
0                       1.0  1.0  
1                       1.0  1.0  
2                       1.0  1.0  
3                       1.0  0.0  
4                       1.0  1.0  


In [13]:
from sklearn.preprocessing import StandardScaler

# Standardize the raw features (zero mean, unit variance per column).
standardizer = StandardScaler()
standardizer.fit(X)
X_standardized = standardizer.transform(X)

# Keep the feature names and round to three decimals for readable display.
X_standardized_df = pd.DataFrame(data=X_standardized, columns=X.columns)
X_standardized_df = X_standardized_df.round(decimals=3)
print("\nStandardized Data (mean = 0, std = 1):", X_standardized_df.head(), sep="\n")



Standardized Data (mean = 0, std = 1):
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin    BMI  \
0        0.640    0.848          0.150          0.907   -0.693  0.204   
1       -0.845   -1.123         -0.161          0.531   -0.693 -0.684   
2        1.234    1.944         -0.264         -1.288   -0.693 -1.103   
3       -0.845   -0.998         -0.161          0.155    0.123 -0.494   
4       -1.142    0.504         -1.505          0.907    0.766  1.410   

   DiabetesPedigreeFunction    Age  
0                     0.468  1.426  
1                    -0.365 -0.191  
2                     0.604 -0.106  
3                    -0.921 -1.042  
4                     5.485 -0.020  


In [11]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

In [12]:
# Modelling inputs.
#
# Features: the min-max scaled values, rebuilt from the already fitted `scaler`.
# Assigning the raw columns of `data` to `X_scaled_df` here would replace the
# scaled frame with unscaled values under a misleading name, and the classifier
# would then be trained on raw units while the hand-written sample predicted
# further down is expressed in 0-1 units.
# NOTE(review): `scaler` was fitted on the full dataset before the train/test
# split; fit it on the training rows only if test-set leakage matters here.
X_scaled_df = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)

# Labels: the last column of `data` ("Class"), intentionally left unscaled.
y_scaled_df = data.iloc[:, -1]

In [13]:
# Display the label series. Despite the "_scaled" name, this is the untouched
# "Class" column taken straight from `data`; no scaling is applied to labels.
y_scaled_df

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Class, Length: 768, dtype: int64

In [14]:
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
_split_parts = train_test_split(X_scaled_df, y_scaled_df, test_size=0.2, random_state=42)
X_train_df, X_test_df, y_train_df, y_test_df = _split_parts

In [15]:
# Number of training labels produced by the train/test split.
y_train_df.shape

(614,)

In [16]:
# Gaussian Naive Bayes classifier with default settings, fitted on the
# training portion of the split.
model = GaussianNB()
model.fit(X=X_train_df, y=y_train_df)

In [17]:
# Predict class labels for the held-out test features.
y_pred = model.predict(X_test_df)

In [18]:
# Score the test-set predictions: overall accuracy plus the per-class
# precision / recall / F1 table.
test_accuracy = accuracy_score(y_test_df, y_pred)
test_report = classification_report(y_test_df, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

Accuracy: 0.7662337662337663

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.80      0.81        99
           1       0.66      0.71      0.68        55

    accuracy                           0.77       154
   macro avg       0.75      0.75      0.75       154
weighted avg       0.77      0.77      0.77       154



In [19]:

# Classify one hand-written observation, given in the same column order as X.
# NOTE(review): these values look like min-max scaled features (they closely
# match the second row of the scaled frame); confirm that `model` was trained
# on features expressed in the same units before trusting the prediction.
sample_values = [0.058, 0.43, 0.54, 0.29, 0, 0.40, 0.12, 0.16]
sample = pd.DataFrame([sample_values], columns=X.columns)

pred = model.predict(sample)
print("Predicted class:", pred)

Predicted class: [0]


In [5]:
#scaling
from sklearn.preprocessing import MinMaxScaler

# NOTE: this cell repeats the min-max scaling step that already appears
# earlier in the notebook; it re-fits `scaler` and rebuilds `X_scaled_df`.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X)
X_scaled = scaler.transform(X)

# Attach the feature names to the scaled array.
X_scaled_df = pd.DataFrame(data=X_scaled, columns=X.columns)
print("\nScaled Data (0–1 range):", X_scaled_df.head(), sep="\n")


Scaled Data (0–1 range):
   Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
0     0.352941  0.743719       0.590164       0.353535  0.000000  0.500745   
1     0.058824  0.427136       0.540984       0.292929  0.000000  0.396423   
2     0.470588  0.919598       0.524590       0.000000  0.000000  0.347243   
3     0.058824  0.447236       0.540984       0.232323  0.111111  0.418778   
4     0.000000  0.688442       0.327869       0.353535  0.198582  0.642325   

   DiabetesPedigreeFunction       Age  
0                  0.234415  0.483333  
1                  0.116567  0.166667  
2                  0.253629  0.183333  
3                  0.038002  0.000000  
4                  0.943638  0.200000  


In [6]:
from sklearn.preprocessing import Binarizer

# NOTE: this cell repeats the binarization step that already appears earlier
# in the notebook. Scaled values strictly above 0.0 map to 1.0, others to 0.0.
binarizer = Binarizer(threshold=0.0)
X_binarized = binarizer.fit(X_scaled_df).transform(X_scaled_df)

# Attach the feature names to the binarized array.
X_binarized_df = pd.DataFrame(data=X_binarized, columns=X.columns)
print("\nBinarized Data (threshold = 0.0):", X_binarized_df.head(), sep="\n")


Binarized Data (threshold = 0.0):
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin  BMI  \
0          1.0      1.0            1.0            1.0      0.0  1.0   
1          1.0      1.0            1.0            1.0      0.0  1.0   
2          1.0      1.0            1.0            0.0      0.0  1.0   
3          1.0      1.0            1.0            1.0      1.0  1.0   
4          0.0      1.0            1.0            1.0      1.0  1.0   

   DiabetesPedigreeFunction  Age  
0                       1.0  1.0  
1                       1.0  1.0  
2                       1.0  1.0  
3                       1.0  0.0  
4                       1.0  1.0  


In [7]:
from sklearn.preprocessing import StandardScaler

# NOTE: this cell repeats the standardization step that already appears
# earlier in the notebook (zero mean, unit variance per raw feature column).
standardizer = StandardScaler()
standardizer.fit(X)
X_standardized = standardizer.transform(X)

# Keep the feature names and round to three decimals for display.
X_standardized_df = pd.DataFrame(data=X_standardized, columns=X.columns)
X_standardized_df = X_standardized_df.round(decimals=3)
print("\nStandardized Data (mean = 0, std = 1):", X_standardized_df.head(), sep="\n")



Standardized Data (mean = 0, std = 1):
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin    BMI  \
0        0.640    0.848          0.150          0.907   -0.693  0.204   
1       -0.845   -1.123         -0.161          0.531   -0.693 -0.684   
2        1.234    1.944         -0.264         -1.288   -0.693 -1.103   
3       -0.845   -0.998         -0.161          0.155    0.123 -0.494   
4       -1.142    0.504         -1.505          0.907    0.766  1.410   

   DiabetesPedigreeFunction    Age  
0                     0.468  1.426  
1                    -0.365 -0.191  
2                     0.604 -0.106  
3                    -0.921 -1.042  
4                     5.485 -0.020  
