<a href="https://colab.research.google.com/github/riyamishra28/Early-Type-2-Diabetes-Prediction/blob/main/NaiveBayes_Diabetes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Train and evaluate a Gaussian Naive Bayes classifier on an uploaded CSV.
#
# Pipeline: upload -> load -> drop identifier columns -> label-encode the
# 'Gender' and 'CLASS' columns -> stratified 70/30 split -> fit -> report
# metrics -> persist the fitted model with joblib.

# Prompt for the CSV through Colab's upload widget. `uploaded` is a dict
# keyed by file name whose values are the uploaded file contents (bytes).
print("Please upload your CSV file")
uploaded = files.upload()

# Fail with a clear message instead of hitting a NameError on `df` further
# down when the dialog is cancelled or nothing is uploaded.
if not uploaded:
    raise ValueError("No file was uploaded; a CSV file is required.")

# Load the dataset. If several files were uploaded, the last one is used
# (the same file the previous overwrite-in-a-loop version ended up with).
# The bytes returned by the widget are parsed directly rather than
# re-opening the file by name, so loading does not depend on the name Colab
# saved the file under (it renames on collision, e.g. "name (1).csv").
import io

filename = list(uploaded)[-1]
df = pd.read_csv(io.BytesIO(uploaded[filename]))

# Preview the dataset
print("Dataset preview:")
print(df.head())

# Drop identifier columns; errors='ignore' keeps this a no-op when the
# columns are absent from the uploaded file.
# NOTE(review): if rows differ only in these identifier columns, dropping
# them leaves exact duplicates that can land on both sides of the
# train/test split and inflate the reported accuracy -- consider
# df.drop_duplicates() here after confirming against the data.
df = df.drop(columns=['ID', 'No_Pation'], errors='ignore')

# Check for missing values
print("Missing values:")
print(df.isnull().sum())

# Encode categorical variables. One encoder per column so each keeps its
# own label <-> integer mapping (available afterwards via `.classes_`).
label_encoder_gender = LabelEncoder()
label_encoder_class = LabelEncoder()

df['Gender'] = label_encoder_gender.fit_transform(df['Gender'])
df['CLASS'] = label_encoder_class.fit_transform(df['CLASS'])

# Split the dataset into features (every column except 'CLASS') and target
X = df.drop(columns='CLASS')
y = df['CLASS']

# Hold out 30% for testing. stratify=y keeps the class proportions the same
# in both partitions; random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y)

# Initialize and train the Gaussian Naive Bayes model
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Make predictions on the held-out set
y_pred = gnb.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

# Optional: Save the model
# NOTE(review): only the classifier is persisted; the fitted label encoders
# are not, so a consumer of the .pkl must recreate the same Gender/CLASS
# integer mapping on its own -- confirm whether they should be saved too.
import joblib
joblib.dump(gnb, 'naive_bayes_model.pkl')
print("Model saved as 'naive_bayes_model.pkl'")

Please upload your CSV file


Saving DiabetesPredict_Dataset.csv to DiabetesPredict_Dataset (1).csv
Dataset preview:
      ID  No_Pation Gender   AGE  Urea    Cr  HbA1c  Chol   TG  HDL  LDL  \
0  502.0    17975.0      F  50.0   4.7  46.0    4.9   4.2  0.9  2.4  1.4   
1  735.0    34221.0      M  26.0   4.5  62.0    4.9   3.7  1.4  1.1  2.1   
2  420.0    47975.0      F  50.0   4.7  46.0    4.9   4.2  0.9  2.4  1.4   
3  680.0    87656.0      F  50.0   4.7  46.0    4.9   4.2  0.9  2.4  1.4   
4  504.0    34223.0      M  33.0   7.1  46.0    4.9   4.9  1.0  0.8  2.0   

   VLDL   BMI CLASS  
0   0.5  24.0     N  
1   0.6  23.0     N  
2   0.5  24.0     N  
3   0.5  24.0     N  
4   0.4  21.0     N  
Missing values:
Gender    0
AGE       0
Urea      0
Cr        0
HbA1c     0
Chol      0
TG        0
HDL       0
LDL       0
VLDL      0
BMI       0
CLASS     0
dtype: int64
Accuracy: 88.63%
Confusion Matrix:
[[ 556   77   21]
 [  19  598   11]
 [  54  159 1505]]
Classification Report:
              precision    recall  f1-