In [1]:
import pandas as pd

# Load the dataset
df = pd.read_csv('/content/drive/MyDrive/Concept-and-Technology-of-AI-/diabetes.csv')

# Display the first few rows
print(df.head())


   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

print("Missing values in each column:\n",df.isnull().sum())

features=['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']

X = df[features]
y_regression = df['BloodPressure']  # Target for regression
y_classification = df['Outcome']    # Target for classification

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_scaled, y_regression, test_size=0.2, random_state=42)
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(X_scaled, y_classification, test_size=0.2, random_state=42)


Missing values in each column:
 Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Initialize the Linear Regression model
reg_model = LinearRegression()

# Train the model
reg_model.fit(X_train_reg, y_train_reg)

# Predict on the test set
y_pred_reg = reg_model.predict(X_test_reg)

# Evaluate the model
mse = mean_squared_error(y_test_reg, y_pred_reg)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_reg, y_pred_reg)

print("Regression Model Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R² Score: {r2:.2f}")


Regression Model Evaluation:
Mean Squared Error (MSE): 0.00
Root Mean Squared Error (RMSE): 0.00
R² Score: 1.00


In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Initialize Logistic Regression and KNN models
log_reg_model = LogisticRegression()
knn_model = KNeighborsClassifier(n_neighbors=5)  # You can tune 'n_neighbors' for better results

# Train Logistic Regression
log_reg_model.fit(X_train_cls, y_train_cls)

# Train KNN
knn_model.fit(X_train_cls, y_train_cls)

# Predict on the test set using Logistic Regression
y_pred_log_reg = log_reg_model.predict(X_test_cls)

# Predict on the test set using KNN
y_pred_knn = knn_model.predict(X_test_cls)

# Evaluate Logistic Regression model
print("Logistic Regression Model Evaluation:")
print(f"Accuracy: {accuracy_score(y_test_cls, y_pred_log_reg):.2f}")
print(f"Precision: {precision_score(y_test_cls, y_pred_log_reg):.2f}")
print(f"Recall: {recall_score(y_test_cls, y_pred_log_reg):.2f}")
print(f"F1 Score: {f1_score(y_test_cls, y_pred_log_reg):.2f}")
print("Confusion Matrix:\n", confusion_matrix(y_test_cls, y_pred_log_reg))

# Evaluate KNN model
print("\nKNN Model Evaluation:")
print(f"Accuracy: {accuracy_score(y_test_cls, y_pred_knn):.2f}")
print(f"Precision: {precision_score(y_test_cls, y_pred_knn):.2f}")
print(f"Recall: {recall_score(y_test_cls, y_pred_knn):.2f}")
print(f"F1 Score: {f1_score(y_test_cls, y_pred_knn):.2f}")
print("Confusion Matrix:\n", confusion_matrix(y_test_cls, y_pred_knn))


Logistic Regression Model Evaluation:
Accuracy: 0.75
Precision: 0.65
Recall: 0.67
F1 Score: 0.66
Confusion Matrix:
 [[79 20]
 [18 37]]

KNN Model Evaluation:
Accuracy: 0.69
Precision: 0.57
Recall: 0.49
F1 Score: 0.53
Confusion Matrix:
 [[79 20]
 [28 27]]
