<a href="https://colab.research.google.com/github/princesapkota/3rdaiws7/blob/main/wokrshop7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.model_selection import train_test_split
import pandas as pd

In [9]:
from google.colab import drive
drive.mount('/content/drive')
data = pd.read_csv('/content/drive/MyDrive/datasets/diabetes.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
# Define features (X) and target (y) for regression
X = data.drop(columns=['BloodPressure'])
y = data['BloodPressure']

# Split data into training and testing sets for regression
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 1: Regression Task - Predict Blood Pressure
# Feature Normalization and Feature Selection
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('feature_selection', SelectKBest(score_func=f_regression, k=5)),  # Select top 5 features
    ('regressor', Ridge(alpha=1.0))  # Ridge regression with regularization
])

# Train the regression pipeline model
pipeline.fit(X_train, y_train)

# Make predictions for regression
ridge_y_pred = pipeline.predict(X_test)

# Evaluate the regression model
ridge_mse = mean_squared_error(y_test, ridge_y_pred)
ridge_r2 = r2_score(y_test, ridge_y_pred)

# Step 2: Classification Task - Predict Diabetes Outcome
# Define features and target for classification
X_classification = data.drop(columns=['Outcome'])
y_classification = data['Outcome']

# Split the data for classification
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_classification, y_classification, test_size=0.2, random_state=42
)

# Logistic Regression Model
logistic_model = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression())
])

# Train Logistic Regression model
logistic_model.fit(X_train_class, y_train_class)

# Make predictions for Logistic Regression
logistic_y_pred = logistic_model.predict(X_test_class)

# Evaluate Logistic Regression model
logistic_accuracy = accuracy_score(y_test_class, logistic_y_pred)
logistic_report = classification_report(y_test_class, logistic_y_pred)

# K-Nearest Neighbors Model
knn_model = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', KNeighborsClassifier(n_neighbors=5))
])

# Train KNN model
knn_model.fit(X_train_class, y_train_class)

# Make predictions for KNN
knn_y_pred = knn_model.predict(X_test_class)

# Evaluate KNN model
knn_accuracy = accuracy_score(y_test_class, knn_y_pred)
knn_report = classification_report(y_test_class, knn_y_pred)

ridge_mse, ridge_r2, logistic_accuracy, logistic_report, knn_accuracy, knn_report



(409.843000367697,
 0.180052069532728,
 0.7532467532467533,
 '              precision    recall  f1-score   support\n\n           0       0.81      0.80      0.81        99\n           1       0.65      0.67      0.66        55\n\n    accuracy                           0.75       154\n   macro avg       0.73      0.74      0.73       154\nweighted avg       0.76      0.75      0.75       154\n',
 0.6948051948051948,
 '              precision    recall  f1-score   support\n\n           0       0.75      0.80      0.77        99\n           1       0.58      0.51      0.54        55\n\n    accuracy                           0.69       154\n   macro avg       0.66      0.65      0.66       154\nweighted avg       0.69      0.69      0.69       154\n')