### Step 1: Load and Preprocess the Dataset
1. Download the dataset from the UCI repository.
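2. Exclude the `selector` column (the last column in the dataset) and bin the `drinks` column into three classes (low, moderate, high) to use as the classification target.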



In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the dataset from a URL; column names are supplied explicitly via `names`
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/liver-disorders/bupa.data"
columns = ["mcv", "alkphos", "sgpt", "sgot", "gammagt", "drinks", "selector"]
data = pd.read_csv(url, names=columns)

# Drop the 'selector' column so that 'drinks' becomes the last column
data = data.drop(columns=["selector"])

# Bin the 'drinks' column into categories
bins = [0, 1, 3, np.inf]  # Bin edges: [0, 1] (low), (1, 3] (moderate), (3, inf] (high)
labels = [0, 1, 2]  # Class labels, one per bin
drinks_binned = pd.cut(data["drinks"], bins=bins, labels=labels, include_lowest=True)

# pd.cut yields NaN for missing or out-of-range (e.g. negative) values, and
# `.cat.codes` would silently encode those rows as class -1. Fail loudly instead
# of letting a spurious fourth class reach the classifier.
unbinned = drinks_binned.isna()
if unbinned.any():
    raise ValueError(
        f"{int(unbinned.sum())} 'drinks' value(s) are missing or fall outside the bins {bins}"
    )
data["drinks"] = drinks_binned.cat.codes  # Convert categories to integer codes

# Separate features and target
X = data.iloc[:, :-1].values  # Features (all columns except the last one)
y = data.iloc[:, -1].values  # Target (binned 'drinks')

# Split into training and test sets (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Standardize the features: fit the scaler on the training split only, then
# apply the same transform to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


### Step 2: Use the db_knn_predict Function for Classification
Build the `train_data` and `test_data` arrays from the splits prepared in Step 1, then pass them to the `db_knn_predict` function to classify the test set.

In [2]:
from density_knn1 import db_knn_predict

# DB-kNN hyperparameters
k = 7
radius = 10

# Append each label vector as a trailing column of its feature matrix.
# NOTE(review): test_data therefore carries y_test in its last column, and
# y_train is also passed separately below. Confirm against db_knn_predict's
# contract that this label column is not treated as a feature (it would leak
# the test labels).
train_data = np.concatenate((X_train, y_train[:, np.newaxis]), axis=1)
test_data = np.concatenate((X_test, y_test[:, np.newaxis]), axis=1)

# Classify the test rows
predictions = db_knn_predict(train_data, y_train, test_data, k, radius)


### Step 3: Evaluate Classification Performance
Evaluate the predictions using accuracy and a classification report.

In [3]:
from sklearn.metrics import accuracy_score, classification_report

# Overall accuracy of the predictions against the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Accuracy:", accuracy)

# Per-class precision, recall, F1 and support
report = classification_report(y_true=y_test, y_pred=predictions)
print("Classification Report:\n", report)


Accuracy: 0.3684210526315789
Classification Report:
               precision    recall  f1-score   support

           0       0.37      1.00      0.54        42
           1       0.00      0.00      0.00        20
           2       0.00      0.00      0.00        52

    accuracy                           0.37       114
   macro avg       0.12      0.33      0.18       114
weighted avg       0.14      0.37      0.20       114



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
