In [22]:
#Linear Regression example (predict SugarLevel)

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [24]:
data= {
    "Age": [25,26,35,45, 67, 80, 12, 60],
    "BloodPressure": [120, 140, 150, 130, 110, 160, 170, 145],
    "Diabetics": [85,90,200,180,70,160,220,195],
    "Diagnosis": ["Normal", "Pre-hypertension", "Diabetic", "Hypertension", "Normal", "Diabetic", "Hypertension", "Diabetic"]
}

In [25]:
df = pd.DataFrame(data)

In [26]:
X = df[["Age", "BloodPressure"]]
y = df["Diabetics"]

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [28]:
lr = LinearRegression()
lr.fit(X_train, y_train)

In [29]:
y_pred = lr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [30]:
print("Linear Regression Report")
print("Coefficients:", lr.coef_)
print("Intercept:", lr.intercept_)
print("MSE:", mse)
print("R2:", r2)

Linear Regression Report
Coefficients: [0.42438253 2.73182893]
Intercept: -230.77583413534757
MSE: 4081.1239786148785
R2: -2.4815275646951567


In [33]:
#K-Nearest Neighbours for Diagnosis Classification

In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

data = {
    "Age": [25,26,35,45,67,80,12,60,55],
    "BloodPressure": [120,140,150,130,110,160,170,145,135],
    "Diabetics": [85,90,200,180,70,160,220,195,150],
    "Diagnosis": ["Normal", "Pre-hypertension", "Diabetic", "Hypertension",
                  "Normal", "Diabetic", "Hypertension", "Diabetic", "Pre-hypertension"]
}

In [34]:
df = pd.DataFrame(data)
X = df[["Age", "BloodPressure", "Diabetics"]]# prediction
y_raw = df["Diagnosis"]
le = LabelEncoder()
y = le.fit_transform(y_raw)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42, stratify=y)
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)
knn = KNeighborsClassifier(n_neighbors=3, weights="uniform", p=2)
knn.fit(X_train_s, y_train)
print("KNN Report")
y_pred = knn.predict(X_test_s)
print("Class mapping:", dict(enumerate(le.classes_)))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification report:\n", classification_report(y_test, y_pred, target_names=le.classes_))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

KNN Report
Class mapping: {0: 'Diabetic', 1: 'Hypertension', 2: 'Normal', 3: 'Pre-hypertension'}
Accuracy: 0.4
Classification report:
                   precision    recall  f1-score   support

        Diabetic       0.40      1.00      0.57         2
    Hypertension       0.00      0.00      0.00         1
          Normal       0.00      0.00      0.00         1
Pre-hypertension       0.00      0.00      0.00         1

        accuracy                           0.40         5
       macro avg       0.10      0.25      0.14         5
    weighted avg       0.16      0.40      0.23         5

Confusion matrix:
 [[2 0 0 0]
 [1 0 0 0]
 [1 0 0 0]
 [1 0 0 0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [35]:
# Logistic Regression

In [36]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, log_loss

data= {
    "Age": [25,26,35,45, 67, 80, 12, 60],
    "BloodPressure": [120, 140, 150, 130, 110, 160, 170, 145],
    "Diabetes": [85,90,200,180,70,160,220,195],
    "Diagnosis": ["Normal", "Pre-hypertension", "Diabetic", "Hypertension", "Normal", "Diabetic", "Hypertension", "Diabetic"]
}
scaler= StandardScaler()
X_train_scaled= scaler.fit_transform(X_train)
X_test_scaled= scaler.transform(X_test)
lr= LogisticRegression(multi_class= "multinomial",solver= "lbfgs",penalty= "l2",C = 1.0,max_iter= 200,random_state= 42)
lr.fit(X_train_scaled, y_train)
y_pred= lr.predict(X_test_scaled)
y_prob= lr.predict_proba(X_test_scaled)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix", confusion_matrix(y_test, y_pred))
print("Log Loss", log_loss(y_test, y_prob))

Accuracy: 0.2
Confusion Matrix [[0 1 0 1]
 [0 0 0 1]
 [0 0 1 0]
 [0 0 1 0]]
Log Loss 1.3122237118377833


