In [None]:
import pandas as pd
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from collections import Counter
import numpy as np
import heapq

In [176]:
df = pd.read_csv('../data/diet_recommendations_dataset.csv')
df = df[ df['Gender'] == 'Female']
df.drop(columns=['Gender', 'Patient_ID', 'Dietary_Restrictions', 'Allergies', 'Preferred_Cuisine', 'Blood_Pressure_mmHg','Adherence_to_Diet_Plan', 'Dietary_Nutrient_Imbalance_Score', 'Disease_Type', 'Severity', 'Physical_Activity_Level' ], inplace=True)
df.head()

Unnamed: 0,Age,Weight_kg,Height_cm,BMI,Daily_Caloric_Intake,Cholesterol_mg/dL,Glucose_mg/dL,Weekly_Exercise_Hours,Diet_Recommendation
2,46,63.5,173,21.2,1737,181.0,109.6,3.8,Low_Sodium
5,25,105.7,156,43.4,2715,182.3,108.9,0.9,Balanced
11,40,95.5,186,27.6,1622,220.8,154.9,4.3,Low_Carb
17,57,80.9,175,26.4,1979,214.4,185.4,1.1,Low_Carb
19,20,90.1,173,30.1,2542,207.7,98.1,1.0,Low_Carb


In [172]:
def convert_category(df: pd.DataFrame, col: str) -> pd.DataFrame:
    types = set(df[col])
    mapping = {}
    i = 1
    for type in types:
        mapping[type] = i
        i += 1
    df[col] = df[col].map(mapping)
    return df
# df = convert_category(df, "Disease_Type")
# df = convert_category(df, "Gender")
# df = convert_category(df, "Severity")
# df = convert_category(df, "Physical_Activity_Level")
df.head()


Unnamed: 0,Age,Weight_kg,Height_cm,BMI,Daily_Caloric_Intake,Cholesterol_mg/dL,Glucose_mg/dL,Weekly_Exercise_Hours,Diet_Recommendation
0,56,58.4,160,22.8,3079,173.3,116.3,3.1,Balanced
1,69,101.2,169,35.4,3032,199.2,137.1,4.5,Low_Carb
3,32,58.1,164,21.6,2657,168.2,159.4,4.3,Balanced
4,60,79.5,197,20.5,3496,200.4,182.3,9.8,Low_Carb
6,78,102.2,170,35.4,2879,175.8,95.1,9.2,Balanced


In [173]:
X = df.iloc[:,:-1]
Y = df.iloc[:,-1]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=0)

In [174]:
class KNN:
    def __init__(self, n: int, X_train, Y_train):
        self.n = n
        self.X_train = X_train
        self.Y_train = Y_train

    def distance(self, row1, row2) -> int:
        distance = 0.0
        for val1, val2 in zip(row1, row2):
            distance += pow(val1 - val2, 2)
        return distance
    
    def getNeighbors(self, stats) -> list[str]:
        heap = []
        for idx, X in self.X_train.iterrows():
            d = self.distance(stats, X)
            heapq.heappush(heap, (d, idx))
            if len(heap) > self.n:
                _, throw = heapq.heappop(heap)
        labels = [self.Y_train[i] for _,i in heap]
        return labels

    def predict(self, stats) -> str:
        predicted_vals = self.getNeighbors(stats)
        pred_count = Counter(predicted_vals)
        most = max(pred_count.values())
        ans = ""
        for pred in pred_count:
            if pred_count[pred] == most:
                ans = pred
        return ans

In [175]:
knn = KNN(77, X_train, Y_train)
correct = 0
size = 0
for (_,x), y in zip(X_test.iterrows(), Y_test):
    label = knn.predict(x)
    if y == label: correct+=1
    size+=1
acurracy = correct / len(X_test)
print(acurracy)

0.3523809523809524


In [169]:
from sklearn.neighbors import KNeighborsClassifier
skmodel = KNeighborsClassifier(n_neighbors=77)
skmodel.fit(X_train, Y_train)

In [170]:
sk_predictions = skmodel.predict(X_test)
c = 0
for x,y in zip(sk_predictions, Y_test):
    if x == y: c+= 1
sk_accuracy = c / len(Y_test)
print(f" sklearn-model got accuracy score of : {sk_accuracy}")

 sklearn-model got accuracy score of : 0.4
