In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
import pickle

def train_knn_model(data_path, n_neighbors=5, metric="cosine", algorithm="brute", model_filename="knn_model.pkl", feature_columns=None):
    """
    Trains a K-Nearest Neighbors model, saves it as a pickle file, and returns the pipeline.

    The pipeline first standardizes incoming samples with the scaler fitted on the
    training features, then queries the fitted NearestNeighbors index and returns
    neighbor indices only (distances are not returned).

    :param data_path: Path to the CSV file containing the data
    :param n_neighbors: Number of neighbors to use for KNN
    :param metric: Distance metric to use (default is 'cosine')
    :param algorithm: Algorithm to compute the nearest neighbors (default is 'brute')
    :param model_filename: Name of the file where the model will be saved
    :param feature_columns: Optional column label, or list of column labels, to use as
        features. When omitted, the columns at positions 1 to 4 of the CSV are used,
        which is the original behaviour of this function.
    :return: Trained pipeline object
    """

    # Load the data
    df = pd.read_csv(data_path)

    # Select the feature columns: by position when no labels are given, by label otherwise
    if feature_columns is None:
        features = df.iloc[:, 1:5]
    else:
        # A bare string would otherwise be split into single characters by list()
        if isinstance(feature_columns, str):
            feature_columns = [feature_columns]
        features = df.loc[:, list(feature_columns)]

    # Preprocess the data
    scaler = StandardScaler()
    prep_data = scaler.fit_transform(features.to_numpy())

    # Initialize KNN model and fit it on the standardized features
    neigh = NearestNeighbors(metric=metric, algorithm=algorithm, n_neighbors=n_neighbors)
    neigh.fit(prep_data)

    # Wrap the fitted model's kneighbors method so it can act as the final pipeline step
    transformer = FunctionTransformer(neigh.kneighbors, kw_args={"return_distance": False})

    # Create the pipeline; the scaler is already fitted, so the pipeline is used via transform()
    pipeline = Pipeline([("std_scaler", scaler), ("NN", transformer)])

    # Save the pipeline to a pickle file
    with open(model_filename, "wb") as f:
        pickle.dump(pipeline, f)

    return pipeline

def predict_knn(model_filename, input_data):
    """
    Restores a pickled pipeline from disk and runs the given samples through it.

    :param model_filename: Name of the file where the model is saved
    :param input_data: A list of lists or 2D array containing the input data for prediction
    :return: Whatever the stored pipeline's transform() yields; for a pipeline saved by
        train_knn_model these are the indices of the nearest neighbors
    """
    # NOTE: unpickling can execute arbitrary code, so only load model files you trust.
    with open(model_filename, "rb") as model_file:
        fitted_pipeline = pickle.load(model_file)

    # Delegate to the pipeline: every step's transform is applied in order
    neighbor_indices = fitted_pipeline.transform(input_data)
    return neighbor_indices




In [3]:
# Example usage

# Path to the dataset, and the pickle file the trained pipeline is written to.
# The filename is defined once so training and prediction cannot drift apart.
data_path = "./daily_menus.csv"
model_filename = "knn_model.pkl"

# Train the model and save it
model = train_knn_model(data_path, model_filename=model_filename)

# Example input for prediction: one sample with four feature values,
# given in the same order as the feature columns used for training
input_data = [[1800, 154, 60, 23]]

# Make predictions using the saved model
predictions = predict_knn(model_filename, input_data)

# Display the predicted neighbors
print(predictions)

# To get the actual rows from the dataframe corresponding to the predictions
# (the predictions are positional row indices, hence iloc):
df = pd.read_csv(data_path)
df.iloc[predictions[0]]

[[31 45 17  3 37]]


Unnamed: 0,Day Menu,Total Calories,Total Protein (g),Total Fat (g),Total Fiber (g)
31,Diabetic-Friendly Dinner + Balanced Meal + Pro...,1750,115,57,25
45,High-Protein Lunch + Gluten-Free Lunch + Heart...,1850,115,58,25
17,Heart-Healthy Meal + High-Protein Lunch + Weig...,2500,155,90,26
3,High-Protein Lunch + Balanced Meal + Weight Ga...,2400,150,90,23
37,Weight Loss Plan + High-Protein Lunch + High-C...,1900,120,68,20
