In [4]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import NearestNeighbors

In [7]:
data = {
    "Product_Name": ["RSPCA Approved Chicken Livers","RSPCA Approved Chicken Giblets","RSPCA Approved Chicken Frames","Creme Gold Washed Potatoes Loose",
                     "Baby White Potatoes Prepacked","Potatoes Prepacked","Happy Gut Wholemeal","Low Carb Bread","Happy Gut Multigrain"],  
    "Category": ["Meat & seafood", "Meat & seafood","Meat & seafood","Fruit & vegetables","Fruit & vegetables","Fruit & vegetables","Bakery","Bakery","Bakery"],
    "Sub_category": ["Poultry", "Poultry","Poultry","Vegetables", "Vegetables", "Vegetables","Packaged Breads","Packaged Breads","Packaged Breads"],
    "Product_Group": ["Chicken offal", "Chicken offal", "Chicken offal","Potatoes","Potatoes","Potatoes","Digestive Health & Speciality Bread",
                      "Digestive Health & Speciality Bread","Digestive Health & Speciality Bread"],
    "Brand": ["Coles","Coles","Coles","Coles","Coles","Spud Lite","Coles","Herman Brot","Coles"],
    "Sku": ["1718058P","2565429P","3199541P","1182162P","2488485P","3026441P","4727962P","4654122P","4727995P"]
}

df = pd.DataFrame(data)

# encoding categorical features
encoder = OneHotEncoder()
encoded_features = encoder.fit_transform(df[["Category", "Sub_category", "Product_Group", "Brand"]])

# KNN model
knn = NearestNeighbors(n_neighbors=3, metric='euclidean')
knn.fit(encoded_features)

# cross-selling function
def recommend_similar_products(product_name, k=3):
    # index of the product
    product_idx = df[df["Product_Name"] == product_name].index[0]
    
    # feature vector for the product
    product_vector = encoded_features[product_idx]
    
    # reshaping the product vector to be a 2D array
    product_vector = product_vector.reshape(1, -1)
    
    # k nearest neighbors
    distances, indices = knn.kneighbors(product_vector, n_neighbors=k+1)
    
    # recommended products
    recommended_indices = indices[0][1:]  # Exclude the first item as it is the input product itself
    recommended_products = df.iloc[recommended_indices]["Product_Name"].values
    
    return recommended_products

In [8]:
# example usage
product_name = "RSPCA Approved Chicken Livers"
recommended_products = recommend_similar_products(product_name, k=3)
print(f"Products similar to '{product_name}': {recommended_products}")

Products similar to 'RSPCA Approved Chicken Livers': ['RSPCA Approved Chicken Livers' 'RSPCA Approved Chicken Giblets'
 'Creme Gold Washed Potatoes Loose']
