In [1]:
import pandas as pd
from warnings import filterwarnings
filterwarnings(action='ignore', category=FutureWarning)

DIET = '/kaggle/input/indian-cuisine-based-rda-diet-recommendation-data/indian_rda_based_diet_recommendation_system.csv'

# Columns that are flags rather than nutrient measurements.
flag_columns = ['Breakfast', 'Lunch', 'Dinner', 'VegNovVeg']

df = pd.read_csv(filepath_or_buffer=DIET)

# Cells holding a single space are mapped to '0' so the integer cast succeeds
# (presumably these are missing values -- confirm against the raw CSV).
df['VegNovVeg'] = df['VegNovVeg'].replace(' ', '0').astype(int)

# Cast every flag column to bool in one pass; all other columns keep their dtype.
df = df.astype({name: bool for name in flag_columns})
df.head()

Unnamed: 0,Food_items,Breakfast,Lunch,Dinner,VegNovVeg,Calories,Fats,Proteins,Iron,Calcium,Sodium,Potassium,Carbohydrates,Fibre,VitaminD,Sugars
0,aloo Tikki,False,True,True,False,22,0.2,2.4,0.91,23.0,14,224.0,4.1,2.0,0,1.3
1,Kadhi,True,False,False,False,160,15.0,2.0,0.55,12.0,7,485.0,8.5,6.7,0,0.7
2,Bananas,True,False,False,False,89,0.3,1.1,0.26,5.0,1,358.0,23.0,2.6,0,12.0
3,Bread made in wheat,False,True,True,False,250,1.5,10.0,2.76,20.0,439,165.0,49.0,4.1,0,6.1
4,Mango Chutney,True,False,False,False,349,0.4,14.0,6.8,190.0,298,77.0,77.0,13.0,0,46.0


Apart from the dish name, all of our data is numeric (the meal and veg/non-veg flags are now booleans), and we have no obvious target variable; let's try several and see what happens.

In [2]:
from umap import UMAP
from plotly import express

# Nutrient columns used as the feature matrix for the embedding.
columns = ['Calories', 'Fats', 'Proteins', 'Iron', 'Calcium', 'Sodium', 'Potassium', 'Carbohydrates', 'Fibre', 'VitaminD', 'Sugars']

# Project the nutrient features down to two dimensions; both seeds are fixed
# so the layout is reproducible from run to run.
# NOTE(review): the features are passed in unscaled, so large-valued columns
# probably dominate the distances -- confirm that this is intended.
reducer = UMAP(n_components=2, random_state=2024, transform_seed=2024, verbose=True, n_jobs=1, n_epochs=100)
embedding = reducer.fit_transform(X=df[columns])

# Assign the coordinates positionally. Wrapping the array in a new DataFrame
# would align on the index and silently yield NaN / misplaced rows if df ever
# has a non-default index (e.g. after filtering rows).
df['x'] = embedding[:, 0]
df['y'] = embedding[:, 1]

# One scatter plot per flag, each titled so it can be read on its own.
for target in ['Breakfast', 'Lunch', 'Dinner', 'VegNovVeg',]:
    express.scatter(data_frame=df, x='x', y='y', color=target, hover_name='Food_items', title='UMAP embedding coloured by {}'.format(target)).show()

2024-03-05 20:52:26.795676: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-05 20:52:26.795811: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-05 20:52:26.967810: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


UMAP(n_epochs=100, n_jobs=1, random_state=2024, transform_seed=2024, verbose=True)
Tue Mar  5 20:52:41 2024 Construct fuzzy simplicial set
Tue Mar  5 20:52:41 2024 Finding Nearest Neighbors
Tue Mar  5 20:52:46 2024 Finished Nearest Neighbor Search
Tue Mar  5 20:52:49 2024 Construct embedding


Epochs completed:   0%|            0/100 [00:00]

	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
Tue Mar  5 20:52:50 2024 Finished embedding


Not surprisingly, vegetarian dishes look nutritionally different from non-vegetarian dishes.

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(df[columns], df[['Breakfast', 'Lunch', 'Dinner', 'VegNovVeg']], test_size=0.2, random_state=2024)

# Fit one independent classifier per flag and keep each fitted model, so later
# steps can refer to a model by name instead of relying on loop order.
# NOTE(review): the features are unscaled, so coefficient magnitudes are not
# directly comparable across nutrients -- consider standardising first.
models = {}
for target in ['Breakfast', 'Lunch', 'Dinner', 'VegNovVeg']:
    models[target] = LogisticRegression(max_iter=100000).fit(X=X_train, y=y_train[target])
    print('target: {} accuracy: {:5.4f} '.format(target, models[target].score(X=X_test, y=y_test[target])))

# The coefficient plot is about the VegNovVeg model specifically; select it
# explicitly rather than using whichever model happened to be fitted last.
# `regression` stays bound to that fitted model, as it was before this change.
regression = models['VegNovVeg']

# A bar chart (one bar per feature) is the appropriate figure for per-feature
# coefficients; a histogram would aggregate y and label the axis "sum of y".
express.bar(x=columns, y=regression.coef_[0], labels={'x': 'feature', 'y': 'coefficient'}, title='VegNovVeg logistic-regression coefficients').show(validate=True)


target: Breakfast accuracy: 0.6111 
target: Lunch accuracy: 0.3889 
target: Dinner accuracy: 0.6111 
target: VegNovVeg accuracy: 0.9444 


Judging by the coefficients of the VegNovVeg model, it looks like vegetarian dishes are higher in fibre and iron, while non-vegetarian dishes are higher in protein. And people will eat almost anything for lunch.