In [4]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [5]:
df = pd.read_csv("C:/Users/sreya/Downloads/Dataset/nutrition_1.csv")


In [6]:
df.drop(columns=['Column1', 'serving_size', 'calories'], inplace=True)


In [7]:
columns_to_clean = ['calcium', 'protein', 'carbohydrate', 'fiber', 'sugars', 'fat']
for column in columns_to_clean:
    df[column] = df[column].str.replace(' g', '').str.replace('mg', '').astype(float)

In [8]:
df.fillna(df.mean(), inplace=True)


  df.fillna(df.mean(), inplace=True)


In [9]:
df.head()

Unnamed: 0,name,calcium,protein,carbohydrate,fiber,sugars,fat
0,Cornstarch,2.0,0.26,91.27,0.9,0.0,0.05
1,"Nuts, pecans",70.0,9.17,13.86,9.6,3.97,71.97
2,"Eggplant, raw",9.0,0.98,5.88,3.0,3.53,0.18
3,"Teff, uncooked",180.0,13.3,73.13,8.0,1.84,2.38
4,"Sherbet, orange",54.0,1.1,30.4,1.3,24.32,2.0


In [10]:
df['name'].unique()

array(['Cornstarch', 'Nuts, pecans', 'Eggplant, raw', ...,
       'Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand',
       'Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round roast, round',
       'Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round'],
      dtype=object)

In [11]:
null_values = df.isnull().sum()
null_values

name            0
calcium         0
protein         0
carbohydrate    0
fiber           0
sugars          0
fat             0
dtype: int64

In [12]:
df['nutritional_profile'] = df[columns_to_clean].astype(str).agg(' '.join, axis=1)


In [13]:
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['nutritional_profile'])

In [14]:
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [15]:
def recommend_similar_food(food_item, cosine_sim=cosine_sim, df=df, num_recommendations=5):
    # Get the index of the food item
    idx = df[df['name'] == food_item].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_indices = [i[0] for i in sim_scores[1:num_recommendations+1]]
    return df.iloc[sim_indices]

In [16]:

given_food_item = 'Figs, stewed, dried' 
recommended_food_items = recommend_similar_food(given_food_item)
print("Recommended similar food items:")
print(recommended_food_items[['name', 'calcium', 'protein', 'carbohydrate', 'fiber', 'sugars', 'fat']])


Recommended similar food items:
                                                   name  calcium  protein  \
4578  MURRAY, Vanilla Creme Sandwich Cookies, SUGAR ...      0.0     4.90   
1463                 Cake, prepared from recipe, sponge     42.0     7.30   
4082  Cookies, regular, sugar wafers with creme filling     27.0     3.84   
5547  KELLOGG'S, Frosted Blueberry Muffin Toaster Pa...      0.0     4.00   
939                         Bread, canned, boston brown     70.0     5.20   

      carbohydrate  fiber  sugars    fat  
4578         70.00    3.4    1.20  23.10  
1463         57.70    0.0    0.00   4.30  
4082         70.64    1.6   42.95  23.24  
5547         70.90    1.1   27.60  10.90  
939          43.30    4.7    2.42   1.50  


In [17]:
# Sample ground truth data
ground_truth = {
    'Figs, stewed, dried': ['MURRAY, Vanilla Creme Sandwich Cookies, SUGAR FREE', 
                            'Cake, prepared from recipe, sponge', 
                            'Cookies, regular, sugar wafers with creme filling', 
                            'KELLOGG\'S, Frosted Blueberry Muffin Toaster Pastries', 
                            'Bread, canned, boston brown'],
    # Add more ground truth data for other food items
}


In [18]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Function to calculate precision, recall, and F1-score
def calculate_metrics(recommendations, ground_truth_set):
    true_positives = len(set(recommendations) & set(ground_truth_set))
    false_positives = len(set(recommendations) - set(ground_truth_set))
    false_negatives = len(set(ground_truth_set) - set(recommendations))
    
    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    return precision, recall, f1

# Calculate metrics for each item in the ground truth
overall_precision = []
overall_recall = []
overall_f1 = []

for food_item, ground_truth_items in ground_truth.items():
    recommended_items = recommend_similar_food(food_item)['name'].tolist()
    precision, recall, f1 = calculate_metrics(recommended_items, ground_truth_items)
    
    overall_precision.append(precision)
    overall_recall.append(recall)
    overall_f1.append(f1)
    
    print(f"Food item: {food_item}")
    print(f"Recommended items: {recommended_items}")
    print(f"Ground truth items: {ground_truth_items}")
    print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")
    print("-" * 50)

# Calculate and print overall metrics
mean_precision = sum(overall_precision) / len(overall_precision)
mean_recall = sum(overall_recall) / len(overall_recall)
mean_f1 = sum(overall_f1) / len(overall_f1)

print(f"Overall Precision: {mean_precision}")
print(f"Overall Recall: {mean_recall}")
print(f"Overall F1-score: {mean_f1}")


Food item: Figs, stewed, dried
Recommended items: ['MURRAY, Vanilla Creme Sandwich Cookies, SUGAR FREE', 'Cake, prepared from recipe, sponge', 'Cookies, regular, sugar wafers with creme filling', "KELLOGG'S, Frosted Blueberry Muffin Toaster Pastries, POP-TARTS", 'Bread, canned, boston brown']
Ground truth items: ['MURRAY, Vanilla Creme Sandwich Cookies, SUGAR FREE', 'Cake, prepared from recipe, sponge', 'Cookies, regular, sugar wafers with creme filling', "KELLOGG'S, Frosted Blueberry Muffin Toaster Pastries", 'Bread, canned, boston brown']
Precision: 0.8, Recall: 0.8, F1-score: 0.8000000000000002
--------------------------------------------------
Overall Precision: 0.8
Overall Recall: 0.8
Overall F1-score: 0.8000000000000002
