In [27]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [28]:
# Load the main foods table; the scoring and similarity cells below all read `foods`.
# NOTE(review): the path is an absolute, machine-specific location written as a raw
# string with doubled backslashes — confirm it resolves on the target machine.
try:
    foods = pd.read_csv(r'C:\\Users\\Osas\\Personal_projects\\PCOS_project\\foods.csv')
except FileNotFoundError:
    print("foods.csv not found. Please update the file path.")
    # Nothing downstream can run without `foods`, so stop here. Re-raise instead
    # of calling exit(): exit() ends the whole Python session (in Jupyter it takes
    # the kernel down with it), whereas re-raising halts just this cell and keeps
    # the traceback showing which path was tried.
    raise

In [29]:
# Optional input: the nutrition sample sheet. The scoring and recommendation
# logic is driven by `foods`, so a missing workbook is tolerated.
try:
    nutrition = pd.read_excel(r'C:\\Users\\Osas\\Personal_projects\\PCOS_project\\nutritionalsamples.xlsx')
except FileNotFoundError:
    # Fall back to an empty frame so later references to `nutrition` still
    # resolve; the net-carbs step checks for emptiness before using it.
    nutrition = pd.DataFrame()
    print("nutritionalsamples.xlsx not found. Please update the file path.")

In [30]:
# 1. Derive 'net carbs' (carbs minus fiber) on the nutrition DataFrame.
#    Skipped when the frame is empty or either source column is missing.
if {'carbs(g)', 'fiber(g)'}.issubset(nutrition.columns) and not nutrition.empty:
    nutrition['net carbs'] = nutrition['carbs(g)'].sub(nutrition['fiber(g)'])
    print("Nutrition DataFrame head after adding 'net carbs' column:", nutrition.head(), sep="\n")

Nutrition DataFrame head after adding 'net carbs' column:
             food  protein(g)  carbs(g)  fiber(g)  fats(g)  sugar(g)  kcal  \
0         almonds       21.40      20.0     10.80    51.10      3.90   584   
1     rolled oats       13.50      68.7     10.40     5.89      1.00   379   
2           bread       14.30      72.8      2.70     1.65      5.00   372   
3   gold potatoes        1.81      16.0     13.80     0.26      0.65    72   
4  sweet potatoes        1.58      17.3      4.44     0.38      6.06    77   

   gi  net carbs  
0  15       9.20  
1  55      58.30  
2  90      70.10  
3  58       2.20  
4  70      12.86  


In [31]:
# 2. Define the `pcos_score` function
def pcos_score(row, protein_weight=3, fat_weight=1, sat_fat_weight=-2,
               fiber_weight=2, carb_weight=-2):
    """Return a weighted nutrient score for a single food row.

    The score is a linear combination of five nutrient fields:

        protein_weight * Protein + sat_fat_weight * Sat.Fat + fat_weight * Fat
        + fiber_weight * Fiber + carb_weight * Carbs

    With the default weights, protein, fat and fiber raise the score while
    saturated fat and carbs lower it.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys 'Protein', 'Sat.Fat', 'Fat', 'Fiber'
        and 'Carbs' (e.g. a row handed over by ``DataFrame.apply(..., axis=1)``).
    protein_weight, fat_weight, sat_fat_weight, fiber_weight, carb_weight : number
        Multipliers for the corresponding nutrient. The defaults reproduce the
        weights that were previously hard-coded in the function body.

    Returns
    -------
    number
        The weighted sum. No missing-value handling is done here, so a missing
        nutrient value flows straight into the arithmetic.
    """
    # The grouping of the terms is kept exactly as before so floating-point
    # results do not shift for already-computed scores.
    return (
        (protein_weight * row['Protein'])
        + (sat_fat_weight * row['Sat.Fat'])
        + (fat_weight * row['Fat'])
        + (fiber_weight * row['Fiber'] + carb_weight * row['Carbs'])
    )

In [32]:
# 3. Score every row of `foods` with `pcos_score` and keep the result as a new column
foods['PCOS_score'] = foods.apply(pcos_score, axis='columns')
print("\nFoods DataFrame head after adding 'PCOS_score' column:", foods.head(), sep="\n")


Foods DataFrame head after adding 'PCOS_score' column:
                    Food Measure  Grams Calories  Protein  Fat  Sat.Fat  \
0             Cows' milk   1 qt.    976      660       32   40     36.0   
1              Milk skim   1 qt.    984      360       36    0      0.0   
2             Buttermilk   1 cup    246      127        9    5      4.0   
3  Evaporated, undiluted   1 cup    252      345       16   20     18.0   
4         Fortified milk  6 cups  1,419    1,373       89   42     23.0   

   Fiber  Carbs        Category  PCOS_score  
0    0.0   48.0  Dairy products       -32.0  
1    0.0   52.0  Dairy products         4.0  
2    0.0   13.0  Dairy products        -2.0  
3    0.0   24.0  Dairy products       -16.0  
4    1.4  119.0  Dairy products        27.8  


In [33]:
# 4. Build the inputs for cosine similarity
# Numeric nutrient columns plus the derived score, copied so `foods` is untouched
features = foods.loc[:, ['Protein', 'Fat', 'Sat.Fat', 'Fiber', 'Carbs', 'PCOS_score']].copy()
# Replace each missing value with the mean of its own column
features = features.fillna(features.mean())

# Standardise the columns before comparing rows
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Pairwise cosine similarity between all rows of the scaled feature matrix
sim_matrix = cosine_similarity(features_scaled)

In [34]:
# 5. Define `recommend_similar` function
def recommend_similar(food_name, top_n=5):
    """Return the foods most similar to ``food_name`` by cosine similarity.

    Reads the notebook-level ``foods`` DataFrame and ``sim_matrix``; row ``i``
    of ``sim_matrix`` is taken to describe the ``i``-th row of ``foods``
    by position.

    Parameters
    ----------
    food_name : str
        Value to look up in ``foods['Food']``. When several rows share the
        name, the first one is used.
    top_n : int, default 5
        Maximum number of similar foods to return.

    Returns
    -------
    pandas.DataFrame or None
        The 'Food', 'Category' and 'PCOS_score' columns of the ``top_n`` most
        similar rows, most similar first, never including the queried row
        itself. ``None`` (after printing a hint) when ``food_name`` is absent.
    """
    # Work with row *positions*, not index labels: sim_matrix and .iloc are
    # both positional, so this stays correct even if `foods` has a
    # non-default index.
    matches = np.flatnonzero((foods['Food'] == food_name).to_numpy())
    if matches.size == 0:
        # Never ask for more samples than there are rows (sample() would raise).
        suggestions = list(foods['Food'].sample(min(5, len(foods))))
        print(f"Food '{food_name}' not found in dataset. Try one of: {suggestions}")
        return None
    pos = int(matches[0])

    sims = np.asarray(sim_matrix[pos], dtype=float)
    # Stable descending sort: ties keep their original row order.
    ranked = np.argsort(-sims, kind='stable')
    # Drop the queried row explicitly instead of assuming it sorts first;
    # a duplicate or tied row at a lower position would otherwise push the
    # queried food itself into the results.
    top_indices = [int(i) for i in ranked if i != pos][:max(top_n, 0)]
    return foods.iloc[top_indices][['Food', 'Category', 'PCOS_score']]

In [35]:
# 6. Define `recommend` function
def recommend(df, top=10, category=None):
    """Return the highest-scoring rows of ``df`` by 'PCOS_score'.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least 'PCOS_score' and 'Category' columns; it is not
        modified.
    top : int, default 10
        Number of rows to return.
    category : optional
        When truthy, only rows whose 'Category' equals this value are ranked;
        otherwise every row is considered.

    Returns
    -------
    pandas.DataFrame
        Up to ``top`` rows, sorted by 'PCOS_score' from highest to lowest.
    """
    # Narrow to the requested category only when one was given.
    pool = df[df['Category'] == category] if category else df
    # sort_values returns a new frame, so the caller's data stays as it was.
    return pool.sort_values(by='PCOS_score', ascending=False).head(top)

In [36]:
# 7. Example usage
# Best-scoring foods inside a single category
print("\n--- Example: Top 10 recommendations for 'Fruits A-F' ---")
recommendations_fruits = recommend(foods, top=10, category='Fruits A-F')
print(recommendations_fruits)

# Nearest neighbours of one food in nutrient space
print("\n--- Example: Foods similar to 'Shrimp' ---")
similar_foods_shrimp = recommend_similar("Shrimp", 5)
# recommend_similar returns None (after printing a hint) for an unknown food
if similar_foods_shrimp is not None:
    print(similar_foods_shrimp)


--- Example: Top 10 recommendations for 'Fruits A-F' ---
                      Food    Measure Grams Calories  Protein  Fat  Sat.Fat  \
161  Watercress stems, raw      1 cup    50        9        1    0      0.0   
163          Apple vinegar    1/3 cup   100       14        0    0      0.0   
170                Avocado  1/2 large   108      185        2   18     12.0   
174             Cantaloupe   1/2 med.   380       40        1    0      0.0   
172           Blackberries      1 cup   144       85        2    1      0.0   
168                  Fresh     3 med.   114       55        1    0      0.0   
176             Fresh, raw      1 cup   114       65        1    0      0.0   
164            Apples, raw      1 med   130       70        0    0      0.0   
180        Fresh, raw figs     3 med.   114       90        2    0      0.0   
175               Cherries      1 cup   257      100        2    1      0.0   

     Fiber  Carbs    Category  PCOS_score  
161    0.3    1.0  Fruits A-