In [2]:
import pandas as pd
import numpy as np

# Load the cleaned product table from CSV.
df = pd.read_csv('cleaned_data5.csv', low_memory=False)

# Nutrient columns that take part in the evaluation below.
nutrition_columns = [
    'energy-kcal_value',
    'fat_value',
    'saturated-fat_value',
    'carbohydrates_value',
    'sugars_value',
    'fiber_value',
    'proteins_value',
    'salt_value',
    'sodium_value',
    'trans-fat_value',
    'cholesterol_value',
    'vitamin-a_value',
    'vitamin-c_value',
    'vitamin-d_value',
    'calcium_value',
    'iron_value',
    'potassium_value',
]

# Coerce every nutrient column to numbers: entries that cannot be parsed
# become NaN, and every NaN (including originally missing cells) becomes 0.
for col in nutrition_columns:
    df[col] = df[col].pipe(pd.to_numeric, errors='coerce').fillna(0)

# Keep only the rows that have at least one non-zero nutrient value.
df = df.loc[lambda frame: frame[nutrition_columns].ne(0).any(axis=1)]

# Disabled helper for converting a value to grams. The definition below is
# wrapped in a triple-quoted string, so Python evaluates it as a bare string
# expression: convert_to_grams is never defined and nothing here calls it.
'''def convert_to_grams(value, unit):
    if unit == 'mg':
        return value / 1000  # Convert milligrams to grams
    elif unit == 'g':
        return value  # Already in grams
    else:
        return None  # Handle unexpected unit cases'''

def evaluate_food(row):
    """Score one product's nutrition values and assign a quality category.

    Each nutrient is compared against fixed thresholds. A favourable band
    adds to the score, an unfavourable band subtracts from it, and every band
    that matches appends a human-readable reason. A value of 0 is skipped for
    every nutrient except trans fat, where exactly 0 is rewarded. Comparisons
    against NaN are all false, so a NaN value contributes nothing.

    Parameters
    ----------
    row : mapping-like (for example a pandas Series or a dict)
        Must provide numeric values under the ``*_value`` keys read below.
        The thresholds are applied to the values exactly as stored.

    Returns
    -------
    pandas.Series
        Three positional entries: the category ("Excellent", "Good",
        "Moderate" or "Poor"), the numeric score (an int, or a float once a
        vitamin/mineral bonus has been added), and the reasons joined by ", ".
    """
    score = 0
    reasons = []

    # Energy (kcal)
    energy = row['energy-kcal_value']
    if energy > 0:
        if energy <= 500:
            score += 1
            reasons.append("Moderate calorie content")
        elif energy > 500:
            score -= 1
            reasons.append("High calorie content")

    # Fat
    fat = row['fat_value']
    if fat > 0:
        if fat <= 3:
            score += 1
            reasons.append("Low fat content")
        elif fat <= 17.5:
            # Neutral band: no score change. It now runs up to the "high"
            # threshold; it used to stop at 10, which left products with
            # 10 < fat <= 17.5 without any fat-related reason.
            reasons.append("Moderate fat content")
        elif fat > 17.5:
            score -= 1
            reasons.append("High fat content")

    # Saturated fat (values in (4, 5] match neither band)
    saturated_fat = row['saturated-fat_value']
    if saturated_fat > 0:
        if saturated_fat <= 4:
            score += 1
            reasons.append("Low saturated fat")
        elif saturated_fat > 5:
            score -= 1
            reasons.append("High saturated fat")

    # Carbohydrates
    carbohydrates = row['carbohydrates_value']
    if carbohydrates > 0:
        if carbohydrates <= 4:
            score += 1
            reasons.append("Low carbohydrate content")
        elif carbohydrates <= 15:
            # Neutral band: reported but not scored.
            reasons.append("Moderate carbohydrate content")
        elif carbohydrates > 15:
            score -= 1
            reasons.append("High carbohydrate content")

    # Sugars
    sugars = row['sugars_value']
    if sugars > 0:
        if sugars <= 5:
            score += 1
            reasons.append("Low sugar content")
        elif sugars > 22.5:
            score -= 1
            reasons.append("High sugar content")
        else:
            # Neutral band: reported but not scored.
            reasons.append("Moderate sugar content")

    # Fiber (values in (2, 3] match neither band)
    fiber = row['fiber_value']
    if fiber > 0:
        if fiber > 3:
            score += 1
            reasons.append("Good source of fiber")
        elif fiber <= 2:
            score -= 1
            reasons.append("Low fiber content")

    # Protein (values in (5, 10] match neither band)
    protein = row['proteins_value']
    if protein > 0:
        if protein > 10:
            score += 1
            reasons.append("Good source of protein")
        elif protein <= 5:
            score -= 1
            reasons.append("Low protein content")

    # Salt (values in (1.5, 2] match neither band)
    salt = row['salt_value']
    if salt > 0:
        if salt <= 1.5:
            score += 1
            reasons.append("Low salt content")
        elif salt > 2:
            score -= 1
            reasons.append("High salt content")

    # Trans fat: the only nutrient where an exact 0 earns a point. The
    # loading code fills missing values with 0, so a product without
    # trans-fat data is treated the same as one that reports 0.
    trans_fat = row['trans-fat_value']
    if trans_fat == 0:
        score += 1
        reasons.append("No trans fat")
    elif trans_fat > 0:
        score -= 2
        reasons.append("Contains trans fat")

    # Cholesterol (values in (20, 60] match neither band)
    cholesterol = row['cholesterol_value']
    if cholesterol > 0:
        if cholesterol <= 20:
            score += 1
            reasons.append("Low cholesterol")
        elif cholesterol > 60:
            score -= 1
            reasons.append("High cholesterol")

    # Vitamins and minerals: half a point for each one that is present.
    micronutrients = (
        ('vitamin-a_value', 'Vitamin A'),
        ('vitamin-c_value', 'Vitamin C'),
        ('vitamin-d_value', 'Vitamin D'),
        ('calcium_value', 'Calcium'),
        ('iron_value', 'Iron'),
        ('potassium_value', 'Potassium'),
    )
    for nutrient, name in micronutrients:
        if row[nutrient] > 0:
            score += 0.5
            reasons.append(f"Contains {name}")

    # Categorize based on score
    if score > 5:
        category = "Excellent"
    elif score > 2:
        category = "Good"
    elif score > -2:
        category = "Moderate"
    else:
        category = "Poor"

    return pd.Series([category, score, ', '.join(reasons)])

def get_recommendations(food):
    """Build newline-separated dietary advice for an already scored product.

    The first line is a general statement chosen from ``food['Category']``;
    any value other than 'Poor', 'Moderate' or 'Good' gets the "excellent"
    wording. Further lines are added for each nutrient that crosses its
    threshold. The fat and sugar thresholds are the same "high" cut-offs that
    evaluate_food uses (17.5 and 22.5), so the advice cannot contradict the
    reasons reported for the same product.

    Parameters
    ----------
    food : mapping-like (for example a pandas Series or a dict)
        Must provide 'Category' plus the numeric keys 'fat_value',
        'sugars_value', 'salt_value', 'fiber_value', 'trans-fat_value' and
        'cholesterol_value'.

    Returns
    -------
    str
        The recommendation sentences joined with "\\n".
    """
    recommendations = []

    category = food['Category']
    if category == 'Poor':
        recommendations.append("This product has a poor nutritional profile. Consider limiting its consumption.")
    elif category == 'Moderate':
        recommendations.append("This product has a moderate nutritional profile. It can be consumed in moderation as part of a balanced diet.")
    elif category == 'Good':
        recommendations.append("This product has a good nutritional profile. It can be a healthy part of your diet.")
    else:
        recommendations.append("This product has an excellent nutritional profile. It's a great choice for a healthy diet.")

    # Add specific recommendations
    if food['fat_value'] > 17.5:
        recommendations.append("This product is high in fat. Consider alternatives with less fat or consume in moderation.")
    if food['sugars_value'] > 22.5:
        recommendations.append("This product is high in sugar. Look for options with less added sugar or limit consumption.")
    if food['salt_value'] > 2:
        recommendations.append("This product is high in salt. Try to limit your intake to maintain healthy blood pressure.")
    # A fiber value of 0 also triggers this line, unlike the scoring rules,
    # which skip zero values.
    if food['fiber_value'] < 2:
        recommendations.append("This product is low in fiber. Consider adding high-fiber foods to your diet for digestive health.")
    if food['trans-fat_value'] > 0:
        recommendations.append("This product contains trans fat. It's recommended to avoid or minimize consumption of trans fats.")
    if food['cholesterol_value'] > 60:
        recommendations.append("This product is high in cholesterol. If you have concerns about cholesterol, consult with a healthcare professional.")

    return "\n".join(recommendations)

def get_food_info(barcode):
    """Look up a product by barcode and return a formatted nutrition report.

    Relies on the module-level ``df`` table and on ``evaluate_food`` and
    ``get_recommendations`` defined in this notebook.

    Parameters
    ----------
    barcode : str or number
        Value to look for in ``df['code']``. A row matches when its code
        equals ``barcode`` directly, or when the two are equal as text after
        surrounding whitespace is stripped.

    Returns
    -------
    str
        "Food not found in database" when no row matches. Otherwise a
        multi-line report for the first matching row: product, brand,
        category, score, reasons, nutrient values and recommendations.
    """
    codes = df['code']
    # The text comparison is added to the plain equality test so a barcode
    # with stray whitespace (typical for input()) or of a different type than
    # the column (int versus str) is still found.
    matches = df[(codes == barcode) | (codes.astype(str).str.strip() == str(barcode).strip())]
    if matches.empty:
        return "Food not found in database"

    # Take an explicit copy of the row so adding the evaluation fields can
    # never write through to df or raise SettingWithCopyWarning.
    food = matches.iloc[0].copy()
    food['Category'], food['Score'], food['Reasons'] = evaluate_food(food)

    # Show any remaining missing field (e.g. product name or brand) as 'N/A'.
    food = food.fillna('N/A')

    # The report body is indented by four spaces; give the second and later
    # recommendation lines the same indent as the first.
    recommendations = get_recommendations(food).replace("\n", "\n    ")
    return f"""
    Product: {food['product_name_en']}
    Brand: {food['brands']}
    Category: {food['Category']}
    Score: {food['Score']:.2f}
    Reasons: {food['Reasons']}
    
    Nutritional Information (per 100g):
    Energy: {food['energy-kcal_value']} kcal
    Fat: {food['fat_value']}g
    Saturated Fat: {food['saturated-fat_value']}g
    Carbohydrates: {food['carbohydrates_value']}g
    Sugars: {food['sugars_value']}g
    Fiber: {food['fiber_value']}g
    Protein: {food['proteins_value']}g
    Salt: {food['salt_value']}g
    Trans Fat: {food['trans-fat_value']}g
    Cholesterol: {food['cholesterol_value']}mg
    Vitamin A: {food['vitamin-a_value']}µg
    Vitamin C: {food['vitamin-c_value']}mg
    Vitamin D: {food['vitamin-d_value']}µg
    Calcium: {food['calcium_value']}mg
    Iron: {food['iron_value']}mg
    Potassium: {food['potassium_value']}mg
    
    Recommendations:
    {recommendations}
    """

# Example usage: prompt for a barcode and print its report. Surrounding
# whitespace (common when a barcode is pasted) is stripped so it cannot make
# the lookup fail.
barcode = input('Enter the Barcode Number:').strip()
print(get_food_info(barcode))

Enter the Barcode Number: 90162602



    Product: Redbull
    Brand: Red Bull
    Category: Good
    Score: 3.00
    Reasons: Moderate calorie content, Moderate carbohydrate content, Moderate sugar content, Low salt content, No trans fat
    
    Nutritional Information (per 100g):
    Energy: 46.0 kcal
    Fat: 0.0g
    Saturated Fat: 0.0g
    Carbohydrates: 11.0g
    Sugars: 11.0g
    Fiber: 0.0g
    Protein: 0.0g
    Salt: 0.1g
    Trans Fat: 0.0g
    Cholesterol: 0.0mg
    Vitamin A: 0.0µg
    Vitamin C: 0.0mg
    Vitamin D: 0.0µg
    Calcium: 0.0mg
    Iron: 0.0mg
    Potassium: 0.0mg
    
    Recommendations:
    This product has a good nutritional profile. It can be a healthy part of your diet.
This product is low in fiber. Consider adding high-fiber foods to your diet for digestive health.
    


In [12]:
import pandas as pd
import numpy as np

# Load the cleaned product table from CSV.
df = pd.read_csv('cleaned_data5.csv', low_memory=False)

# Nutrient columns that take part in the evaluation below.
nutrition_columns = [
    'energy-kcal_value',
    'fat_value',
    'saturated-fat_value',
    'carbohydrates_value',
    'sugars_value',
    'fiber_value',
    'proteins_value',
    'salt_value',
    'sodium_value',
    'trans-fat_value',
    'cholesterol_value',
    'vitamin-a_value',
    'vitamin-c_value',
    'vitamin-d_value',
    'calcium_value',
    'iron_value',
    'potassium_value',
]

# Coerce every nutrient column to numbers: entries that cannot be parsed
# become NaN, and every NaN (including originally missing cells) becomes 0.
for col in nutrition_columns:
    df[col] = df[col].pipe(pd.to_numeric, errors='coerce').fillna(0)

# The filter that drops rows whose nutrient values are all zero is left
# commented out in this cell, so such rows stay in df:
# df = df[(df[nutrition_columns] != 0).any(axis=1)]

def evaluate_food(row):
    """Score one product's nutrition values and assign a quality category.

    Every nutrient has an ordered list of bands. The first band whose test
    passes adds its delta to the score and records its reason; a value that
    matches no band (including NaN, for which every comparison is false)
    contributes nothing. Each vitamin or mineral with a value above 0 then
    adds half a point.

    Parameters
    ----------
    row : mapping-like (for example a pandas Series or a dict)
        Must provide numeric values under the ``*_value`` keys listed below.

    Returns
    -------
    pandas.Series
        Three positional entries: the category ("Excellent", "Good",
        "Moderate" or "Poor"), the numeric score, and the reasons joined
        by ", ".
    """
    # (column, ((test, score delta, reason), ...)), evaluated top to bottom.
    banded_rules = (
        ('energy-kcal_value', (
            (lambda v: 0 < v <= 400, 1, "Moderate calorie content"),
            (lambda v: v > 400, -1, "High calorie content"),
        )),
        ('fat_value', (
            (lambda v: 0 < v <= 10, 1, "Moderate fat content"),
            (lambda v: v > 20, -1, "High fat content"),
        )),
        ('saturated-fat_value', (
            (lambda v: 0 < v <= 4, 1, "Low saturated fat"),
            (lambda v: v > 5, -1, "High saturated fat"),
        )),
        ('carbohydrates_value', (
            (lambda v: 0 < v <= 50, 1, "Moderate carbohydrate content"),
            (lambda v: v > 70, -1, "High carbohydrate content"),
        )),
        ('sugars_value', (
            (lambda v: 0 < v <= 10, 1, "Low sugar content"),
            (lambda v: v > 15, -1, "High sugar content"),
        )),
        ('fiber_value', (
            (lambda v: v > 3, 1, "Good source of fiber"),
            (lambda v: 0 < v <= 2, -1, "Low fiber content"),
        )),
        ('proteins_value', (
            (lambda v: v > 10, 1, "Good source of protein"),
            (lambda v: 0 < v <= 5, -1, "Low protein content"),
        )),
        ('salt_value', (
            (lambda v: 0 < v <= 1.5, 1, "Low salt content"),
            (lambda v: v > 2, -1, "High salt content"),
        )),
        # Trans fat is the one nutrient where an exact 0 is rewarded.
        ('trans-fat_value', (
            (lambda v: v == 0, 1, "No trans fat"),
            (lambda v: v > 0, -2, "Contains trans fat"),
        )),
        ('cholesterol_value', (
            (lambda v: 0 < v <= 20, 1, "Low cholesterol"),
            (lambda v: v > 60, -1, "High cholesterol"),
        )),
    )

    total = 0
    notes = []

    for column, bands in banded_rules:
        amount = row[column]
        for in_band, delta, label in bands:
            if in_band(amount):
                total += delta
                notes.append(label)
                break

    # Vitamins and minerals: presence alone earns the bonus.
    micronutrients = {
        'vitamin-a_value': 'Vitamin A',
        'vitamin-c_value': 'Vitamin C',
        'vitamin-d_value': 'Vitamin D',
        'calcium_value': 'Calcium',
        'iron_value': 'Iron',
        'potassium_value': 'Potassium',
    }
    for column, display_name in micronutrients.items():
        if row[column] > 0:
            total += 0.5
            notes.append(f"Contains {display_name}")

    # Map the score to a category: the first floor it strictly exceeds wins.
    grade = "Poor"
    for floor, label in ((5, "Excellent"), (2, "Good"), (-2, "Moderate")):
        if total > floor:
            grade = label
            break

    return pd.Series([grade, total, ', '.join(notes)])

def get_recommendations(food):
    """Build newline-separated dietary advice for an already scored product.

    The first sentence is chosen from ``food['Category']``; any category
    other than 'Poor', 'Moderate' or 'Good' gets the "excellent" wording.
    One further sentence is added for each nutrient check that is triggered.

    Parameters
    ----------
    food : mapping-like (for example a pandas Series or a dict)
        Must provide 'Category' plus the numeric keys 'fat_value',
        'sugars_value', 'salt_value', 'fiber_value', 'trans-fat_value' and
        'cholesterol_value'.

    Returns
    -------
    str
        The recommendation sentences joined with "\\n".
    """
    headline_by_category = {
        'Poor': "This product has a poor nutritional profile. Consider limiting its consumption.",
        'Moderate': "This product has a moderate nutritional profile. It can be consumed in moderation as part of a balanced diet.",
        'Good': "This product has a good nutritional profile. It can be a healthy part of your diet.",
    }
    fallback_headline = "This product has an excellent nutritional profile. It's a great choice for a healthy diet."

    advice = [headline_by_category.get(food['Category'], fallback_headline)]

    # Nutrient-specific checks as (triggered?, sentence), in output order.
    nutrient_checks = (
        (food['fat_value'] > 20,
         "This product is high in fat. Consider alternatives with less fat or consume in moderation."),
        (food['sugars_value'] > 15,
         "This product is high in sugar. Look for options with less added sugar or limit consumption."),
        (food['salt_value'] > 2,
         "This product is high in salt. Try to limit your intake to maintain healthy blood pressure."),
        (food['fiber_value'] < 2,
         "This product is low in fiber. Consider adding high-fiber foods to your diet for digestive health."),
        (food['trans-fat_value'] > 0,
         "This product contains trans fat. It's recommended to avoid or minimize consumption of trans fats."),
        (food['cholesterol_value'] > 60,
         "This product is high in cholesterol. If you have concerns about cholesterol, consult with a healthcare professional."),
    )
    advice.extend(sentence for triggered, sentence in nutrient_checks if triggered)

    return "\n".join(advice)

def get_food_info(barcode):
    """Look up a product by barcode and return a formatted nutrition report.

    Relies on the module-level ``df`` table and on ``evaluate_food`` and
    ``get_recommendations`` defined in this notebook.

    Parameters
    ----------
    barcode : str or number
        Value to look for in ``df['code']``. A row matches when its code
        equals ``barcode`` directly, or when the two are equal as text after
        surrounding whitespace is stripped.

    Returns
    -------
    str
        "Food not found in database" when no row matches. Otherwise a
        multi-line report for the first matching row: product, brand,
        category, score, reasons, nutrient values and recommendations.
    """
    codes = df['code']
    # The text comparison is added to the plain equality test so a barcode
    # with stray whitespace or of a different type than the column (int
    # versus str) is still found.
    matches = df[(codes == barcode) | (codes.astype(str).str.strip() == str(barcode).strip())]
    if matches.empty:
        return "Food not found in database"

    # Take an explicit copy of the row so adding the evaluation fields can
    # never write through to df or raise SettingWithCopyWarning.
    food = matches.iloc[0].copy()
    food['Category'], food['Score'], food['Reasons'] = evaluate_food(food)

    # Show any remaining missing field (e.g. product name or brand) as 'N/A'.
    food = food.fillna('N/A')

    # The report body is indented by four spaces; give the second and later
    # recommendation lines the same indent as the first.
    recommendations = get_recommendations(food).replace("\n", "\n    ")
    return f"""
    Product: {food['product_name_en']}
    Brand: {food['brands']}
    Category: {food['Category']}
    Score: {food['Score']:.2f}
    Reasons: {food['Reasons']}
    
    Nutritional Information (per 100g):
    Energy: {food['energy-kcal_value']} kcal
    Fat: {food['fat_value']}g
    Saturated Fat: {food['saturated-fat_value']}g
    Carbohydrates: {food['carbohydrates_value']}g
    Sugars: {food['sugars_value']}g
    Fiber: {food['fiber_value']}g
    Protein: {food['proteins_value']}g
    Salt: {food['salt_value']}g
    Trans Fat: {food['trans-fat_value']}g
    Cholesterol: {food['cholesterol_value']}mg
    Vitamin A: {food['vitamin-a_value']}µg
    Vitamin C: {food['vitamin-c_value']}mg
    Vitamin D: {food['vitamin-d_value']}µg
    Calcium: {food['calcium_value']}mg
    Iron: {food['iron_value']}mg
    Potassium: {food['potassium_value']}mg
    
    Recommendations:
    {recommendations}
    """

# Example usage: build the report for a fixed barcode, then print it.
report = get_food_info('90162602')
print(report)



    Product: Redbull
    Brand: Red Bull
    Category: Good
    Score: 4.00
    Reasons: Moderate calorie content, Moderate carbohydrate content, Low salt content, No trans fat
    
    Nutritional Information (per 100g):
    Energy: 46.0 kcal
    Fat: 0.0g
    Saturated Fat: 0.0g
    Carbohydrates: 11.0g
    Sugars: 11.0g
    Fiber: 0.0g
    Protein: 0.0g
    Salt: 0.1g
    Trans Fat: 0.0g
    Cholesterol: 0.0mg
    Vitamin A: 0.0µg
    Vitamin C: 0.0mg
    Vitamin D: 0.0µg
    Calcium: 0.0mg
    Iron: 0.0mg
    Potassium: 0.0mg
    
    Recommendations:
    This product has a good nutritional profile. It can be a healthy part of your diet.
This product is low in fiber. Consider adding high-fiber foods to your diet for digestive health.
    


In [13]:
# Show every column label of the table as a plain Python list.
list(df.columns)

['code',
 'product_name_en',
 'generic_name_en',
 'quantity',
 'serving_size',
 'brands',
 'categories',
 'labels',
 'countries',
 'origins',
 'ingredients_text_en',
 'allergens',
 'no_nutrition_data',
 'nutrition_data_per',
 'nutrition_data_prepared_per',
 'energy-kj_value',
 'energy-kj_unit',
 'energy-kcal_value',
 'energy-kcal_unit',
 'fat_value',
 'fat_unit',
 'saturated-fat_value',
 'saturated-fat_unit',
 'carbohydrates_value',
 'carbohydrates_unit',
 'sugars_value',
 'sugars_unit',
 'fiber_value',
 'fiber_unit',
 'proteins_value',
 'proteins_unit',
 'salt_value',
 'salt_unit',
 'sodium_value',
 'sodium_unit',
 'alcohol_value',
 'alcohol_unit',
 'energy_value',
 'energy_unit',
 'energy-from-fat_value',
 'energy-from-fat_unit',
 'myristic-acid_value',
 'myristic-acid_unit',
 'stearic-acid_value',
 'stearic-acid_unit',
 'arachidic-acid_value',
 'arachidic-acid_unit',
 'unsaturated-fat_value',
 'unsaturated-fat_unit',
 'monounsaturated-fat_value',
 'monounsaturated-fat_unit',
 'polyu

In [6]:
# Dimensions of the table as (number of rows, number of columns).
df.shape

(5574, 231)

In [8]:
# Count the rows where 'trans-fat_prepared_value' is missing.
df['trans-fat_prepared_value'].isna().sum()

5570