# Exploring the Nutritional Psychiatry Dataset

This notebook demonstrates how to load, analyze, and visualize data from the Nutritional Psychiatry Dataset.

In [None]:
# Import required libraries
import os
import json
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set plotting style (these are global settings, so they affect every figure drawn below)
plt.style.use('ggplot')      # matplotlib style sheet
sns.set_palette("viridis")   # default seaborn color palette
sns.set_context("notebook")  # seaborn scaling preset for element/label sizes

%matplotlib inline

## 1. Loading the Dataset

First, let's load the dataset from the JSON files.

In [None]:
def load_food_data(directory):
    """Load every ``*.json`` file in ``directory`` into a list of parsed documents.

    Files are read in sorted path order so the returned list (and everything
    derived from it) is reproducible; ``glob`` alone returns paths in an
    arbitrary, filesystem-dependent order.

    Args:
        directory: Folder containing the JSON files. Only files directly
            inside it are matched (no recursion).

    Returns:
        list: One parsed JSON document per file that loaded successfully.
        Files that cannot be read or parsed are reported with ``print`` and
        skipped, so a single bad file never aborts the whole load.
    """
    food_data = []

    files = sorted(glob.glob(os.path.join(directory, "*.json")))
    print(f"Found {len(files)} food data files")

    for file_path in files:
        try:
            # Be explicit about UTF-8: the platform default encoding (e.g. cp1252
            # on Windows) would mis-decode or reject non-ASCII food names.
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception as e:
            # Deliberate best-effort: report the problem and keep loading the rest.
            print(f"Error loading {file_path}: {e}")
            continue
        food_data.append(data)

    return food_data

# Update this path to your data directory
# NOTE: this is a relative path, so it is resolved against the kernel's current
# working directory; if "Found 0 food data files" is printed, check the path first.
data_directory = "../../../data/enriched/ai_generated"
foods = load_food_data(data_directory)

## 2. Basic Dataset Exploration

In [None]:
# Basic statistics about the dataset
print(f"Total number of foods: {len(foods)}")

# Count foods by category (foods without a "category" key are counted as "Unknown")
categories = {}
for food in foods:
    category = food.get("category", "Unknown")
    categories[category] = categories.get(category, 0) + 1

# Build the summary table, largest category first.
# - sort_values returns a new frame, so nothing is mutated in place and
#   re-running the cell always produces the same result
# - kind="stable" gives tied categories a deterministic order
# - reset_index leaves a clean 0..n-1 row index after sorting
categories_df = (
    pd.DataFrame(list(categories.items()), columns=["Category", "Count"])
    .sort_values("Count", ascending=False, kind="stable")
    .reset_index(drop=True)
)
categories_df

In [None]:
# Bar chart of the category table: one bar per category, bar length = food count
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=categories_df, x="Count", y="Category", ax=ax)
ax.set_title("Number of Foods by Category")
fig.tight_layout()
plt.show()

## 3. Extracting Nutrients for Analysis

Now let's extract specific nutrients from the dataset and create a DataFrame for easier analysis.

In [None]:
def extract_nutrients(foods):
    """Flatten selected nutrient fields of each food record into one DataFrame row.

    Args:
        foods: Iterable of food dicts. Each may contain the nested sections
            ``standard_nutrients``, ``brain_nutrients`` (which may itself hold
            an ``omega3`` sub-section) and ``bioactive_compounds``.

    Returns:
        pandas.DataFrame: One row per food with the columns ``food_id``,
        ``name``, ``category`` followed by the selected nutrient columns.
        A nutrient that is missing from a record is stored as ``None``.
        The column set is fixed, so an empty ``foods`` still yields a frame
        with every column (downstream ``groupby('category')`` keeps working).
    """
    standard_keys = ["calories", "protein_g", "carbohydrates_g", "fat_g", "fiber_g", "sugars_g"]
    brain_keys = ["tryptophan_mg", "tyrosine_mg", "vitamin_b6_mg", "folate_mcg",
                  "vitamin_b12_mcg", "vitamin_d_mcg", "magnesium_mg", "zinc_mg", "iron_mg", "selenium_mcg"]
    # output column name -> key inside the nested "omega3" section
    omega3_fields = {
        "omega3_total_g": "total_g",
        "omega3_epa_mg": "epa_mg",
        "omega3_dha_mg": "dha_mg",
        "omega3_ala_mg": "ala_mg",
    }
    bioactive_keys = ["polyphenols_mg", "flavonoids_mg", "anthocyanins_mg", "carotenoids_mg"]
    columns = ["food_id", "name", "category",
               *standard_keys, *brain_keys, *omega3_fields, *bioactive_keys]

    data = []

    for food in foods:
        # Basic food info
        food_info = {
            "food_id": food.get("food_id", ""),
            "name": food.get("name", ""),
            "category": food.get("category", ""),
        }

        # `or {}` (instead of a .get default) also covers sections that are
        # present but null in the JSON, which would otherwise parse to None
        # and crash on the following .get calls.
        std_nutrients = food.get("standard_nutrients") or {}
        for key in standard_keys:
            food_info[key] = std_nutrients.get(key, None)

        brain_nutrients = food.get("brain_nutrients") or {}
        for key in brain_keys:
            food_info[key] = brain_nutrients.get(key, None)

        omega3 = brain_nutrients.get("omega3") or {}
        for column, key in omega3_fields.items():
            food_info[column] = omega3.get(key, None)

        bioactive = food.get("bioactive_compounds") or {}
        for key in bioactive_keys:
            food_info[key] = bioactive.get(key, None)

        data.append(food_info)

    # Explicit columns keep the schema stable even when `data` is empty
    return pd.DataFrame(data, columns=columns)

# Create DataFrame: one row per food, one column per nutrient selected in extract_nutrients
nutrients_df = extract_nutrients(foods)
nutrients_df.head()

In [None]:
# Basic summary statistics
# NOTE(review): with default arguments describe() reports numeric columns only, so the
# text columns (food_id, name, category) are presumably left out — confirm on real data.
nutrients_df.describe()

## 4. Analyzing Brain Nutrients

Let's look at the distribution of key brain nutrients across food categories.

In [None]:
# Mean tryptophan per food category, ordered from highest to lowest
tryptophan_by_category = (
    nutrients_df
    .groupby("category")["tryptophan_mg"]
    .mean()
    .sort_values(ascending=False)
)

# One bar per category on an explicitly created axes
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=tryptophan_by_category.index, y=tryptophan_by_category.values, ax=ax)
ax.set_title("Average Tryptophan Content by Food Category")
ax.set_xlabel("Food Category")
ax.set_ylabel("Tryptophan (mg per 100g)")
# Slant the category labels so long names do not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()

In [None]:
# Omega-3 distribution: the 15 foods with the highest total omega-3 value

# Keep only the foods that actually report a total omega-3 value
omega3_data = nutrients_df[nutrients_df["omega3_total_g"].notna()]

# Highest values first, then take the first 15 rows
top_omega3 = omega3_data.sort_values(by="omega3_total_g", ascending=False).iloc[:15]

# Draw one bar per food
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=top_omega3, x="name", y="omega3_total_g", ax=ax)
ax.set_title("Top 15 Foods by Omega-3 Content")
ax.set_xlabel("Food")
ax.set_ylabel("Omega-3 (g per 100g)")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()

## 5. Exploring Mental Health Impacts

Now let's analyze the mental health impacts associated with different foods.

In [None]:
def extract_mental_health_impacts(foods):
    """Flatten the ``mental_health_impacts`` of every food into one DataFrame.

    Args:
        foods: Iterable of food dicts. Each may carry a ``mental_health_impacts``
            list of impact dicts.

    Returns:
        pandas.DataFrame: One row per (food, impact) pair with the columns
        ``food_name``, ``food_category``, ``impact_type``, ``direction``,
        ``strength``, ``confidence``, ``time_to_effect``, ``research_context``
        and ``research_support_count`` (number of entries in the impact's
        ``research_support`` list). Foods without impacts contribute no rows.
        The column set is fixed, so the frame can be filtered by column name
        even when no impact was found at all.
    """
    columns = [
        "food_name", "food_category", "impact_type", "direction", "strength",
        "confidence", "time_to_effect", "research_context", "research_support_count",
    ]
    impacts = []

    for food in foods:
        food_name = food.get("name", "")
        food_category = food.get("category", "")

        # `or []` also covers a key that is present but null in the JSON
        for impact in food.get("mental_health_impacts") or []:
            impacts.append({
                "food_name": food_name,
                "food_category": food_category,
                "impact_type": impact.get("impact_type", ""),
                "direction": impact.get("direction", ""),
                "strength": impact.get("strength", 0),
                "confidence": impact.get("confidence", 0),
                "time_to_effect": impact.get("time_to_effect", ""),
                "research_context": impact.get("research_context", ""),
                # a null research_support counts as zero references instead of
                # raising TypeError from len(None)
                "research_support_count": len(impact.get("research_support") or []),
            })

    # Explicit columns keep the schema stable even when `impacts` is empty
    return pd.DataFrame(impacts, columns=columns)

# Create impacts DataFrame: one row per (food, impact) pair.
# impacts_df is read as a notebook-level variable by find_foods_for_mental_health_goal
# further down, so this cell must run before that function is called.
impacts_df = extract_mental_health_impacts(foods)
impacts_df.head()

In [None]:
# Number of recorded impacts per impact type (most frequent first)
impact_counts = impacts_df.loc[:, "impact_type"].value_counts()

# Bar chart of those counts on an explicitly created axes
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=impact_counts.index, y=impact_counts.values, ax=ax)
ax.set_title("Types of Mental Health Impacts in the Dataset")
ax.set_xlabel("Impact Type")
ax.set_ylabel("Count")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()

In [None]:
# Scatter of impact strength against confidence.
# Color encodes the impact type; marker size encodes the number of research references.
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=impacts_df,
    x="confidence",
    y="strength",
    hue="impact_type",
    size="research_support_count",
    sizes=(20, 200),
    ax=ax,
)

ax.set_title("Strength vs. Confidence of Mental Health Impacts")
ax.set_xlabel("Confidence Rating (1-10)")
ax.set_ylabel("Strength of Effect (1-10)")
# Anchor the legend outside the plotting area, to the right of the axes
ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
ax.grid(True)
fig.tight_layout()
plt.show()

## 6. Finding Foods for Specific Mental Health Goals

Now let's create a function to identify foods that might help with specific mental health goals.

In [None]:
def find_foods_for_mental_health_goal(impact_type, direction="positive", min_strength=5,
                                      min_confidence=5, impacts=None):
    """Find foods whose recorded impact matches a mental health goal.

    Args:
        impact_type: Value of the ``impact_type`` column to match exactly
            (e.g. ``"anxiety_reduction"``).
        direction: Value of the ``direction`` column to match exactly.
        min_strength: Inclusive lower bound on the ``strength`` column.
        min_confidence: Inclusive lower bound on the ``confidence`` column.
        impacts: Optional DataFrame to search. It must provide the columns
            ``impact_type``, ``direction``, ``strength``, ``confidence``,
            ``food_name``, ``food_category`` and ``time_to_effect``. When
            omitted, the notebook-level ``impacts_df`` is used, so existing
            calls behave exactly as before.

    Returns:
        pandas.DataFrame: Matching rows sorted by ``strength`` and then
        ``confidence`` (both descending), restricted to the columns
        ``food_name``, ``food_category``, ``strength``, ``confidence`` and
        ``time_to_effect``. The original row index is preserved.
    """
    # Fall back to the notebook-level frame only when no explicit one is given,
    # which makes the function usable (and testable) on any impacts table.
    source = impacts_df if impacts is None else impacts

    mask = (
        (source["impact_type"] == impact_type) &
        (source["direction"] == direction) &
        (source["strength"] >= min_strength) &
        (source["confidence"] >= min_confidence)
    )
    filtered_impacts = source[mask].sort_values(["strength", "confidence"], ascending=False)

    return filtered_impacts[["food_name", "food_category", "strength", "confidence", "time_to_effect"]]

# Example: Foods for anxiety reduction
# (uses the function defaults: direction="positive", min_strength=5, min_confidence=5)
anxiety_reduction_foods = find_foods_for_mental_health_goal("anxiety_reduction")
anxiety_reduction_foods.head(10)

In [None]:
# Example: Foods for cognitive enhancement
# (same defaults as above: positive direction, strength >= 5, confidence >= 5)
cognitive_enhancement_foods = find_foods_for_mental_health_goal("cognitive_enhancement")
cognitive_enhancement_foods.head(10)

## 7. Correlations Between Nutrients and Mental Health Impacts

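Let's explore correlations between selected nutrients and the strength of each food's top-scoring mental health impact (score = strength × confidence), restricted to foods whose top impact is positive.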

In [None]:
# First, create a combined dataset of nutrients and top impacts
def create_impact_nutrients_dataset(food_records=None):
    """Combine selected nutrients with each food's top-scoring mental health impact.

    For every food that has at least one impact, the impact with the highest
    ``strength × confidence`` score is picked (the first one wins on ties) and
    stored next to a handful of nutrient values.

    The input records are only read. Scores are computed on the fly rather than
    written into the impact dicts, so re-running the cell never alters the
    loaded dataset.

    Args:
        food_records: Optional iterable of food dicts. When omitted, the
            notebook-level ``foods`` list is used, so existing zero-argument
            calls behave as before.

    Returns:
        pandas.DataFrame: One row per food with impacts. Columns are
        ``food_name``, ``category``, six nutrient columns, and
        ``impact_type``, ``impact_direction``, ``impact_strength``,
        ``impact_confidence``, ``impact_score``. The column set is fixed, so
        an empty result can still be filtered by column name.
    """
    records = foods if food_records is None else food_records

    columns = [
        "food_name", "category",
        "tryptophan_mg", "omega3_total_g", "vitamin_b12_mcg",
        "magnesium_mg", "polyphenols_mg", "fiber_g",
        "impact_type", "impact_direction", "impact_strength",
        "impact_confidence", "impact_score",
    ]
    combined_data = []

    for food in records:
        # `or []` / `or {}` also cover keys that are present but null in the JSON
        impacts = food.get("mental_health_impacts") or []

        # Skip foods without impacts
        if not impacts:
            continue

        std_nutrients = food.get("standard_nutrients") or {}
        brain_nutrients = food.get("brain_nutrients") or {}
        bioactive = food.get("bioactive_compounds") or {}
        omega3 = brain_nutrients.get("omega3") or {}

        # max() returns the first maximal element, so ties keep dataset order
        top_impact = max(impacts, key=_impact_score)

        combined_data.append({
            "food_name": food.get("name", ""),
            "category": food.get("category", ""),

            # Nutrients
            "tryptophan_mg": brain_nutrients.get("tryptophan_mg", None),
            "omega3_total_g": omega3.get("total_g", None),
            "vitamin_b12_mcg": brain_nutrients.get("vitamin_b12_mcg", None),
            "magnesium_mg": brain_nutrients.get("magnesium_mg", None),
            "polyphenols_mg": bioactive.get("polyphenols_mg", None),
            "fiber_g": std_nutrients.get("fiber_g", None),

            # Impact
            "impact_type": top_impact.get("impact_type", ""),
            "impact_direction": top_impact.get("direction", ""),
            "impact_strength": top_impact.get("strength", 0),
            "impact_confidence": top_impact.get("confidence", 0),
            "impact_score": _impact_score(top_impact),
        })

    # Explicit columns keep the schema stable even when no food has impacts
    return pd.DataFrame(combined_data, columns=columns)


def _impact_score(impact):
    """Return strength × confidence for one impact dict; missing or null values count as 0."""
    return (impact.get("strength") or 0) * (impact.get("confidence") or 0)

# Create combined dataset: one row per food that has at least one mental health
# impact (foods without impacts are skipped inside the function)
combined_df = create_impact_nutrients_dataset()
combined_df.head()

In [None]:
# Correlate nutrient values with impact strength, using only foods whose
# top impact is positive
is_positive = combined_df["impact_direction"].eq("positive")
positive_impacts = combined_df[is_positive]

# Pairwise correlation matrix over the chosen nutrients plus impact strength
nutrient_columns = [
    "tryptophan_mg",
    "omega3_total_g",
    "vitamin_b12_mcg",
    "magnesium_mg",
    "polyphenols_mg",
    "fiber_g",
]
correlations = positive_impacts[[*nutrient_columns, "impact_strength"]].corr()

# Annotated heatmap with the color scale pinned to the full [-1, 1] range
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlations, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
ax.set_title("Correlations Between Nutrients and Positive Impact Strength")
fig.tight_layout()
plt.show()

## 8. Creating a Food Recommendation Engine

Finally, let's create a simple recommendation engine to suggest foods for specific health goals.

In [None]:
def recommend_foods(goal, top_n=5, min_strength=6, min_confidence=5):
    """Recommend foods for a specific mental health goal.

    Args:
        goal: Short goal name, matched case-insensitively and ignoring
            surrounding whitespace. One of ``"mood"``, ``"anxiety"``,
            ``"cognition"``, ``"energy"``, ``"stress"``, ``"sleep"``.
        top_n: Maximum number of foods to return.
        min_strength: Inclusive lower bound on impact strength. The default
            of 6 matches the previously hard-coded threshold.
        min_confidence: Inclusive lower bound on impact confidence. The
            default of 5 matches the previously hard-coded threshold.

    Returns:
        pandas.DataFrame or None: Up to ``top_n`` rows as returned by
        ``find_foods_for_mental_health_goal`` for positive impacts of the
        mapped impact type, or ``None`` (after printing the list of valid
        goals) when ``goal`` is not recognized.
    """
    # Map goals to impact types
    goal_mapping = {
        "mood": "mood_elevation",
        "anxiety": "anxiety_reduction",
        "cognition": "cognitive_enhancement",
        "energy": "energy_increase",
        "stress": "stress_reduction",
        "sleep": "sleep_improvement"
    }

    # strip() so that input such as " Anxiety " still resolves
    impact_type = goal_mapping.get(goal.strip().lower())
    if not impact_type:
        print(f"Unknown goal: {goal}. Available goals: {', '.join(goal_mapping.keys())}")
        return None

    # Find foods with the desired impact
    recommendations = find_foods_for_mental_health_goal(
        impact_type=impact_type,
        direction="positive",
        min_strength=min_strength,
        min_confidence=min_confidence
    ).head(top_n)

    return recommendations

# Example: Recommend foods for anxiety reduction
# ("anxiety" is mapped to the "anxiety_reduction" impact type inside recommend_foods)
anxiety_recommendations = recommend_foods("anxiety", top_n=10)
anxiety_recommendations

In [None]:
# Visualize recommendations
def plot_recommendations(recommendations, goal):
    """Plot recommended foods as bars of ``strength × confidence``.

    Args:
        recommendations: DataFrame with the columns ``food_name``,
            ``food_category``, ``strength`` and ``confidence`` (the shape
            returned by ``recommend_foods``). ``None`` or an empty frame is
            accepted: a message is printed and nothing is drawn.
        goal: Goal name used in the figure title.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # recommend_foods returns None for an unknown goal, and the filters may
    # match no food at all; in both cases there is nothing meaningful to draw.
    if recommendations is None or len(recommendations) == 0:
        print(f"No recommendations to plot for {goal}")
        return

    # assign() builds a new frame, so the caller's DataFrame is left untouched
    # (writing the column in place would modify it and, because the input is
    # usually a slice, trigger pandas' SettingWithCopyWarning).
    scored = recommendations.assign(
        score=recommendations["strength"] * recommendations["confidence"]
    ).sort_values("score", ascending=False)

    # Plot
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x="food_name", y="score", data=scored, hue="food_category", ax=ax)
    ax.set_title(f"Top Food Recommendations for {goal.title()}")
    ax.set_xlabel("Food")
    ax.set_ylabel("Recommendation Score (Strength × Confidence)")
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.legend(title="Food Category")
    fig.tight_layout()
    plt.show()

# Plot anxiety recommendations (bar height = strength × confidence of each food's impact)
plot_recommendations(anxiety_recommendations, "anxiety")

In [None]:
# Example: Recommend foods for cognitive enhancement
# (top_n is left at its default of 5)
cognition_recommendations = recommend_foods("cognition")
plot_recommendations(cognition_recommendations, "cognition")

## 9. Conclusion

In this notebook, we've explored the Nutritional Psychiatry Dataset by:

1. Loading and examining basic dataset statistics
2. Analyzing brain-specific nutrients across food categories
3. Exploring mental health impacts and their distribution
4. Finding correlations between nutrients and mental health effects
5. Creating a simple food recommendation engine

This demonstrates the dataset's potential for applications in nutritional psychiatry, dietary planning, and wellness.