# Exploratory Data Analysis (EDA) - Recipe Recommender
This notebook explores the recipes and reviews datasets to understand the distributions and patterns.

In [None]:
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys

# Add project root to path so the `src` package can be imported.
# The relative entry is resolved against the kernel's working directory
# (assumes the notebook is run from a folder directly below the project
# root -- TODO confirm). The membership check keeps this cell idempotent:
# re-running it does not pile up duplicate '..' entries in sys.path.
if '..' not in sys.path:
    sys.path.append('..')

from src.data_loader import DataLoader

# Apply seaborn's "whitegrid" theme to every figure drawn afterwards.
sns.set_theme(style="whitegrid")

In [None]:
# Read both raw datasets through the project's loader (recipes first,
# then reviews), pointing it at the raw-data folder.
loader = DataLoader(data_dir="../data/raw")
recipes, reviews = loader.load_recipes(), loader.load_reviews()

# Quick sanity check: report the shape of each loaded dataset, one per line.
print(
    f"Recipes shape: {recipes.shape}",
    f"Reviews shape: {reviews.shape}",
    sep="\n",
)

In [None]:
# Show the recipes schema and a few sample rows before plotting.
print("\n--- Recipes Schema ---")
print(recipes.schema)

print("\n--- Sample Recipes ---")
print(recipes.head())

# The category chart is only drawn when the dataset has a RecipeCategory column.
if "RecipeCategory" in recipes.columns:
    # Count recipes per category (the count column is named "len") and keep
    # the 15 most frequent categories.
    category_counts = (
        recipes.group_by("RecipeCategory")
        .len()
        .sort("len", descending=True)
        .head(15)
    )

    # Use an explicit Figure/Axes pair rather than pyplot's implicit
    # "current figure", so the plot does not depend on hidden global state.
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(
        data=category_counts.to_pandas(),
        x="len",
        y="RecipeCategory",
        ax=ax,
    )
    # Replace the raw column names ("len", "RecipeCategory") with readable
    # axis labels so the figure stands on its own.
    ax.set(
        title="Top 15 Recipe Categories",
        xlabel="Number of recipes",
        ylabel="Recipe category",
    )
    plt.show()

In [None]:
# Tabulate how many reviews were given for each rating value, ordered by rating.
print("\n--- Ratings Distribution ---")
rating_dist = reviews.group_by("Rating").len().sort("Rating")
print(rating_dist)

# Use an explicit Figure/Axes pair rather than pyplot's implicit
# "current figure", so the plot does not depend on hidden global state.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=rating_dist.to_pandas(), x="Rating", y="len", ax=ax)
# "len" is only the name of the count column; label the axis in plain words.
ax.set(
    title="Distribution of Ratings",
    xlabel="Rating",
    ylabel="Number of reviews",
)
plt.show()