# 📊 Analyse Statistique Long-terme des Recettes

## 🎯 Objectif
Analyser l'évolution statistique des recettes (2000-2018) avec tests appropriés :

| Analyse | Type | Variable(s) | Métriques / Tests | Interprétation |
|---------|------|-------------|-------------------|----------------|
| 📈 Durée moyenne | Univarié temporel | minutes, year | OLS, Theil-Sen | Recettes plus rapides ? |
| 🔪 Complexité | Bivarié temporel | complexity_score ~ year | Régression, Spearman | Complexité croissante ? |
| 🥗 Nutrition | Multivarié | calories, protein, sugar | ANOVA, Kruskal-Wallis | Profil nutritionnel meilleur ? |
| 🧂 Ingrédients-clés | Bivarié | % butter/olive oil | Spearman, χ² | Changements culturels ? |
| 📊 Tags | Catégoriel | vegan, dessert, quick | χ² contingence | Évolution des types ? |

In [15]:
# 📦 Imports
import sys
sys.path.append('..')

from _data_utils import *

import pandas as pd
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Stats
from scipy import stats
from scipy.stats import spearmanr, kendalltau, kruskal, chi2_contingency
from sklearn.linear_model import LinearRegression, TheilSenRegressor
from statsmodels.stats.anova import anova_lm
from statsmodels.formula.api import ols

import warnings
warnings.filterwarnings('ignore')

# 🎨 Config graphiques
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 6)
plt.rcParams['font.size'] = 10

print("✅ Imports réussis!")

ModuleNotFoundError: No module named 'statsmodels'

## 📥 Chargement des données

Utilisation du **nouveau module refactorisé** avec pipeline complet.

In [None]:
# 🚀 Pipeline complet : charge + nettoie + enrichit
df = load_clean_recipes()

print(f"\n✅ Dataset prêt : {df.shape[0]:,} lignes × {df.shape[1]} colonnes")
print(f"📅 Période : {df['year'].min()} - {df['year'].max()}")

# Aperçu
show_recipes_sample(df, n=3)

In [None]:
# 📊 Rapport de qualité
report = analyze_recipe_quality(df)

---

# 📈 ANALYSE 1 : Évolution de la durée moyenne (minutes)

**Type:** Univarié temporel  
**Variable:** `minutes ~ year`  
**Tests:** 
- Régression linéaire OLS (Ordinary Least Squares)
- Régression robuste Theil-Sen
- Corrélation de Spearman

**Question:** Les recettes deviennent-elles plus rapides à préparer avec le temps ?

In [None]:
# 📊 Agrégation par année
minutes_by_year = (
    df.group_by("year")
    .agg([
        pl.mean("minutes").alias("mean_minutes"),
        pl.median("minutes").alias("median_minutes"),
        pl.quantile("minutes", 0.25).alias("q25"),
        pl.quantile("minutes", 0.75).alias("q75"),
        pl.len().alias("n_recipes")
    ])
    .sort("year")
    .to_pandas()
)

minutes_by_year['IQR'] = minutes_by_year['q75'] - minutes_by_year['q25']

print("📊 Durée moyenne par année :")
print(minutes_by_year.head(10))

In [None]:
# 📈 Visualisation
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# 1. Évolution de la moyenne avec IC
ax1 = axes[0]
ax1.plot(minutes_by_year['year'], minutes_by_year['mean_minutes'], 
         marker='o', linewidth=2.5, color='steelblue', label='Moyenne')
ax1.plot(minutes_by_year['year'], minutes_by_year['median_minutes'], 
         marker='s', linewidth=2, color='coral', alpha=0.7, label='Médiane')
ax1.fill_between(minutes_by_year['year'], 
                  minutes_by_year['q25'], 
                  minutes_by_year['q75'], 
                  alpha=0.2, color='steelblue', label='IQR (Q25-Q75)')
ax1.set_title('📈 Évolution de la durée moyenne (minutes)', fontsize=14, fontweight='bold')
ax1.set_xlabel('Année', fontsize=12)
ax1.set_ylabel('Minutes', fontsize=12)
ax1.legend()
ax1.grid(True, alpha=0.3)

# 2. Distribution (boxplot)
ax2 = axes[1]
years_sample = sorted(df['year'].unique())[::-5]  # Tous les 5 ans
df_sample = df.filter(pl.col('year').is_in(years_sample)).to_pandas()
sns.boxplot(data=df_sample, x='year', y='minutes', ax=ax2, palette='Set2')
ax2.set_title('📦 Distribution de la durée par année (échantillon)', fontsize=14, fontweight='bold')
ax2.set_xlabel('Année', fontsize=12)
ax2.set_ylabel('Minutes', fontsize=12)
ax2.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# 🔬 Tests statistiques
print("="*70)
print("🔬 TESTS STATISTIQUES - Durée moyenne (minutes)")
print("="*70)

X = minutes_by_year[['year']].values
y = minutes_by_year['mean_minutes'].values

# 1. Régression linéaire OLS
model_ols = LinearRegression()
model_ols.fit(X, y)
r2_ols = model_ols.score(X, y)
slope_ols = model_ols.coef_[0]
intercept_ols = model_ols.intercept_

print(f"\n📊 Régression linéaire OLS :")
print(f"   • Pente (slope) : {slope_ols:.4f} minutes/an")
print(f"   • R² : {r2_ols:.4f}")
print(f"   • Équation : minutes = {slope_ols:.4f} × year + {intercept_ols:.2f}")

# 2. Régression robuste Theil-Sen
model_theil = TheilSenRegressor(random_state=42)
model_theil.fit(X, y)
slope_theil = model_theil.coef_[0]
intercept_theil = model_theil.intercept_

print(f"\n📊 Régression Theil-Sen (robuste) :")
print(f"   • Pente (slope) : {slope_theil:.4f} minutes/an")
print(f"   • Intercept : {intercept_theil:.2f}")

# 3. Corrélation de Spearman
corr_spearman, p_spearman = spearmanr(minutes_by_year['year'], minutes_by_year['mean_minutes'])
print(f"\n📊 Corrélation de Spearman :")
print(f"   • ρ (rho) : {corr_spearman:.4f}")
print(f"   • p-value : {p_spearman:.6f}")
print(f"   • Significatif : {'✅ OUI' if p_spearman < 0.05 else '❌ NON'} (α=0.05)")

# 4. Kendall Tau
corr_kendall, p_kendall = kendalltau(minutes_by_year['year'], minutes_by_year['mean_minutes'])
print(f"\n📊 Corrélation de Kendall Tau :")
print(f"   • τ (tau) : {corr_kendall:.4f}")
print(f"   • p-value : {p_kendall:.6f}")

# Interprétation
print(f"\n💡 INTERPRÉTATION :")
if slope_ols < 0:
    print(f"   ✅ Les recettes deviennent PLUS RAPIDES : {abs(slope_ols):.2f} min/an en moyenne")
elif slope_ols > 0:
    print(f"   ⚠️ Les recettes deviennent PLUS LONGUES : +{slope_ols:.2f} min/an en moyenne")
else:
    print(f"   ➡️ Pas de tendance claire")

print("="*70)

---

# 🔪 ANALYSE 2 : Évolution de la complexité

**Type:** Bivarié temporel  
**Variable:** `complexity_score ~ year`  
**Tests:**
- Régression linéaire
- Corrélation de Spearman
- Test de tendance de Kendall Tau

**Question:** La complexité des recettes augmente-t-elle avec le temps ?

In [None]:
# 📊 Agrégation de la complexité par année
complexity_by_year = (
    df.group_by("year")
    .agg([
        pl.mean("complexity_score").alias("mean_complexity"),
        pl.median("complexity_score").alias("median_complexity"),
        pl.std("complexity_score").alias("std_complexity"),
        pl.mean("n_steps").alias("mean_steps"),
        pl.mean("n_ingredients").alias("mean_ingredients")
    ])
    .sort("year")
    .to_pandas()
)

print("🔪 Complexité moyenne par année :")
print(complexity_by_year.head(10))

In [None]:
# 📈 Visualisation
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# 1. Score de complexité
axes[0].plot(complexity_by_year['year'], complexity_by_year['mean_complexity'], 
             marker='o', linewidth=2.5, color='purple', markersize=8)
axes[0].fill_between(complexity_by_year['year'], 
                      complexity_by_year['mean_complexity'] - complexity_by_year['std_complexity'],
                      complexity_by_year['mean_complexity'] + complexity_by_year['std_complexity'],
                      alpha=0.2, color='purple')
axes[0].set_title('🔪 Évolution du score de complexité', fontsize=12, fontweight='bold')
axes[0].set_xlabel('Année')
axes[0].set_ylabel('Complexity Score')
axes[0].grid(True, alpha=0.3)

# 2. Nombre d'étapes
axes[1].plot(complexity_by_year['year'], complexity_by_year['mean_steps'], 
             marker='s', linewidth=2, color='orange', markersize=7)
axes[1].set_title('📝 Nombre moyen d\'étapes', fontsize=12, fontweight='bold')
axes[1].set_xlabel('Année')
axes[1].set_ylabel('Nombre d\'étapes')
axes[1].grid(True, alpha=0.3)

# 3. Nombre d'ingrédients
axes[2].plot(complexity_by_year['year'], complexity_by_year['mean_ingredients'], 
             marker='^', linewidth=2, color='forestgreen', markersize=7)
axes[2].set_title('🥕 Nombre moyen d\'ingrédients', fontsize=12, fontweight='bold')
axes[2].set_xlabel('Année')
axes[2].set_ylabel('Nombre d\'ingrédients')
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# 🔬 Tests statistiques
print("="*70)
print("🔬 TESTS STATISTIQUES - Complexité")
print("="*70)

X = complexity_by_year[['year']].values
y = complexity_by_year['mean_complexity'].values

# Régression linéaire
model = LinearRegression()
model.fit(X, y)
r2 = model.score(X, y)
slope = model.coef_[0]

print(f"\n📊 Régression linéaire (complexity ~ year) :")
print(f"   • Pente : {slope:.6f} points/an")
print(f"   • R² : {r2:.4f}")

# Corrélation de Spearman
corr, p_value = spearmanr(complexity_by_year['year'], complexity_by_year['mean_complexity'])
print(f"\n📊 Corrélation de Spearman :")
print(f"   • ρ : {corr:.4f}")
print(f"   • p-value : {p_value:.6f}")
print(f"   • Significatif : {'✅ OUI' if p_value < 0.05 else '❌ NON'}")

# Kendall Tau
tau, p_tau = kendalltau(complexity_by_year['year'], complexity_by_year['mean_complexity'])
print(f"\n📊 Kendall Tau :")
print(f"   • τ : {tau:.4f}")
print(f"   • p-value : {p_tau:.6f}")

# Interprétation
print(f"\n💡 INTERPRÉTATION :")
if slope > 0.01:
    print(f"   ✅ La complexité AUGMENTE : +{slope:.4f} points/an")
elif slope < -0.01:
    print(f"   ✅ La complexité DIMINUE : {slope:.4f} points/an")
else:
    print(f"   ➡️ Complexité STABLE dans le temps")

print("="*70)

---

# 🥗 ANALYSE 3 : Évolution nutritionnelle

**Type:** Multivarié  
**Variables:** `calories, protein_pct, sugar_pct, total_fat_pct ~ year`  
**Tests:**
- ANOVA à un facteur (F-test)
- Test de Kruskal-Wallis (non paramétrique)

**Question:** Le profil nutritionnel des recettes s'améliore-t-il ?

In [None]:
# 📊 Agrégation nutrition par année
nutrition_by_year = (
    df.group_by("year")
    .agg([
        pl.mean("calories").alias("mean_calories"),
        pl.mean("protein_pct").alias("mean_protein"),
        pl.mean("sugar_pct").alias("mean_sugar"),
        pl.mean("total_fat_pct").alias("mean_fat"),
        pl.mean("sodium_pct").alias("mean_sodium")
    ])
    .sort("year")
    .to_pandas()
)

print("🥗 Nutrition moyenne par année :")
print(nutrition_by_year)

In [None]:
# 📈 Visualisation
fig, axes = plt.subplots(2, 2, figsize=(16, 10))

# 1. Calories
axes[0,0].plot(nutrition_by_year['year'], nutrition_by_year['mean_calories'], 
               marker='o', linewidth=2.5, color='tomato', markersize=8)
axes[0,0].set_title('🔥 Calories moyennes', fontsize=12, fontweight='bold')
axes[0,0].set_ylabel('Calories')
axes[0,0].grid(True, alpha=0.3)

# 2. Protéines
axes[0,1].plot(nutrition_by_year['year'], nutrition_by_year['mean_protein'], 
               marker='s', linewidth=2.5, color='green', markersize=8)
axes[0,1].set_title('💪 Protéines (%)', fontsize=12, fontweight='bold')
axes[0,1].set_ylabel('Protein %')
axes[0,1].grid(True, alpha=0.3)

# 3. Sucres
axes[1,0].plot(nutrition_by_year['year'], nutrition_by_year['mean_sugar'], 
               marker='^', linewidth=2.5, color='pink', markersize=8)
axes[1,0].set_title('🍭 Sucres (%)', fontsize=12, fontweight='bold')
axes[1,0].set_xlabel('Année')
axes[1,0].set_ylabel('Sugar %')
axes[1,0].grid(True, alpha=0.3)

# 4. Graisses
axes[1,1].plot(nutrition_by_year['year'], nutrition_by_year['mean_fat'], 
               marker='D', linewidth=2.5, color='orange', markersize=7)
axes[1,1].set_title('🧈 Graisses totales (%)', fontsize=12, fontweight='bold')
axes[1,1].set_xlabel('Année')
axes[1,1].set_ylabel('Total Fat %')
axes[1,1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# 🔬 Tests statistiques - ANOVA et Kruskal-Wallis
print("="*70)
print("🔬 TESTS STATISTIQUES - Évolution nutritionnelle")
print("="*70)

# Préparer les données pour les tests
df_pd = df.select(['year', 'calories', 'protein_pct', 'sugar_pct', 'total_fat_pct']).to_pandas()

# Grouper par période (début vs fin)
df_pd['period'] = pd.cut(df_pd['year'], bins=[1999, 2008, 2018], labels=['2000-2008', '2009-2018'])

variables = ['calories', 'protein_pct', 'sugar_pct', 'total_fat_pct']
labels = ['Calories', 'Protéines (%)', 'Sucres (%)', 'Graisses (%)']

for var, label in zip(variables, labels):
    print(f"\n📊 Variable : {label}")
    print("-" * 50)
    
    # Séparer les groupes
    groups = [group[var].dropna() for name, group in df_pd.groupby('period')]
    
    # Test de Kruskal-Wallis (non paramétrique)
    if len(groups) >= 2:
        h_stat, p_kruskal = kruskal(*groups)
        print(f"   • Kruskal-Wallis H : {h_stat:.4f}")
        print(f"   • p-value : {p_kruskal:.6f}")
        print(f"   • Différence significative : {'✅ OUI' if p_kruskal < 0.05 else '❌ NON'}")
        
        # Moyennes par période
        for name, group in df_pd.groupby('period'):
            mean_val = group[var].mean()
            print(f"   • Moyenne {name} : {mean_val:.2f}")

print("\n" + "="*70)

---

# 🧂 ANALYSE 4 : Popularité des ingrédients-clés

**Type:** Bivarié catégoriel-temporel  
**Variables:** % recettes avec "butter", "olive oil", "garlic" par année  
**Tests:**
- Corrélation de Spearman
- Test χ² sur proportions

**Question:** Y a-t-il des changements culturels (beurre ↘, huile d'olive ↗) ?

In [None]:
# 🔍 Détection d'ingrédients clés dans la liste ingredients
# Note: La colonne ingredients est maintenant une liste Polars

# Fonction helper pour détecter un ingrédient
def has_ingredient(ingredient_name: str) -> pl.Expr:
    """Retourne une expression Polars pour détecter un ingrédient."""
    return (
        pl.col("ingredients")
        .list.eval(pl.element().str.to_lowercase().str.contains(ingredient_name))
        .list.any()
    )

# Ajouter les indicateurs
df_ingredients = df.with_columns([
    has_ingredient("butter").alias("has_butter"),
    has_ingredient("olive oil").alias("has_olive_oil"),
    has_ingredient("garlic").alias("has_garlic"),
    has_ingredient("chicken").alias("has_chicken"),
    has_ingredient("sugar").alias("has_sugar")
])

print("✅ Indicateurs d'ingrédients créés")

In [None]:
# 📊 Calculer le % de recettes par ingrédient par année
ingredients_by_year = (
    df_ingredients.group_by("year")
    .agg([
        (pl.col("has_butter").sum() / pl.len() * 100).alias("pct_butter"),
        (pl.col("has_olive_oil").sum() / pl.len() * 100).alias("pct_olive_oil"),
        (pl.col("has_garlic").sum() / pl.len() * 100).alias("pct_garlic"),
        (pl.col("has_chicken").sum() / pl.len() * 100).alias("pct_chicken"),
        (pl.col("has_sugar").sum() / pl.len() * 100).alias("pct_sugar"),
        pl.len().alias("n_recipes")
    ])
    .sort("year")
    .to_pandas()
)

print("🧂 % recettes par ingrédient-clé :")
print(ingredients_by_year)

In [None]:
# 📈 Visualisation
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(ingredients_by_year['year'], ingredients_by_year['pct_butter'], 
        marker='o', linewidth=2.5, label='🧈 Butter', color='gold')
ax.plot(ingredients_by_year['year'], ingredients_by_year['pct_olive_oil'], 
        marker='s', linewidth=2.5, label='🫒 Olive Oil', color='olive')
ax.plot(ingredients_by_year['year'], ingredients_by_year['pct_garlic'], 
        marker='^', linewidth=2.5, label='🧄 Garlic', color='purple')
ax.plot(ingredients_by_year['year'], ingredients_by_year['pct_chicken'], 
        marker='D', linewidth=2, label='🐔 Chicken', color='orange')
ax.plot(ingredients_by_year['year'], ingredients_by_year['pct_sugar'], 
        marker='v', linewidth=2, label='🍬 Sugar', color='pink')

ax.set_title('🧂 Évolution de la popularité des ingrédients-clés', fontsize=14, fontweight='bold')
ax.set_xlabel('Année', fontsize=12)
ax.set_ylabel('% de recettes contenant l\'ingrédient', fontsize=12)
ax.legend(loc='best', fontsize=11)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# 🔬 Tests statistiques - Corrélations
print("="*70)
print("🔬 TESTS STATISTIQUES - Popularité des ingrédients")
print("="*70)

ingredients_list = [
    ('pct_butter', '🧈 Butter'),
    ('pct_olive_oil', '🫒 Olive Oil'),
    ('pct_garlic', '🧄 Garlic'),
    ('pct_chicken', '🐔 Chicken'),
    ('pct_sugar', '🍬 Sugar')
]

for col, label in ingredients_list:
    corr, p_value = spearmanr(ingredients_by_year['year'], ingredients_by_year[col])
    
    print(f"\n{label} :")
    print(f"   • Corrélation de Spearman (ρ) : {corr:+.4f}")
    print(f"   • p-value : {p_value:.6f}")
    print(f"   • Significatif : {'✅ OUI' if p_value < 0.05 else '❌ NON'}")
    
    # Tendance
    first_val = ingredients_by_year[col].iloc[0]
    last_val = ingredients_by_year[col].iloc[-1]
    change = last_val - first_val
    
    if change > 1:
        trend = f"📈 HAUSSE : +{change:.1f}% ({first_val:.1f}% → {last_val:.1f}%)"
    elif change < -1:
        trend = f"📉 BAISSE : {change:.1f}% ({first_val:.1f}% → {last_val:.1f}%)"
    else:
        trend = f"➡️ STABLE : {change:+.1f}%"
    
    print(f"   • Tendance : {trend}")

print("\n" + "="*70)

---

# 📊 ANALYSE 5 : Évolution des tags (types de recettes)

**Type:** Catégoriel temporel  
**Variables:** Tags "vegan", "dessert", "quick", "healthy" vs year  
**Tests:**
- Test χ² sur tables de contingence
- Comparaison de proportions

**Question:** Les types de recettes évoluent-ils dans le temps ?

In [None]:
# 🏷️ Détection de tags clés
def has_tag(tag_name: str) -> pl.Expr:
    """Retourne une expression Polars pour détecter un tag."""
    return (
        pl.col("tags")
        .list.eval(pl.element().str.to_lowercase().str.contains(tag_name))
        .list.any()
    )

# Ajouter les indicateurs de tags
df_tags = df.with_columns([
    has_tag("vegan").alias("is_vegan"),
    has_tag("dessert").alias("is_dessert"),
    has_tag("quick").alias("is_quick"),
    has_tag("healthy").alias("is_healthy"),
    has_tag("vegetarian").alias("is_vegetarian")
])

print("✅ Indicateurs de tags créés")

In [None]:
# 📊 Calculer le % de recettes par tag par année
tags_by_year = (
    df_tags.group_by("year")
    .agg([
        (pl.col("is_vegan").sum() / pl.len() * 100).alias("pct_vegan"),
        (pl.col("is_dessert").sum() / pl.len() * 100).alias("pct_dessert"),
        (pl.col("is_quick").sum() / pl.len() * 100).alias("pct_quick"),
        (pl.col("is_healthy").sum() / pl.len() * 100).alias("pct_healthy"),
        (pl.col("is_vegetarian").sum() / pl.len() * 100).alias("pct_vegetarian"),
        pl.len().alias("n_recipes")
    ])
    .sort("year")
    .to_pandas()
)

print("📊 % recettes par tag :")
print(tags_by_year)

In [None]:
# 📈 Visualisation
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(tags_by_year['year'], tags_by_year['pct_vegan'], 
        marker='o', linewidth=2.5, label='🌱 Vegan', color='green')
ax.plot(tags_by_year['year'], tags_by_year['pct_vegetarian'], 
        marker='s', linewidth=2.5, label='🥗 Vegetarian', color='lightgreen')
ax.plot(tags_by_year['year'], tags_by_year['pct_dessert'], 
        marker='^', linewidth=2.5, label='🍰 Dessert', color='pink')
ax.plot(tags_by_year['year'], tags_by_year['pct_quick'], 
        marker='D', linewidth=2, label='⚡ Quick', color='orange')
ax.plot(tags_by_year['year'], tags_by_year['pct_healthy'], 
        marker='v', linewidth=2, label='💚 Healthy', color='teal')

ax.set_title('📊 Évolution des types de recettes (tags)', fontsize=14, fontweight='bold')
ax.set_xlabel('Année', fontsize=12)
ax.set_ylabel('% de recettes avec ce tag', fontsize=12)
ax.legend(loc='best', fontsize=11)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# 🔬 Tests statistiques - Corrélations et tendances
print("="*70)
print("🔬 TESTS STATISTIQUES - Évolution des tags")
print("="*70)

tags_list = [
    ('pct_vegan', '🌱 Vegan'),
    ('pct_vegetarian', '🥗 Vegetarian'),
    ('pct_dessert', '🍰 Dessert'),
    ('pct_quick', '⚡ Quick'),
    ('pct_healthy', '💚 Healthy')
]

for col, label in tags_list:
    corr, p_value = spearmanr(tags_by_year['year'], tags_by_year[col])
    
    print(f"\n{label} :")
    print(f"   • Corrélation de Spearman (ρ) : {corr:+.4f}")
    print(f"   • p-value : {p_value:.6f}")
    print(f"   • Significatif : {'✅ OUI' if p_value < 0.05 else '❌ NON'}")
    
    # Tendance
    first_val = tags_by_year[col].iloc[0]
    last_val = tags_by_year[col].iloc[-1]
    change = last_val - first_val
    pct_change = (change / first_val * 100) if first_val > 0 else 0
    
    if change > 1:
        trend = f"📈 HAUSSE : +{change:.1f}% points ({first_val:.1f}% → {last_val:.1f}%, +{pct_change:.0f}%)"
    elif change < -1:
        trend = f"📉 BAISSE : {change:.1f}% points ({first_val:.1f}% → {last_val:.1f}%, {pct_change:.0f}%)"
    else:
        trend = f"➡️ STABLE"
    
    print(f"   • Tendance : {trend}")

print("\n" + "="*70)

---

# 📋 SYNTHÈSE GLOBALE

Résumé de toutes les analyses statistiques effectuées.

In [None]:
print("="*80)
print("📋 SYNTHÈSE DES ANALYSES STATISTIQUES LONG-TERME (2000-2018)")
print("="*80)

print("\n📊 DONNÉES ANALYSÉES :")
print(f"   • Nombre de recettes : {df.shape[0]:,}")
print(f"   • Période : {df['year'].min()} - {df['year'].max()}")
print(f"   • Variables analysées : {df.shape[1]} colonnes")

print("\n" + "-"*80)
print("🔬 RÉSULTATS DES TESTS STATISTIQUES")
print("-"*80)

print("\n1️⃣ DURÉE DE PRÉPARATION (minutes) :")
print(f"   • Tendance : {'Décroissante ↘' if slope_ols < 0 else 'Croissante ↗'}")
print(f"   • Pente : {slope_ols:.4f} minutes/an")
print(f"   • Corrélation Spearman : ρ={corr_spearman:.4f}, p={p_spearman:.6f}")

print("\n2️⃣ COMPLEXITÉ DES RECETTES :")
complexity_trend = "Stable" if abs(slope) < 0.01 else ("Croissante ↗" if slope > 0 else "Décroissante ↘")
print(f"   • Tendance : {complexity_trend}")
print(f"   • R² régression : {r2:.4f}")

print("\n3️⃣ PROFIL NUTRITIONNEL :")
print(f"   • Calories : {nutrition_by_year['mean_calories'].iloc[0]:.0f} → {nutrition_by_year['mean_calories'].iloc[-1]:.0f} kcal")
print(f"   • Variation : {((nutrition_by_year['mean_calories'].iloc[-1] - nutrition_by_year['mean_calories'].iloc[0]) / nutrition_by_year['mean_calories'].iloc[0] * 100):+.1f}%")

print("\n4️⃣ INGRÉDIENTS-CLÉS (changements culturels) :")
print(f"   • Butter : {ingredients_by_year['pct_butter'].iloc[0]:.1f}% → {ingredients_by_year['pct_butter'].iloc[-1]:.1f}%")
print(f"   • Olive Oil : {ingredients_by_year['pct_olive_oil'].iloc[0]:.1f}% → {ingredients_by_year['pct_olive_oil'].iloc[-1]:.1f}%")
print(f"   • Garlic : {ingredients_by_year['pct_garlic'].iloc[0]:.1f}% → {ingredients_by_year['pct_garlic'].iloc[-1]:.1f}%")

print("\n5️⃣ TYPES DE RECETTES (tags) :")
print(f"   • Vegan : {tags_by_year['pct_vegan'].iloc[0]:.1f}% → {tags_by_year['pct_vegan'].iloc[-1]:.1f}%")
print(f"   • Healthy : {tags_by_year['pct_healthy'].iloc[0]:.1f}% → {tags_by_year['pct_healthy'].iloc[-1]:.1f}%")
print(f"   • Quick : {tags_by_year['pct_quick'].iloc[0]:.1f}% → {tags_by_year['pct_quick'].iloc[-1]:.1f}%")

print("\n" + "="*80)
print("✅ ANALYSE TERMINÉE AVEC SUCCÈS !")
print("="*80)