In [12]:
# notebooks/validation/02_validation_fusion_finale.ipynb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Banner for the final validation pass over the merged dataset.
print("🔍 VALIDATION FINALE DE LA FUSION")
print("=" * 40)

# Load the final merged CSV (the path is relative to the current working
# directory) and convert `date_debut_semaine` to datetime so that the
# max - min date arithmetic used further down yields a Timedelta.
df = pd.read_csv("../../data/cleaned/sursaud_iqvia_fusionne_finale.csv")
df['date_debut_semaine'] = pd.to_datetime(df['date_debut_semaine'])

print(f"✅ Dataset final : {df.shape}")

# Descriptive validation report: missing values, age-class sizes,
# per-class doses/emergency ratios and per-class date coverage.
print("\n📊 VALIDATION COMPLÈTE")
print("=" * 30)

# 1. Missing values: total number of null cells over the whole frame.
missing = df.isnull().sum().sum()
print(f"Valeurs manquantes : {missing}")

# 2. Age classes: number of rows per class (value_counts sorts by
#    descending count by default).
age_counts = df['classe_age'].value_counts()
print("\nClasses d'âge :")
for age, count in age_counts.items():
    print(f"   - {age}: {count:,} observations")

# Distinct age classes in order of first appearance; shared by the two
# per-class sections below.
age_classes = df['classe_age'].unique()

# 3. Per age class: mean of DOSES_J07E1 divided by mean of
#    taux_urgences_grippe.
print("\nRatios doses/urgences par classe d'âge :")
for age in age_classes:
    age_data = df[df['classe_age'] == age]
    doses_mean = age_data['DOSES_J07E1'].mean()
    urgences_mean = age_data['taux_urgences_grippe'].mean()
    # Report 0 instead of dividing by a zero, negative or NaN denominator.
    ratio = doses_mean / urgences_mean if urgences_mean > 0 else 0

    print(f"   - {age}: {ratio:.1f} doses/urgences")

# 4. Temporal coverage: days between the earliest and latest
#    `date_debut_semaine` within each age class.
print("\nCohérence temporelle :")
for age in age_classes:
    age_data = df[df['classe_age'] == age]
    week_starts = age_data['date_debut_semaine']
    period_days = (week_starts.max() - week_starts.min()).days
    print(f"   - {age}: {period_days} jours")

# 5. Final validation score.
# Each check is worth one point. The inputs are computed first, then the
# checks are listed as (passed, success message, failure message) so that a
# failing check is reported explicitly instead of being silently skipped.
n_rows = len(df)
n_classes = df['classe_age'].nunique()

# Check 4 input: days between the earliest and latest week start overall.
period_days = (
    df['date_debut_semaine'].max() - df['date_debut_semaine'].min()
).days

# Check 5 input: per-class ratio of mean doses to mean emergency rate.
# A zero, negative or NaN denominator yields 0 (same guard as the ratio
# report earlier in this cell); 0 falls outside the accepted range, so the
# check still fails in that case, without producing inf/NaN or a NumPy
# divide-by-zero warning.
ratios = []
for age in df['classe_age'].unique():
    age_data = df[df['classe_age'] == age]
    doses_mean = age_data['DOSES_J07E1'].mean()
    urgences_mean = age_data['taux_urgences_grippe'].mean()
    ratios.append(doses_mean / urgences_mean if urgences_mean > 0 else 0)

# Check 6 input: rows in these two age classes are counted as real data,
# every other row as estimated data.
real_data = int(
    df['classe_age'].isin(['65 ans ou plus', 'moins de 65 ans']).sum()
)
estimated_data = n_rows - real_data

checks = [
    # Check 1: volume
    (
        n_rows > 1000,
        f"✅ Volume suffisant : {n_rows:,} observations",
        f"❌ Volume insuffisant : {n_rows:,} observations",
    ),
    # Check 2: age classes
    (
        n_classes >= 5,
        f"✅ Classes d'âge complètes : {n_classes}",
        f"❌ Classes d'âge incomplètes : {n_classes}",
    ),
    # Check 3: missing values (`missing` is computed earlier in this cell)
    (
        missing == 0,
        "✅ Aucune valeur manquante",
        f"❌ Valeurs manquantes détectées : {missing}",
    ),
    # Check 4: period
    (
        period_days > 1000,
        f"✅ Période étendue : {period_days} jours",
        f"❌ Période trop courte : {period_days} jours",
    ),
    # Check 5: every per-class ratio must lie strictly between 0 and 10000
    (
        all(0 < r < 10000 for r in ratios),
        "✅ Ratios cohérents",
        "❌ Ratios incohérents",
    ),
    # Check 6: both real and estimated rows must be present
    (
        real_data > 0 and estimated_data > 0,
        f"✅ Mix données réelles ({real_data}) + estimées ({estimated_data})",
        f"❌ Pas de mix données réelles ({real_data}) + estimées ({estimated_data})",
    ),
]

total_checks = len(checks)
validation_score = 0

print()  # blank line separating the report above from the check list
for passed, ok_message, fail_message in checks:
    if passed:
        validation_score += 1
        print(ok_message)
    else:
        print(fail_message)

validation_percentage = (validation_score / total_checks) * 100
print(f"\n🎯 SCORE DE VALIDATION : {validation_score}/{total_checks} ({validation_percentage:.0f}%)")

# The merge is declared validated from 80% of passed checks upwards.
if validation_percentage >= 80:
    print("🎉 FUSION VALIDÉE - Prêt pour Prophet !")
else:
    print("⚠️ FUSION PARTIELLEMENT VALIDÉE")

# Final summary. `period_days`, `real_data` and `estimated_data` are the
# values computed earlier in this cell (overall date span in days, and the
# row counts for the classes treated as real vs. estimated data).
print("\n📊 RÉSUMÉ FINAL :")
print(f"   Observations : {len(df):,}")
print(f"   Classes d'âge : {df['classe_age'].nunique()}")
print(f"   Période : {period_days} jours")
print(f"   Données réelles : {real_data:,}")
print(f"   Données estimées : {estimated_data:,}")

🔍 VALIDATION FINALE DE LA FUSION
✅ Dataset final : (1284, 9)

📊 VALIDATION COMPLÈTE
Valeurs manquantes : 0

Classes d'âge :
   - 00-04 ans: 302 observations
   - 05-14 ans: 302 observations
   - 15-64 ans: 302 observations
   - Tous âges: 302 observations
   - 65 ans ou plus: 76 observations

Ratios doses/urgences par classe d'âge :
   - 65 ans ou plus: 738.6 doses/urgences
   - 00-04 ans: 500.0 doses/urgences
   - 05-14 ans: 500.0 doses/urgences
   - 15-64 ans: 1000.0 doses/urgences
   - Tous âges: 738.6 doses/urgences

Cohérence temporelle :
   - 65 ans ou plus: 1197 jours
   - 00-04 ans: 2107 jours
   - 05-14 ans: 2107 jours
   - 15-64 ans: 2107 jours
   - Tous âges: 2107 jours

✅ Volume suffisant : 1,284 observations
✅ Classes d'âge complètes : 5
✅ Aucune valeur manquante
✅ Période étendue : 2107 jours
✅ Ratios cohérents
✅ Mix données réelles (76) + estimées (1208)

🎯 SCORE DE VALIDATION : 6/6 (100%)
🎉 FUSION VALIDÉE - Prêt pour Prophet !
