# In [None]:
import pandas as pd
import json
from datetime import datetime, timedelta
import requests

class AIDataPipeline:
    """Collect data from the recipe REST API and export it for AI training.

    Typical use: ``login()``, then ``collect_training_data()``, then
    ``export_for_ai_training()`` on the collected data.

    Attributes are documented in ``__init__``.
    """

    # Engagement weight of each known interaction type. Shared by the
    # user/recipe matrix and the per-user statistics so both stay in sync.
    _INTERACTION_WEIGHTS = {
        'CONSULTATION': 1,
        'FAVORI_AJOUTE': 3,
        'PARTAGE': 2,
        'RECHERCHE': 1,
    }

    # Column order of the per-user statistics table (kept even when empty so
    # the exported CSV always has a header).
    _USER_STATS_COLUMNS = [
        'user_id', 'recency', 'frequency', 'monetary', 'last_interaction',
    ]

    # Non-categorical columns of the content-feature table, used to build an
    # empty but well-formed frame when there are no recipes.
    _CONTENT_BASE_COLUMNS = ['recipe_id', 'temps_total', 'vegetarien', 'tags']

    def __init__(self, api_base_url='http://localhost:8080/api', timeout=30):
        """Create a pipeline bound to one API instance.

        Args:
            api_base_url: Base URL of the REST API, without trailing slash.
            timeout: Timeout in seconds applied to every HTTP request, so a
                stalled server cannot hang the pipeline forever.
        """
        self.api_base_url = api_base_url
        self.timeout = timeout
        self.token = None  # bearer token, set by login()

    def login(self, email, password):
        """Authenticate against the API and store the bearer token.

        Args:
            email: Account e-mail.
            password: Account password (sent as ``motDePasse``).

        Raises:
            requests.RequestException: On network failure or HTTP error status.
            KeyError: If the response body has no ``token`` field.
        """
        response = requests.post(
            f"{self.api_base_url}/v1/auth/login",
            json={'email': email, 'motDePasse': password},
            timeout=self.timeout,
        )
        # Fail loudly on 4xx/5xx instead of trying to read a token from an
        # error payload.
        response.raise_for_status()
        self.token = response.json()['token']
        print("✅ Authentifié")

    def collect_training_data(self):
        """Fetch every dataset needed for training.

        Returns:
            dict with keys ``recipes``, ``interactions``, ``searches`` and
            ``ratings``, each holding the decoded JSON returned by the API
            (or ``[]`` for the best-effort endpoints that failed).
        """
        headers = {'Authorization': f'Bearer {self.token}'}

        print("📊 Collecte des données d'entraînement...")

        recipes = self._fetch_all_recipes(headers)
        interactions = self._fetch_all_interactions(headers)
        searches = self._fetch_search_history(headers)
        ratings = self._fetch_ratings(headers)

        print("✅ Données collectées:")
        print(f"  - Recettes: {len(recipes)}")
        print(f"  - Interactions: {len(interactions)}")
        print(f"  - Recherches: {len(searches)}")
        print(f"  - Notes: {len(ratings)}")

        return {
            'recipes': recipes,
            'interactions': interactions,
            'searches': searches,
            'ratings': ratings,
        }

    def _get_json(self, path, headers):
        """GET ``api_base_url + path`` and return the decoded JSON body.

        Raises:
            requests.RequestException: On network failure or HTTP error status.
            ValueError: If the body is not valid JSON.
        """
        response = requests.get(
            f"{self.api_base_url}{path}",
            headers=headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def _get_json_or_empty(self, path, headers):
        """Best-effort variant of ``_get_json``: return ``[]`` on failure.

        Only request/decoding errors are absorbed (and reported); anything
        else, such as ``KeyboardInterrupt``, propagates.
        """
        try:
            return self._get_json(path, headers)
        except (requests.RequestException, ValueError) as exc:
            print(f"⚠️ Échec de la récupération de {path}: {exc}")
            return []

    def _fetch_all_recipes(self, headers):
        """Fetch all recipes; recipes are mandatory, so errors propagate."""
        return self._get_json("/v1/recettes/all", headers)

    def _fetch_all_interactions(self, headers):
        """Fetch all recipe interactions, or ``[]`` if the call fails."""
        # The endpoint may need adapting to the deployed API.
        return self._get_json_or_empty("/v1/recette-interactions/all", headers)

    def _fetch_search_history(self, headers):
        """Fetch the search history, or ``[]`` if the call fails."""
        return self._get_json_or_empty("/v1/historique-recherche/all", headers)

    def _fetch_ratings(self, headers):
        """Fetch all ratings, or ``[]`` if the call fails."""
        return self._get_json_or_empty("/v1/notes/all", headers)

    def prepare_user_item_matrix(self, data):
        """Build the user x recipe matrix used for collaborative filtering.

        Each cell is the sum of the weights of the interactions between that
        user and that recipe; pairs without interaction are 0. Interaction
        types missing from the weight table contribute 0.

        Args:
            data: dict with an ``interactions`` list; each item must provide
                ``userId``, ``recetteId``, ``typeInteraction`` and
                ``dateInteraction``.

        Returns:
            pandas.DataFrame indexed by user id with one column per recipe id
            (an empty frame when there are no interactions).
        """
        records = [
            {
                'user_id': interaction['userId'],
                'recipe_id': interaction['recetteId'],
                'interaction_type': interaction['typeInteraction'],
                'timestamp': interaction['dateInteraction'],
            }
            for interaction in data['interactions']
        ]

        if records:
            df = pd.DataFrame(records)
            # Unknown types map to NaN; make their zero contribution explicit.
            df['weight'] = (
                df['interaction_type'].map(self._INTERACTION_WEIGHTS).fillna(0)
            )
            matrix = (
                df.groupby(['user_id', 'recipe_id'])['weight']
                .sum()
                .unstack(fill_value=0)
            )
        else:
            # A column-less frame would raise KeyError on 'interaction_type'.
            matrix = pd.DataFrame()

        print(f"📊 Matrice créée: {matrix.shape}")

        return matrix

    def prepare_content_features(self, recipes):
        """Build the recipe feature table used for content-based filtering.

        ``type_recette``, ``difficulte`` and ``cuisine`` are one-hot encoded;
        ``temps_total`` is preparation time plus cooking time, with missing
        or null durations counted as 0.

        Args:
            recipes: iterable of recipe dicts; ``id`` is required, every other
                field falls back to a default.

        Returns:
            pandas.DataFrame with one row per recipe (an empty frame with the
            non-categorical columns when there are no recipes).
        """
        rows = [
            {
                'recipe_id': recipe['id'],
                'type_recette': recipe.get('typeRecette', 'plat'),
                'difficulte': recipe.get('difficulte', 'MOYEN'),
                # `or 0` also covers an explicit JSON null, which .get()'s
                # default does not.
                'temps_total': (recipe.get('tempsPreparation') or 0)
                + (recipe.get('tempsCuisson') or 0),
                'vegetarien': recipe.get('vegetarien', False),
                'cuisine': recipe.get('cuisine', 'francaise'),
                'tags': recipe.get('tags', []),
            }
            for recipe in recipes
        ]

        if rows:
            df_encoded = pd.get_dummies(
                pd.DataFrame(rows),
                columns=['type_recette', 'difficulte', 'cuisine'],
            )
        else:
            # get_dummies would raise KeyError on a column-less frame.
            df_encoded = pd.DataFrame(columns=self._CONTENT_BASE_COLUMNS)

        print(f"📊 Features de contenu: {df_encoded.shape}")

        return df_encoded

    def export_for_ai_training(self, data, output_dir='ai_training_data'):
        """Write the training files derived from ``data`` into ``output_dir``.

        Files written: ``user_recipe_matrix.csv``, ``recipe_features.csv``,
        ``raw_data.json`` and ``user_statistics.csv``.

        Args:
            data: dict as returned by ``collect_training_data``.
            output_dir: Target directory, created if missing.

        Returns:
            The ``output_dir`` that was written to.
        """
        import os
        os.makedirs(output_dir, exist_ok=True)

        # 1. User x recipe matrix (the index carries the user ids, so keep it).
        matrix = self.prepare_user_item_matrix(data)
        matrix.to_csv(os.path.join(output_dir, 'user_recipe_matrix.csv'))

        # 2. Content features.
        content_features = self.prepare_content_features(data['recipes'])
        content_features.to_csv(
            os.path.join(output_dir, 'recipe_features.csv'), index=False
        )

        # 3. Raw data dump; default=str stringifies non-JSON-native values.
        raw_path = os.path.join(output_dir, 'raw_data.json')
        with open(raw_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2, default=str)

        # 4. Per-user statistics.
        user_stats = self._compute_user_stats(data)
        user_stats.to_csv(
            os.path.join(output_dir, 'user_statistics.csv'), index=False
        )

        print(f"✅ Données exportées dans {output_dir}/")

        return output_dir

    def _compute_user_stats(self, data):
        """Compute per-user RFM-style statistics from the interactions.

        For each user: ``recency`` is the number of whole days since the last
        interaction, ``frequency`` the number of interactions, ``monetary``
        the sum of interaction weights (unknown types count as 1) and
        ``last_interaction`` the ISO timestamp of the latest interaction.

        Args:
            data: dict with an ``interactions`` list; each item must provide
                ``userId``, ``typeInteraction`` and an ISO-8601
                ``dateInteraction``.

        Returns:
            pandas.DataFrame with the columns of ``_USER_STATS_COLUMNS``,
            one row per user (zero rows when there are no interactions).
        """
        by_user = {}
        for interaction in data['interactions']:
            by_user.setdefault(interaction['userId'], []).append(interaction)

        rows = []
        for user_id, interactions in by_user.items():
            # fromisoformat() only accepts a trailing 'Z' from Python 3.11 on.
            dates = [
                datetime.fromisoformat(
                    i['dateInteraction'].replace('Z', '+00:00')
                )
                for i in interactions
            ]
            last_seen = max(dates)
            # Use the timestamp's own tzinfo so naive and aware values are
            # each compared against a "now" of the same kind.
            recency = (datetime.now(last_seen.tzinfo) - last_seen).days

            monetary = sum(
                self._INTERACTION_WEIGHTS.get(i['typeInteraction'], 1)
                for i in interactions
            )

            rows.append({
                'user_id': user_id,
                'recency': recency,
                'frequency': len(interactions),
                'monetary': monetary,
                'last_interaction': last_seen.isoformat(),
            })

        return pd.DataFrame(rows, columns=self._USER_STATS_COLUMNS)

# Usage example
if __name__ == "__main__":
    import getpass
    import os

    # Credentials are read from the environment (with an interactive
    # fallback) so that no secret is stored in the source file.
    email = os.environ.get('AI_PIPELINE_EMAIL') or input("Email: ")
    password = (
        os.environ.get('AI_PIPELINE_PASSWORD')
        or getpass.getpass("Mot de passe: ")
    )

    pipeline = AIDataPipeline()
    pipeline.login(email, password)

    # Collect the data
    data = pipeline.collect_training_data()

    # Export it for AI training
    pipeline.export_for_ai_training(data)