In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
import pickle  # Ajout pour la sérialisation

def load_csv(file_path: str, frac: float = 0.1, random_state=None) -> pd.DataFrame:
    """Load a random sample of a CSV file and bucket its ``Dates`` column by month.

    Args:
        file_path: Path of the CSV file to read. The file must contain a
            ``Dates`` column that ``pd.to_datetime`` can parse.
        frac: Fraction of the rows to keep, forwarded to ``DataFrame.sample``.
            Defaults to ``0.1``.
        random_state: Seed forwarded to ``DataFrame.sample``. With the default
            ``None`` every call draws a different sample; pass an int to get
            the same rows on every run.

    Returns:
        The sampled rows, with ``Dates`` converted to monthly periods.

    Raises:
        KeyError: If the file has no ``Dates`` column.
    """
    sampled = pd.read_csv(file_path).sample(frac=frac, random_state=random_state)
    # Collapse each timestamp to its calendar month (period[M] dtype).
    sampled['Dates'] = pd.to_datetime(sampled['Dates']).dt.to_period('M')
    return sampled

def create_pipeline():
    """Build the unfitted preprocessing + classification pipeline.

    The ``X`` and ``Y`` columns go through a ``StandardScaler``; the ``Dates``
    column is one-hot encoded with ``handle_unknown='ignore'``. The combined
    features feed a ``LogisticRegression`` created with default settings.

    Returns:
        An unfitted ``Pipeline`` whose steps are named ``'preprocessor'`` and
        ``'classifier'``.
    """
    column_steps = [
        ('num', StandardScaler(), ['X', 'Y']),
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['Dates']),
    ]
    steps = [
        ('preprocessor', ColumnTransformer(transformers=column_steps)),
        ('classifier', LogisticRegression()),
    ]
    return Pipeline(steps)

def train_pipeline(pipeline, df, model_path: str = 'pipeline_model.pkl'):
    """Fit the pipeline on an 80/20 split of ``df``, score it and pickle it.

    Args:
        pipeline: Unfitted estimator exposing ``fit`` and ``score``.
        df: Frame holding the feature columns ``Dates``, ``X``, ``Y`` and the
            target column ``Category``.
        model_path: File the fitted pipeline is pickled to. Defaults to
            ``'pipeline_model.pkl'``; an existing file is overwritten.

    Returns:
        A ``(pipeline, score)`` tuple: the object passed in, now fitted, and
        the value returned by ``pipeline.score`` on the held-out 20 % of rows.

    Raises:
        KeyError: If ``df`` lacks one of the required columns.
        OSError: If ``model_path`` cannot be opened for writing.
    """
    features = df[['Dates', 'X', 'Y']]
    target = df['Category']

    # Fixed seed so the same rows are held out on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Fit on the training rows, evaluate on the held-out rows.
    pipeline.fit(X_train, y_train)
    score = pipeline.score(X_test, y_test)

    # Persist the fitted pipeline.
    with open(model_path, 'wb') as file:
        pickle.dump(pipeline, file)
    print(f"Pipeline sauvegardée dans '{model_path}'.")

    return pipeline, score

def make_prediction(pipeline, input_data: dict) -> dict:
    """Predict the label for a single observation.

    Args:
        pipeline: Fitted estimator exposing ``predict``.
        input_data: Mapping of column name to value for one row. A ``Dates``
            value may be anything ``pd.to_datetime`` accepts (string,
            ``datetime``, ``Timestamp``) or an already-built ``pd.Period``.

    Returns:
        ``{"prediction": <first element returned by pipeline.predict>}``.

    Raises:
        ValueError: If the ``Dates`` value cannot be parsed as a date.
    """
    input_df = pd.DataFrame([input_data])

    # The training frame stores ``Dates`` as monthly periods (see load_csv).
    # A raw string or timestamp would never match a fitted category, and the
    # encoder's handle_unknown='ignore' would silently drop the date feature.
    # Apply the same conversion here; a caller-supplied Period is left as-is.
    raw_date = input_data.get('Dates')
    if raw_date is not None and not isinstance(raw_date, pd.Period):
        input_df['Dates'] = pd.to_datetime(input_df['Dates']).dt.to_period('M')

    prediction = pipeline.predict(input_df)
    return {"prediction": prediction[0]}

def data_pipeline(csv_path: str, input_data: dict):
    """Run the full workflow: load the data, build, train, then predict once.

    Args:
        csv_path: Path of the CSV file handed to ``load_csv``.
        input_data: Single observation handed to ``make_prediction``.

    Returns:
        A dict with ``"model_score"`` (the score reported by
        ``train_pipeline``) and ``"prediction"`` (the dict returned by
        ``make_prediction``, itself keyed by ``"prediction"``).
    """
    # Load the sampled training data.
    frame = load_csv(csv_path)

    # Build a fresh pipeline and fit it; this also pickles the fitted model.
    fitted, test_score = train_pipeline(create_pipeline(), frame)

    # Score the caller's observation with the fitted pipeline.
    outcome = make_prediction(fitted, input_data)

    return {"model_score": test_score, "prediction": outcome}
