In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import joblib

In [None]:
# === Preprocessing and training ===

# Column groups used by the encoding and scaling steps below.
CATEGORICAL_COLS = ['Delegation', 'localite']
NUMERIC_COLS = ['Quantite_Totale', 'Annee_relative']

# Read the semicolon-separated dataset, discard the unit-price column and
# cast the target to integers.
df = pd.read_csv("E1.csv", sep=";").drop(columns=['Prix_Unitaire_Totale'])
df['Prix_Total'] = df['Prix_Total'].astype(int)

# Express the year as an offset from the earliest year in the data, then
# drop the absolute year.
min_year = df['Annee'].min()
df = df.assign(Annee_relative=df['Annee'] - min_year).drop(columns=['Annee'])

# Regression target.
y = df['Prix_Total']

# One-hot encode the categorical columns. Categories unseen at fit time are
# encoded as all zeros (handle_unknown='ignore') instead of raising.
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_cat = encoder.fit_transform(df[CATEGORICAL_COLS])
encoded_cat_df = pd.DataFrame(
    encoded_cat,
    columns=encoder.get_feature_names_out(CATEGORICAL_COLS),
)

# Swap the raw categorical columns for their one-hot expansion. The index is
# reset so the column-wise concat aligns row by row.
df = pd.concat(
    [df.drop(columns=CATEGORICAL_COLS).reset_index(drop=True), encoded_cat_df],
    axis=1,
)

# Feature matrix: everything except the target.
X = df.drop(columns=['Prix_Total'])

# Min-max scale the numeric features; all other columns are left untouched.
# NOTE(review): the scaler is fitted on every row of X, i.e. before any
# train/test split, so held-out rows influence the scaling range.
scaler = MinMaxScaler()
X_scaled = X.copy()
X_scaled[NUMERIC_COLS] = scaler.fit_transform(X[NUMERIC_COLS])

In [None]:
# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate regressors, keyed by the label printed in the comparison below.
models = {
    'KNN': KNeighborsRegressor(n_neighbors=5),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
}

# Fit every candidate and keep the one with the highest R² on the held-out split.
# NOTE(review): the winner is picked using the same split that is later used
# to report its metrics, so those metrics are optimistic.
best_model, best_r2 = None, -np.inf

for name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)
    r2 = r2_score(y_test, y_pred)
    print(f"{name} - R² Score: {r2:.4f}")
    # Strict comparison: on a tie the earlier candidate is kept.
    if r2 > best_r2:
        best_model, best_r2 = model, r2

print(f"\n✅ Best Model: {best_model.__class__.__name__} with R² Score: {best_r2:.4f}")

In [None]:
# Error metrics of the selected model on the held-out split.
y_pred_best = best_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred_best)
mse = mean_squared_error(y_test, y_pred_best)
rmse = np.sqrt(mse)  # same units as the target, unlike MSE

# One line per metric; R² is the score recorded during model selection.
print(
    f"🔹 MAE: {mae:.2f}",
    f"🔹 MSE: {mse:.2f}",
    f"🔹 RMSE: {rmse:.2f}",
    f"🔹 R² Score: {best_r2:.4f}",
    sep="\n",
)

In [None]:
# Persist the fitted model, both fitted transformers and the exact training
# column order, so predictions can be made without re-running the training.
feature_columns = X_scaled.columns.tolist()

artifacts = {
    'best_model_relative.pkl': best_model,
    'onehot_encoder_relative.pkl': encoder,
    'scaler_relative.pkl': scaler,
    'feature_columns_relative.pkl': feature_columns,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

In [None]:
# === Prediction function ===

def predict_price(annee, mois, quantite_totale, delegation, localite, min_year=2018):
    """Predict ``Prix_Total`` for one observation using the saved artifacts.

    The model, one-hot encoder, scaler and training column order are loaded
    from the ``*_relative.pkl`` files in the current working directory on
    every call, so the function always reflects the latest saved training run.

    Parameters
    ----------
    annee : int
        Calendar year of the observation.
    mois : int
        Month of the observation; passed to the model unscaled.
    quantite_totale : float
        Total quantity; scaled with the saved scaler.
    delegation, localite : str
        Categorical values. Whether unknown values raise or are encoded as
        zeros is decided by the saved encoder's ``handle_unknown`` setting.
    min_year : int, default 2018
        Year subtracted from ``annee`` to build ``Annee_relative``. It must
        equal the minimum ``Annee`` of the data the artifacts were trained on;
        the default keeps the previously hard-coded value.

    Returns
    -------
    The model's prediction for the single input row (first element of
    ``model.predict``).
    """
    # NOTE: joblib.load unpickles its input; only load artifacts you produced.
    model = joblib.load('best_model_relative.pkl')
    encoder = joblib.load('onehot_encoder_relative.pkl')
    scaler = joblib.load('scaler_relative.pkl')
    feature_columns = joblib.load('feature_columns_relative.pkl')

    categorical_cols = ['Delegation', 'localite']
    numeric_cols = ['Quantite_Totale', 'Annee_relative']

    input_df = pd.DataFrame([{
        'Mois': mois,
        'Quantite_Totale': quantite_totale,
        'Annee_relative': annee - min_year,
        'Delegation': delegation,
        'localite': localite,
    }])

    # One-hot encode the categorical inputs with the fitted encoder.
    encoded_input_df = pd.DataFrame(
        encoder.transform(input_df[categorical_cols]),
        columns=encoder.get_feature_names_out(categorical_cols),
    )

    # Scale the numeric inputs, then clamp to [0, 1] so values outside the
    # range seen at fit time (e.g. future years) are treated as the boundary.
    scaled_numeric = np.clip(scaler.transform(input_df[numeric_cols]), 0, 1)
    numeric_df = input_df.drop(columns=categorical_cols).reset_index(drop=True)
    numeric_df[numeric_cols] = scaled_numeric

    # Align with the training layout: same columns, same order, and any
    # column absent from this row filled with 0.
    features = pd.concat([numeric_df, encoded_input_df], axis=1).reindex(
        columns=feature_columns, fill_value=0
    )

    return model.predict(features)[0]

In [None]:
# === Test: single prediction ===
if __name__ == "__main__":
    # Keyword arguments make the meaning of each value explicit.
    pred = predict_price(
        annee=2025,
        mois=6,
        quantite_totale=20000,
        delegation="Monastir",
        localite="Jemmal",
    )
    print(f"\n🎯 Predicted Prix_Total: {pred:.2f}")


In [None]:
# === Test: batch of scenarios ===
if __name__ == "__main__":
    # Each case is (annee, mois, quantite_totale, delegation, localite).
    test_cases = [
        (2023, 6, 15000, "Monastir", "Jemmal"),
        (2023, 6, 15000, "Monastir", "Jemal"),  # presumably a misspelled localite; verify against the data
        (2023, 6, 15000, "x", "x"),  # presumably categories absent from the data
        (2020, 3, 18000, "Sousse", "Kalaâ Kebira"),
        (2019, 12, 5000, "Le Kef", "Dahmani"),
        (2022, 5, 25000, "Médenine", "Zarzis"),
        (2024, 1, 20000, "Monastir", "Sayada"),
        (2025, 6, 20000, "Monastir", "Jemmal"),
        (2030, 6, 20000, "Monastir", "Jemmal"),  # future extrapolation
        (2018, 1, 10000, "Sousse", "Kalaâ Kebira"),  # start of the data
    ]

    # Run every scenario and print one numbered line per prediction.
    for i, case in enumerate(test_cases, start=1):
        annee, mois, qte, delegation, localite = case
        pred = predict_price(*case)
        print(f"🧪 Test {i} — {annee}, {mois}, {qte}, {delegation}, {localite} → 📦 Prix_Total = {pred:.2f}")
