In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error
from flask import Flask, request, jsonify
import numpy as np
import joblib

# Flask application object. No routes are attached to it in the cells shown here.
app = Flask(__name__)

# Step 1: Load and prepare data
def load_data(file_path='./29.11.2024 BDD Client (1).xlsx'):
    """Read the client workbook and return it as a numerically encoded DataFrame.

    The sheet's columns are renamed by position to the 13 names listed below,
    the OUI/NON usage flags are recoded to 1/0, ``Type_Offre`` is recoded to
    0 (Prépayée) / 1 (Postpayée), and ``MMR`` is converted to numbers.

    Parameters
    ----------
    file_path : str or path-like, optional
        Excel workbook to read. Defaults to the workbook this notebook was
        written against, so a bare ``load_data()`` reads the same file as
        before.

    Returns
    -------
    pandas.DataFrame
        One row per sheet row. Flag and ``Type_Offre`` values that are not in
        the recoding tables (including missing cells) are left as they were
        read; ``MMR`` cells that cannot be parsed as numbers become NaN.

    Raises
    ------
    ValueError
        Raised by pandas when the sheet does not have exactly 13 columns.
    """
    def recode(series, mapping):
        # Element-wise lookup instead of ``Series.replace``: ``replace`` only
        # yields numeric columns because pandas silently downcasts the object
        # result, which emits a FutureWarning on recent pandas and is slated
        # for removal. Values absent from ``mapping`` pass through unchanged,
        # exactly as ``replace`` would leave them.
        return series.map(lambda value: mapping.get(value, value))

    data = pd.read_excel(file_path)

    # Rename columns properly (positional: relies on the sheet's column order)
    data.columns = ['Numero', 'Nom', 'Type_Offre', 'MMR', 'Voix_minutes', 'Data_MO',
                    'Youtube', 'Navigation', 'Gaming', 'Musique', 'Reseaux_sociaux', 'Films', 'Sport']

    # Convert binary columns (OUI/NON) to 1/0
    binary_columns = ['Youtube', 'Navigation', 'Gaming', 'Musique', 'Reseaux_sociaux', 'Films', 'Sport']
    yes_no = {'OUI': 1, 'NON': 0}
    for column in binary_columns:
        data[column] = recode(data[column], yes_no)

    # Map Type_Offre to numeric
    data['Type_Offre'] = recode(data['Type_Offre'], {'Prépayée': 0, 'Postpayée': 1})

    # Unparseable MMR cells become NaN rather than raising.
    data['MMR'] = pd.to_numeric(data['MMR'], errors='coerce')
    return data

# Step 2: Train models and save them
def train_and_save_models(data, random_state=42):
    """Fit four random-forest models on ``data`` and persist them with joblib.

    Models and the files they are written to (relative to the current working
    directory):

    * ``ent_model.pkl``   - classifier for the ``Sport`` and ``Films`` flags
    * ``offer_model.pkl`` - classifier for ``Type_Offre``
    * ``mmr_model.pkl``   - regressor for ``MMR``
    * ``pref_model.pkl``  - classifier for all seven usage flags
    * ``model_features.pkl`` - the feature column index the models were fit on

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the feature and target columns named below, already
        numerically encoded.
    random_state : int, optional
        Seed used for the train/test split and for every forest, so repeated
        runs on the same data write the same models. The default keeps the
        split seed the function has always used.

    Returns
    -------
    None
    """
    X = data[['Voix_minutes', 'Data_MO', 'Youtube', 'Navigation', 'Gaming', 'Musique', 'Reseaux_sociaux']]
    y_entertainment = data[['Sport', 'Films']]
    y_offer = data['Type_Offre']
    y_mmr = data['MMR']
    # NOTE(review): five of these seven targets are also input features in X,
    # so pref_model can read them straight from its inputs. Left as is because
    # the saved feature list is what downstream loaders see; confirm intent.
    y_preferences = data[['Youtube', 'Navigation', 'Gaming', 'Musique', 'Reseaux_sociaux', 'Films', 'Sport']]

    # A single call applies one shuffled row split to every array. This yields
    # the same training rows as splitting each target separately with the same
    # seed, without repeatedly overwriting X_train. The held-out 20% is not
    # used inside this function.
    (X_train, _X_test,
     y_ent_train, _y_ent_test,
     y_offer_train, _y_offer_test,
     y_mmr_train, _y_mmr_test,
     y_pref_train, _y_pref_test) = train_test_split(
        X, y_entertainment, y_offer, y_mmr, y_preferences,
        test_size=0.2, random_state=random_state)

    ent_model = RandomForestClassifier(random_state=random_state).fit(X_train, y_ent_train)
    offer_model = RandomForestClassifier(random_state=random_state).fit(X_train, y_offer_train)

    # A NaN regression target makes fit() raise, so train the MMR model only
    # on rows that have a target. With no missing MMR this selects every row.
    has_mmr = y_mmr_train.notna()
    mmr_model = RandomForestRegressor(random_state=random_state).fit(
        X_train.loc[has_mmr], y_mmr_train.loc[has_mmr])

    pref_model = RandomForestClassifier(random_state=random_state).fit(X_train, y_pref_train)

    # Save models
    joblib.dump(ent_model, 'ent_model.pkl')
    joblib.dump(offer_model, 'offer_model.pkl')
    joblib.dump(mmr_model, 'mmr_model.pkl')
    joblib.dump(pref_model, 'pref_model.pkl')
    # Saved as the pandas Index of feature names, in training column order.
    joblib.dump(X_train.columns, 'model_features.pkl')

# Runs when the cell executes: read the workbook, then fit the models and write
# the .pkl files. `data` stays bound at module level after this cell.
data = load_data()
train_and_save_models(data)

  data[binary_columns] = data[binary_columns].replace({'OUI': 1, 'NON': 0})
  data['Type_Offre'] = data['Type_Offre'].replace({'Prépayée': 0, 'Postpayée': 1})


In [None]:
# Print the installed scikit-learn version.
# NOTE(review): presumably recorded because the joblib-pickled models should be
# loaded with the same scikit-learn version that wrote them - confirm.
import sklearn
print(sklearn.__version__)


1.5.2
