In [None]:
import pandas as pd
import numpy as np

# Load the dataset
DATA_PATH = 'smart_medishop_data_export_20251102_142208.xlsx - Raw_Transactions.csv'
data = pd.read_csv(DATA_PATH)

# Show the columns that were actually loaded
print(data.columns)

# Columns read by the later cells of this notebook (features, target, user history).
# Fail right here with an explicit message instead of a bare KeyError further down
# when the wrong export has been loaded.
REQUIRED_COLUMNS = ['StockId', 'Name', 'Marque', 'Type', 'State', 'Price']
missing_columns = [col for col in REQUIRED_COLUMNS if col not in data.columns]
if missing_columns:
    raise KeyError(
        f"{DATA_PATH!r} is missing required column(s) {missing_columns}; "
        f"columns found: {list(data.columns)}"
    )

# Example of the full expected schema:
# StockId, Name, Marque, Type, State, Country, Transaction, Price, Status


Index(['transaction_id', 'amount', 'hour', 'day_of_week', 'user_age',
       'payment_method', 'transaction_count_24h', 'transaction_count_7d',
       'avg_transaction_amount', 'user_registration_days', 'device_type',
       'location_country', 'is_fraud', 'timestamp'],
      dtype='object')


In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Explanatory variables
X = data.loc[:, ['State', 'Type', 'Price']].copy()

# Target variable
y = data.loc[:, 'Marque'].copy()

# One-hot encode the categorical variables (State and Type).
# `sparse_output` is used here in place of the `sparse` argument.
ohe = OneHotEncoder(sparse_output=False)
X_cat = ohe.fit_transform(X.loc[:, ['State', 'Type']])

# Rescale Price with a min-max scaler
scaler = MinMaxScaler()
X_price = scaler.fit_transform(X.loc[:, ['Price']])

# Combine the features column-wise: one-hot block first, scaled price last
X_final = np.concatenate([X_cat, X_price], axis=1)

print("Shape des features finales :", X_final.shape)


Shape des features finales : (1590, 33)


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Distance-weighted KNN classifier (5 neighbours), fit on the combined
# feature matrix X_final with y as the target.
model = KNeighborsClassifier(weights='distance', n_neighbors=5).fit(X_final, y)
model  # last expression of the cell: displays the fitted estimator


In [None]:
def recommend_products_diverse_strict_ordered(state, type_, price, top_n=10, max_per_type=3,
                                              n_candidates=300, cat_weight=5, price_weight=3):
    """
    Recommend up to ``top_n`` distinct (Type, Price) products close to a query product.

    Every row of the module-level ``data`` frame and the query itself are encoded
    with the fitted ``ohe`` / ``scaler`` objects and scaled with the *same*
    weights; rows are then ranked by Euclidean distance to the query in that
    weighted space. (Weighting only the query would compare vectors living on
    different scales and, in particular, shift the price the search aims at.)

    Selection rules, applied in ranking order:
      * a (Type, Price) pair is returned at most once;
      * two consecutive recommendations never share the same Type;
      * among the ``n_candidates`` nearest rows, at most ``max_per_type``
        rows of a given Type are kept.
    If this yields fewer than ``top_n`` rows, a second pass over the whole
    ranking relaxes only the per-Type cap.

    Parameters
    ----------
    state, type_ : values for the 'State' and 'Type' columns of the query.
        Presumably they must be categories known to ``ohe``; what happens for
        unseen values depends on how the encoder was configured.
    price : numeric price of the query, in the same unit as ``data['Price']``.
    top_n : maximum number of rows to return.
    max_per_type : per-Type cap enforced during the first pass.
    n_candidates : number of nearest rows considered by the first pass.
    cat_weight, price_weight : multipliers applied to the one-hot block and to
        the scaled price, for both the catalogue and the query.

    Returns
    -------
    pandas.DataFrame
        Columns 'Type' and 'Price', in selection order, indexed with the labels
        of the selected rows in ``data``. May contain fewer than ``top_n`` rows.
    """
    result_cols = ['Type', 'Price']
    if top_n <= 0 or len(data) == 0:
        return data.iloc[0:0][result_cols].copy()

    def _embed(frame):
        # Identical weighting for catalogue and query keeps distances comparable.
        cat_part = ohe.transform(frame[['State', 'Type']]) * cat_weight
        price_part = scaler.transform(frame[['Price']]) * price_weight
        return np.hstack([cat_part, price_part])

    # A one-row frame with the training column names, so the transformers see
    # the same layout as the catalogue rows.
    query = pd.DataFrame({'State': [state], 'Type': [type_], 'Price': [price]})
    distances = np.linalg.norm(_embed(data) - _embed(query), axis=1)
    # Stable sort: rows at equal distance keep their order of appearance in `data`.
    ranking = np.argsort(distances, kind='stable')

    types = data['Type'].to_numpy()
    prices = data['Price'].to_numpy()
    n_near = max(int(n_candidates), 0)  # slicing copes with fewer rows than requested

    picked = []        # positional indices into `data`, in selection order
    seen_keys = set()  # (Type, Price) pairs already returned
    type_counts = {}
    last_type = None

    # Pass 1: nearest candidates with the per-Type cap.
    # Pass 2: whole ranking, cap relaxed, used only to fill up to top_n.
    for positions, enforce_cap in ((ranking[:n_near], True), (ranking, False)):
        for pos in positions:
            if len(picked) >= top_n:
                break
            t = types[pos]
            key = (t, prices[pos])
            if key in seen_keys or t == last_type:
                continue
            if enforce_cap and type_counts.get(t, 0) >= max_per_type:
                continue
            picked.append(int(pos))
            seen_keys.add(key)
            type_counts[t] = type_counts.get(t, 0) + 1
            last_type = t

    return data.iloc[picked][result_cols].copy()


In [None]:
# Example: a made-up product used as the query
state_ex, type_ex, price_ex = 'London', 'Pansement m√©dical', 999

recommended_df = recommend_products_diverse_strict_ordered(
    state=state_ex, type_=type_ex, price=price_ex, top_n=10
)
print("Top 10 produits recommand√©s :", recommended_df, sep="\n")


Top 10 produits recommand√©s :
                       Type  Price
36        Pansement m√©dical   3596
1465            Suivi sant√©   2876
1287  Proth√®se orthop√©dique   2733
94        Pansement m√©dical   2716
1112       Pansement avanc√©   3356
72        Pansement m√©dical   3417
863         Lit hospitalier   3417
1589            Suivi sant√©   2517
319      Soins chirurgicaux   3596
522       Imagerie m√©dicale   3596




In [None]:
# Any name present in the 'Name' column can be used here
username = "Man"
# Rows belonging to that user
user_history = data.loc[data['Name'].eq(username)]
# Rows whose StockId does not appear in the user's history
remaining_products = data.loc[~data['StockId'].isin(user_history['StockId'])]


In [None]:
def recommend_for_user_v3(username, top_n=10, max_per_type=3, cat_weight=5, price_weight=3):
    """
    Personalised recommendations based on a user's purchase history.

    Rows of the module-level ``data`` frame whose 'Name' equals ``username``
    form the history; rows whose 'StockId' does not appear in that history are
    the candidates. Both sets are encoded with the fitted ``ohe`` / ``scaler``
    objects and scaled with the same weights. Each candidate is scored by its
    mean Euclidean distance to all history rows (lower is better).

    Candidates are taken in ascending score order, subject to:
      * at most ``max_per_type`` rows per Type;
      * no two consecutive rows with the same Type;
      * each (Type, Price) pair returned at most once.

    Parameters
    ----------
    username : value looked up in ``data['Name']``.
    top_n : maximum number of recommendations.
    max_per_type : cap on recommendations sharing one Type.
    cat_weight, price_weight : multipliers applied to the one-hot block and to
        the scaled price before distances are computed.

    Returns
    -------
    pandas.DataFrame
        Columns 'Type' and 'Price' with a fresh 0..n-1 index, best first.
        An empty ``pd.DataFrame()`` (and a printed message) when the user has
        no history; an empty frame with the two columns when nothing can be
        recommended.
    """
    # User history
    user_history = data[data['Name'] == username]
    if user_history.empty:
        print(f"Aucun historique trouv√© pour {username}")
        return pd.DataFrame()

    result_cols = ['Type', 'Price']
    remaining_products = data[~data['StockId'].isin(user_history['StockId'])]
    # Nothing left to score (or nothing requested): skip the encoders entirely.
    if remaining_products.empty or top_n <= 0:
        return pd.DataFrame(columns=result_cols)

    def _embed(frame):
        # Encode all rows at once; same weights for history and candidates.
        cat_part = ohe.transform(frame[['State', 'Type']]) * cat_weight
        price_part = scaler.transform(frame[['Price']]) * price_weight
        return np.hstack([cat_part, price_part])

    X_user = _embed(user_history)
    X_rem = _embed(remaining_products)

    # All pairwise distances via broadcasting:
    # shape = (len(remaining_products), len(user_history))
    dists = np.linalg.norm(X_rem[:, None, :] - X_user[None, :, :], axis=2)

    scores_df = remaining_products[result_cols].copy()
    scores_df['Score'] = dists.mean(axis=1)  # mean distance to the history rows
    # Stable sort so equally scored candidates keep a deterministic order.
    scores_df = scores_df.sort_values(by='Score', kind='stable')

    # Diversity constraints: per-Type cap, no repeated Type in a row, no repeated product
    seen_keys = set()
    type_counts = {}
    final_list = []
    last_type = None

    for t, p in zip(scores_df['Type'], scores_df['Price']):
        if len(final_list) >= top_n:
            break
        key = (t, p)
        if key in seen_keys or t == last_type:
            continue
        if type_counts.get(t, 0) >= max_per_type:
            continue
        final_list.append({'Type': t, 'Price': p})
        seen_keys.add(key)
        type_counts[t] = type_counts.get(t, 0) + 1
        last_type = t

    return pd.DataFrame(final_list, columns=result_cols)

In [None]:
# Your name as it appears in the "Name" column
username = "Man"
recommended_user = recommend_for_user_v3(username=username, top_n=10)
print("üîù Top 10 recommandations personnalis√©es :", recommended_user, sep="\n")


üîù Top 10 recommandations personnalis√©es :
                     Type  Price
0   Proth√®se orthop√©dique   1406
1  Dispositif chirurgical    999
2   Proth√®se orthop√©dique   2156
3  Dispositif chirurgical   3417
4       Pansement m√©dical   2049
5  Dispositif chirurgical   1999
6             Suivi sant√©   1139
7   Proth√®se orthop√©dique    999
8             Suivi sant√©    999
9       Pansement m√©dical    570
