In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
import random

In [None]:
transactions = pd.read_csv('../Data/df.csv')

In [None]:
transactions.info()

In [None]:
class SmartBasketRecommenderCV:
    def __init__(self, transactions, min_train=5, min_test=1, k=10):
        self.transactions = transactions.copy()
        self.k = k
        self.min_train = min_train
        self.min_test = min_test

        self.transactions['Date'] = pd.to_datetime(self.transactions['Date'])

    def top_purchase_history(self, client_id, df):
        client_data = df[df['Client ID'] == client_id]
        top_products = (
            client_data['ID Product']
            .value_counts()
            .head(5)
            .index
            .tolist()
        )
        return top_products

    def collaborative_recommendations(self, client_id, interaction_matrix, df, top_n_similar=5):
        if client_id not in interaction_matrix.index:
            return []

        client_idx = interaction_matrix.index.get_loc(client_id)
        distance_matrix = pairwise_distances(interaction_matrix, metric='cosine')
        distances = distance_matrix[client_idx]
        similar_indices = distances.argsort()[1:top_n_similar+1]
        similar_clients = interaction_matrix.index[similar_indices]

        similar_purchases = df[df['Client ID'].isin(similar_clients)]['ID Product']
        target_purchases = df[df['Client ID'] == client_id]['ID Product'].unique()
        recommendations = similar_purchases[~similar_purchases.isin(target_purchases)]
        return recommendations.value_counts().head(5).index.tolist()

    def smart_basket(self, client_id, df, interaction_matrix):
        hist_recs = self.top_purchase_history(client_id, df)
        collab_recs = self.collaborative_recommendations(client_id, interaction_matrix, df)
        final_recs = hist_recs.copy()
        for item in collab_recs:
            if item not in final_recs:
                final_recs.append(item)
            if len(final_recs) == 10:
                break
        return final_recs

    def precision_at_k(self, train_recs, test_items):
        if not test_items:
            return 0.0
        hits = len(set(train_recs[:self.k]) & set(test_items))
        return hits / self.k

    def create_interaction_matrix(self):
        matrix = pd.crosstab(self.transactions['Client ID'], self.transactions['ID Product'])
        return matrix.applymap(lambda x: 1 if x > 0 else 0)

    def monte_carlo_cv(self, iterations=5, weeks_train=45):
        hit_rates_all = []

        for i in range(iterations):
            earliest = self.transactions['Date'].min()
            latest = self.transactions['Date'].max() - pd.to_timedelta(weeks_train, unit='w')
            random_start = earliest + (latest - earliest) * random.random()
            split_date = pd.to_datetime(random_start) + pd.to_timedelta(weeks_train, unit='w')

            self.transactions['train_split'] = (self.transactions['Date'] <= split_date).astype(int)
            train_set = self.transactions[self.transactions['train_split'] == 1]
            test_set = self.transactions[self.transactions['train_split'] == 0]

            interaction_matrix = pd.crosstab(self.transactions['Client ID'], self.transactions['ID Product'])
            interaction_matrix = interaction_matrix.applymap(lambda x: 1 if x > 0 else 0)

            valid_clients = []
            for client_id in self.transactions['Client ID'].unique():
                if (train_set[train_set['Client ID'] == client_id].shape[0] >= self.min_train and
                    test_set[test_set['Client ID'] == client_id].shape[0] >= self.min_test):
                    valid_clients.append(client_id)

            hit_rates = []
            for client_id in valid_clients:
                train_recs = self.smart_basket(client_id, train_set, interaction_matrix)
                test_items = test_set[test_set['Client ID'] == client_id]['ID Product'].unique().tolist()
                hit = self.precision_at_k(train_recs, test_items)
                hit_rates.append(hit)

            mean_hit = np.mean(hit_rates)
            hit_rates_all.append(mean_hit)
            print(f"Iteration {i+1}: Hit Rate = {mean_hit:.2%}")

        print(f"\nFinal MCCV Hit Rate: {np.mean(hit_rates_all):.2%} ± {np.std(hit_rates_all):.2%}")
        return hit_rates_all


In [179]:
recommender = SmartBasketRecommenderCV(transactions, min_train=5, min_test=1, k=10)
hit_rates = recommender.monte_carlo_cv(iterations=5)

  interaction_matrix = interaction_matrix.applymap(lambda x: 1 if x > 0 else 0)


Iteration 1: Hit Rate = 39.83%


  interaction_matrix = interaction_matrix.applymap(lambda x: 1 if x > 0 else 0)


Iteration 2: Hit Rate = 37.62%


  interaction_matrix = interaction_matrix.applymap(lambda x: 1 if x > 0 else 0)


Iteration 3: Hit Rate = 38.64%


  interaction_matrix = interaction_matrix.applymap(lambda x: 1 if x > 0 else 0)


Iteration 4: Hit Rate = 40.41%


  interaction_matrix = interaction_matrix.applymap(lambda x: 1 if x > 0 else 0)


Iteration 5: Hit Rate = 39.42%

Final MCCV Hit Rate: 39.18% ± 0.97%
