In [8]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cosine
from collections import defaultdict
from typing import List, Dict, Tuple

class ProductRecommender:
    """Item-to-item recommender built from voucher-level sales data.

    The sales CSV must provide the columns ``VchNo`` (voucher / cart id),
    ``Product Name``, ``Qty`` and ``Price``. On construction three lookup
    tables are precomputed:

    * ``cart_matrix`` - quantity-weighted co-occurrence of products that
      share a voucher,
    * ``price_ranges`` - an acceptable (low, high) price band per product,
    * ``item_purchase_patterns`` - aggregate quantity / price statistics per
      product.
    """

    def __init__(self, sales_data_path: str):
        """Load the sales CSV and precompute all lookup tables.

        Args:
            sales_data_path: Path of the CSV file passed to ``pd.read_csv``.
        """
        self.sales_df = pd.read_csv(sales_data_path)
        self.cart_matrix = self._create_cart_matrix()
        self.price_ranges = self._calculate_price_ranges()
        self.item_purchase_patterns = self._analyze_purchase_patterns()

    def _create_cart_matrix(self) -> Dict[str, Dict[str, float]]:
        """Build the quantity-weighted co-occurrence matrix.

        For every ordered pair of distinct products sharing a voucher, the
        entry ``cart_matrix[a][b]`` grows by ``log1p(min(qty_a, qty_b))``.

        Returns:
            Nested ``defaultdict`` mapping product -> product -> weight.
        """
        cart_matrix = defaultdict(lambda: defaultdict(float))

        for _, group in self.sales_df.groupby('VchNo'):
            items = group['Product Name'].unique()

            # When a product appears on several rows of one voucher, the
            # quantity of its last row is the one kept by dict(zip(...)).
            qty_dict = dict(zip(group['Product Name'], group['Qty']))

            for item1 in items:
                for item2 in items:
                    if item1 != item2:
                        # log1p dampens the influence of very large orders.
                        weight = np.log1p(min(qty_dict[item1], qty_dict[item2]))
                        cart_matrix[item1][item2] += weight

        return cart_matrix

    def _calculate_price_ranges(self) -> Dict[str, Tuple[float, float]]:
        """Compute an acceptable (low, high) price band for each product.

        The band is mean +/- 1.5 standard deviations. Products with a single
        sale have no standard deviation; they fall back to 50%-150% of the
        mean price.
        """
        price_ranges = {}

        # One grouped pass instead of re-filtering the frame per product;
        # sort=False keeps first-appearance order.
        grouped = self.sales_df.groupby('Product Name', sort=False)['Price']
        for product, prices in grouped:
            mean_price = prices.mean()
            std_price = prices.std()

            if pd.isna(std_price):
                price_ranges[product] = (mean_price * 0.5, mean_price * 1.5)
            else:
                price_ranges[product] = (
                    mean_price - 1.5 * std_price,
                    mean_price + 1.5 * std_price,
                )

        return price_ranges

    def _analyze_purchase_patterns(self) -> Dict[str, Dict[str, float]]:
        """Aggregate per-product statistics.

        Returns:
            Mapping product -> dict with ``avg_qty``, ``median_qty``,
            ``price`` (mean unit price) and ``purchase_frequency`` (number of
            sales rows for the product).
        """
        patterns = {}

        for product, product_data in self.sales_df.groupby('Product Name', sort=False):
            patterns[product] = {
                'avg_qty': product_data['Qty'].mean(),
                'median_qty': product_data['Qty'].median(),
                'price': product_data['Price'].mean(),
                'purchase_frequency': len(product_data)
            }

        return patterns

    def _calculate_item_similarity(self, item1: str, item2: str) -> float:
        """Score how similar two products are.

        The score is ``0.4 * price_similarity + 0.6 * copurchase`` where
        ``price_similarity`` is one minus the relative mean-price difference
        and ``copurchase`` is the summed co-occurrence weight of the pair,
        divided by 10 and capped at 1.

        Returns:
            The combined score, or 0.0 if either product is unknown.
        """
        patterns = self.item_purchase_patterns
        if item1 not in patterns or item2 not in patterns:
            return 0.0

        price1 = patterns[item1]['price']
        price2 = patterns[item2]['price']
        max_price = max(price1, price2)
        if max_price > 0:
            price_diff = abs(price1 - price2) / max_price
            price_similarity = 1 - min(price_diff, 1)
        else:
            # Avoid 0/0 (NaN) when both mean prices are zero: identical
            # prices are fully similar, anything else is not comparable.
            price_similarity = 1.0 if price1 == price2 else 0.0

        # .get() keeps the lookup read-only; indexing the nested defaultdict
        # would insert an empty entry for every pair that is queried.
        copurchase_weight = (
            self.cart_matrix.get(item1, {}).get(item2, 0.0)
            + self.cart_matrix.get(item2, {}).get(item1, 0.0)
        )

        return price_similarity * 0.4 + min(copurchase_weight / 10, 1) * 0.6

    def get_recommendations(self,
                          product: str,
                          qty: float,
                          price: float,
                          n_recommendations: int = 5) -> List[Dict]:
        """
        Get product recommendations for an input product

        Candidates are all other products whose mean price lies inside the
        input product's price band; they are ranked by similarity score.

        Args:
            product: Name of the product
            qty: Quantity purchased (accepted for interface compatibility;
                not used in the ranking)
            price: Price per unit (accepted for interface compatibility; not
                used in the ranking)
            n_recommendations: Number of recommendations to return

        Returns:
            List of recommended products with their details; empty if the
            product is unknown or ``n_recommendations`` is not positive
        """
        if product not in self.item_purchase_patterns or n_recommendations <= 0:
            return []

        low, high = self.price_ranges[product]
        scored = [
            (name, self._calculate_item_similarity(product, name))
            for name, pattern in self.item_purchase_patterns.items()
            if name != product and low <= pattern['price'] <= high
        ]

        # Stable sort: equally scored candidates keep first-appearance order.
        scored.sort(key=lambda pair: pair[1], reverse=True)

        recommendations = []
        for prod, similarity in scored[:n_recommendations]:
            pattern = self.item_purchase_patterns[prod]
            recommendations.append({
                'product': prod,
                'similarity_score': round(similarity, 3),
                'recommended_qty': round(pattern['median_qty'], 2),
                'avg_price': round(pattern['price'], 2),
                'purchase_frequency': pattern['purchase_frequency']
            })

        return recommendations

    def _validate_input(self, product: str, qty: float, price: float) -> bool:
        """Check that a query is plausible for a known product.

        A query is rejected when the product is unknown, when ``price`` lies
        below half of the band's lower bound or above 1.5x its upper bound,
        or when ``qty`` is negative or more than ten times the product's
        average quantity.
        """
        if product not in self.item_purchase_patterns:
            return False

        pattern = self.item_purchase_patterns[product]
        price_range = self.price_ranges[product]

        if price < price_range[0] * 0.5 or price > price_range[1] * 1.5:
            return False

        if qty < 0 or qty > pattern['avg_qty'] * 10:
            return False

        return True

# Demo: build the recommender from the sales export and query one product.
recommender = ProductRecommender("SALE DATA.csv")

# qty and price are passed along with the product name.
recommendations = recommender.get_recommendations(
    product="Tang  Orange 500 Gm (165*24)", qty=140, price=105
)
print(recommendations)

# Human-readable report, one paragraph per recommended product.
for rec in recommendations:
    print(
        f"\nRecommended Product: {rec['product']}",
        f"Similarity Score: {rec['similarity_score']}",
        f"Recommended Quantity: {rec['recommended_qty']}",
        f"Average Price: ₹{rec['avg_price']}",
        f"Purchase Frequency: {rec['purchase_frequency']} times",
        sep="\n",
    )


[{'product': 'Tang  Lemon 500 Gm(165*24)', 'similarity_score': 0.997, 'recommended_qty': 480.0, 'avg_price': 104.56, 'purchase_frequency': 9}, {'product': 'Tang Mango 500 Gm(165*24)', 'similarity_score': 0.997, 'recommended_qty': 360.0, 'avg_price': 104.59, 'purchase_frequency': 10}, {'product': 'Lifebuoy Silver Shield125Gm PO4(150*30)', 'similarity_score': 0.396, 'recommended_qty': 300.0, 'avg_price': 104.9, 'purchase_frequency': 26}, {'product': 'DETTOL SOAP ORG 100gx4(154*42)', 'similarity_score': 0.392, 'recommended_qty': 420.0, 'avg_price': 101.84, 'purchase_frequency': 42}, {'product': 'DETTOL SOAP ORG 100gx4 (154*42)', 'similarity_score': 0.392, 'recommended_qty': 420.0, 'avg_price': 101.82, 'purchase_frequency': 59}]

Recommended Product: Tang  Lemon 500 Gm(165*24)
Similarity Score: 0.997
Recommended Quantity: 480.0
Average Price: ₹104.56
Purchase Frequency: 9 times

Recommended Product: Tang Mango 500 Gm(165*24)
Similarity Score: 0.997
Recommended Quantity: 360.0
Average Price

# The above code produces good recommendations from similar carts, but it may return near-duplicate items.
# It also does not handle the case where the cart is empty.

In [15]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cosine
from sklearn.metrics import silhouette_score
from collections import defaultdict
from typing import List, Dict, Tuple
import warnings
warnings.filterwarnings('ignore')

class ProductRecommender:
    """Hybrid item-to-item recommender with a price-based fallback.

    The similarity of two products blends four signals computed from the
    sales table: mean-price closeness, cosine similarity of aggregate product
    features, co-purchase strength within vouchers, and customer overlap.
    Recommendations are returned together with summary evaluation metrics.

    The sales CSV must provide the columns ``VchNo``, ``Product Name``,
    ``Qty``, ``Price`` and ``Party``.
    """

    def __init__(self, sales_data_path: str):
        """Load the sales CSV and precompute all lookup tables.

        Args:
            sales_data_path: Path of the CSV file passed to ``pd.read_csv``.
        """
        self.sales_df = pd.read_csv(sales_data_path)
        self.cart_matrix = self._create_cart_matrix()
        self.price_ranges = self._calculate_price_ranges()
        self.item_purchase_patterns = self._analyze_purchase_patterns()
        self.product_features = self._create_product_features()

    def _create_product_features(self) -> pd.DataFrame:
        """Build the per-product feature table.

        Returns:
            DataFrame indexed by product name with the columns ``avg_qty``,
            ``price``, ``purchase_freq``, ``unique_customers`` and
            ``price_per_qty`` (in that order).
        """
        features = []

        # One grouped pass; sort=False keeps first-appearance order.
        for product, product_data in self.sales_df.groupby('Product Name', sort=False):
            avg_qty = product_data['Qty'].mean()
            price = product_data['Price'].mean()

            features.append({
                'Product Name': product,
                'avg_qty': avg_qty,
                'price': price,
                'purchase_freq': len(product_data),
                'unique_customers': product_data['Party'].nunique(),
                # Guard against division by a non-positive average quantity.
                'price_per_qty': price / avg_qty if avg_qty > 0 else price
            })

        return pd.DataFrame(features).set_index('Product Name')

    def _create_cart_matrix(self) -> Dict[str, Dict[str, float]]:
        """Build the co-occurrence matrix weighted by quantity and price.

        For every ordered pair of distinct products sharing a voucher, the
        entry grows by ``log1p(min(qty_a, qty_b)) * price_similarity`` where
        ``price_similarity`` is one minus the pair's price gap relative to
        the highest price on that voucher.

        Returns:
            Nested ``defaultdict`` mapping product -> product -> weight.
        """
        cart_matrix = defaultdict(lambda: defaultdict(float))

        for _, group in self.sales_df.groupby('VchNo'):
            names = group['Product Name']
            items = names.unique()
            # Last row wins for quantity when a product repeats on a voucher.
            qty_dict = dict(zip(names, group['Qty']))

            # First row wins for price. Built once per voucher instead of
            # re-filtering the group for every item pair.
            price_dict = {}
            for name, unit_price in zip(names, group['Price']):
                price_dict.setdefault(name, unit_price)
            max_price = group['Price'].max()

            for item1 in items:
                for item2 in items:
                    if item1 != item2:
                        qty_weight = np.log1p(min(qty_dict[item1], qty_dict[item2]))
                        if max_price > 0:
                            price_similarity = 1 - abs(
                                price_dict[item1] - price_dict[item2]
                            ) / max_price
                        else:
                            # No positive price on the voucher: there is no
                            # price gap to penalise (and no 0/0 division).
                            price_similarity = 1.0

                        cart_matrix[item1][item2] += qty_weight * price_similarity

        return cart_matrix

    def _calculate_price_ranges(self) -> Dict[str, Tuple[float, float]]:
        """Compute a volatility-dependent (low, high) price band per product.

        The band is mean +/- k standard deviations with k = 1.5 when the
        coefficient of variation (std / mean) is below 0.2 and k = 2.0
        otherwise. Products with a single sale have no standard deviation;
        they fall back to 50%-150% of the mean price.
        """
        price_ranges = {}

        grouped = self.sales_df.groupby('Product Name', sort=False)['Price']
        for product, prices in grouped:
            mean_price = prices.mean()
            std_price = prices.std()

            if pd.isna(std_price):
                price_ranges[product] = (mean_price * 0.5, mean_price * 1.5)
                continue

            # A zero mean has no defined volatility ratio; use the wide band.
            low_volatility = mean_price != 0 and std_price / mean_price < 0.2
            multiplier = 1.5 if low_volatility else 2.0

            price_ranges[product] = (
                mean_price - multiplier * std_price,
                mean_price + multiplier * std_price,
            )

        return price_ranges

    def _analyze_purchase_patterns(self) -> Dict[str, Dict[str, float]]:
        """Aggregate per-product purchase statistics.

        Returns:
            Mapping product -> dict with ``avg_qty``, ``median_qty``,
            ``price`` (mean unit price), ``purchase_frequency`` (number of
            sales rows), ``unique_customers`` and ``avg_cart_size``.
        """
        patterns = {}

        for product, product_data in self.sales_df.groupby('Product Name', sort=False):
            patterns[product] = {
                'avg_qty': product_data['Qty'].mean(),
                'median_qty': product_data['Qty'].median(),
                'price': product_data['Price'].mean(),
                'purchase_frequency': len(product_data),
                'unique_customers': product_data['Party'].nunique(),
                # NOTE(review): this is the mean number of rows of *this
                # product* per voucher, not the size of the whole cart.
                'avg_cart_size': product_data.groupby('VchNo').size().mean()
            }

        return patterns

    def _calculate_item_similarity(self, item1: str, item2: str) -> Tuple[float, Dict[str, float]]:
        """Compute the blended similarity of two products.

        Returns:
            ``(total, components)`` where ``components`` holds
            ``price_similarity``, ``pattern_similarity``,
            ``copurchase_strength`` and ``customer_overlap`` and ``total`` is
            their weighted sum (0.3 / 0.3 / 0.2 / 0.2). Unknown products
            yield ``(0.0, {})``.
        """
        patterns = self.item_purchase_patterns
        if item1 not in patterns or item2 not in patterns:
            return 0.0, {}

        # Price similarity: one minus the relative mean-price difference.
        price1 = patterns[item1]['price']
        price2 = patterns[item2]['price']
        max_price = max(price1, price2)
        if max_price > 0:
            price_similarity = 1 - min(abs(price1 - price2) / max_price, 1)
        else:
            # Avoid 0/0 when both mean prices are zero.
            price_similarity = 1.0 if price1 == price2 else 0.0

        # Purchase pattern similarity on the feature vectors.
        # NOTE(review): the positional slice [1:] drops the first feature
        # column (``avg_qty``); kept as in the original scoring - confirm
        # whether skipping it is intended.
        pattern1 = self.product_features.loc[item1].iloc[1:].to_numpy(dtype=float)
        pattern2 = self.product_features.loc[item2].iloc[1:].to_numpy(dtype=float)
        pattern_similarity = 1 - cosine(pattern1, pattern2)

        # Co-purchase strength: mean of both directions. .get() keeps the
        # lookup read-only instead of inserting empty defaultdict entries.
        copurchase_strength = (
            self.cart_matrix.get(item1, {}).get(item2, 0.0)
            + self.cart_matrix.get(item2, {}).get(item1, 0.0)
        ) / 2
        copurchase_score = min(copurchase_strength / 10, 1)

        # Customer overlap: Jaccard index of the two buyer sets.
        product_names = self.sales_df['Product Name']
        customers1 = set(self.sales_df[product_names == item1]['Party'])
        customers2 = set(self.sales_df[product_names == item2]['Party'])
        all_customers = customers1 | customers2
        customer_overlap = (
            len(customers1 & customers2) / len(all_customers)
            if all_customers else 0.0
        )

        similarity_components = {
            'price_similarity': price_similarity,
            'pattern_similarity': pattern_similarity,
            'copurchase_strength': copurchase_score,
            'customer_overlap': customer_overlap
        }

        total_similarity = (
            price_similarity * 0.3 +
            pattern_similarity * 0.3 +
            copurchase_score * 0.2 +
            customer_overlap * 0.2
        )

        return total_similarity, similarity_components

    def _get_fallback_recommendations(self,
                                    target_price: float,
                                    excluded_products: List[str],
                                    n_recommendations: int) -> List[Dict]:
        """Recommend the products whose mean price is closest to a target.

        Used when the similarity ranking yields too few results. Entries have
        the same keys as collaborative recommendations (including
        ``similarity_components``) so that consumers can treat both kinds
        uniformly.

        Args:
            target_price: Price the candidates are compared against.
            excluded_products: Products that must not be returned.
            n_recommendations: Maximum number of entries to return.
        """
        excluded = set(excluded_products)
        ranked = []

        for product, pattern in self.item_purchase_patterns.items():
            if product in excluded:
                continue
            price = pattern['price']
            if pd.isna(price):
                # A NaN key would make the sort order undefined.
                continue
            denominator = max(price, target_price)
            if denominator > 0:
                price_diff = abs(price - target_price) / denominator
            else:
                # Avoid 0/0 when both prices are zero.
                price_diff = 0.0 if price == target_price else 1.0
            ranked.append((price_diff, product, pattern))

        # Closest price first; stable, so ties keep first-appearance order.
        ranked.sort(key=lambda entry: entry[0])

        recommendations = []
        for price_diff, product, pattern in ranked[:max(n_recommendations, 0)]:
            price_similarity = 1 - price_diff
            recommendations.append({
                'product': product,
                'similarity_score': round(price_similarity, 3),
                # Price is the only signal available for a fallback entry.
                'similarity_components': {'price_similarity': round(price_similarity, 3)},
                'recommended_qty': round(pattern['median_qty'], 2),
                'avg_price': round(pattern['price'], 2),
                'purchase_frequency': pattern['purchase_frequency'],
                'recommendation_type': 'price_based_fallback',
                # Lower confidence than collaborative results (0.7 vs 0.9).
                'confidence_score': round(price_similarity * 0.7, 3)
            })

        return recommendations

    def get_recommendations(self,
                          product: str,
                          qty: float,
                          price: float,
                          n_recommendations: int = 5) -> Tuple[List[Dict], Dict]:
        """
        Get product recommendations with detailed evaluation metrics

        Args:
            product: Name of the product
            qty: Quantity purchased (used for input validation only)
            price: Price per unit (used for input validation and as the
                target price of fallback recommendations)
            n_recommendations: Number of recommendations to return

        Returns:
            Tuple of (recommendations list, evaluation metrics). Invalid
            input yields ``([], {'error': 'Invalid input parameters'})``.
        """
        if not self._validate_input(product, qty, price):
            return [], {'error': 'Invalid input parameters'}

        similarities = []
        for candidate in self.item_purchase_patterns:
            if candidate == product:
                continue
            similarity, components = self._calculate_item_similarity(product, candidate)
            if similarity > 0:
                similarities.append((candidate, similarity, components))

        # Highest similarity first; ties are broken by name so that the
        # result does not depend on iteration order.
        similarities.sort(key=lambda entry: (-entry[1], str(entry[0])))

        recommendations = []
        used_products = set()

        for prod, similarity, components in similarities[:max(n_recommendations, 0)]:
            pattern = self.item_purchase_patterns[prod]
            recommendations.append({
                'product': prod,
                'similarity_score': round(similarity, 3),
                'similarity_components': {k: round(v, 3) for k, v in components.items()},
                'recommended_qty': round(pattern['median_qty'], 2),
                'avg_price': round(pattern['price'], 2),
                'purchase_frequency': pattern['purchase_frequency'],
                'recommendation_type': 'collaborative',
                'confidence_score': round(similarity * 0.9, 3)  # High confidence for collaborative
            })
            used_products.add(prod)

        # Top up with price-based fallbacks if the ranking came up short.
        if len(recommendations) < n_recommendations:
            recommendations.extend(self._get_fallback_recommendations(
                price,
                used_products | {product},
                n_recommendations - len(recommendations)
            ))

        evaluation_metrics = self._calculate_evaluation_metrics(product, recommendations)

        return recommendations, evaluation_metrics

    def _calculate_evaluation_metrics(self,
                                   target_product: str,
                                   recommendations: List[Dict]) -> Dict:
        """Summarise a recommendation list.

        Returns:
            Dict with ``silhouette_score`` (target vs. recommendations in
            feature space, cosine metric; ``None`` with fewer than two
            recommendations), ``average_similarity``, ``average_confidence``,
            ``price_cohesion``, ``diversity_score`` and
            ``collaborative_ratio``; or ``{'error': ...}`` for an empty list.
        """
        if not recommendations:
            return {'error': 'No recommendations to evaluate'}

        products = [target_product] + [r['product'] for r in recommendations]
        features = self.product_features.loc[products].values

        # The two-label split (target vs. recommendations) needs at least
        # three samples for a silhouette score to be defined.
        if len(products) > 2:
            silhouette = silhouette_score(
                features,
                [0] + [1] * len(recommendations),
                metric='cosine'
            )
        else:
            silhouette = None

        avg_similarity = np.mean([r['similarity_score'] for r in recommendations])
        avg_confidence = np.mean([r['confidence_score'] for r in recommendations])

        # Price cohesion: one minus the mean relative price gap to the target.
        target_price = self.item_purchase_patterns[target_product]['price']
        price_diffs = [abs(r['avg_price'] - target_price) / target_price
                      for r in recommendations]
        price_cohesion = 1 - np.mean(price_diffs)

        # Diversity: distinct recommendation types per recommendation.
        recommendation_types = [r['recommendation_type'] for r in recommendations]
        diversity_score = len(set(recommendation_types)) / len(recommendations)
        collaborative_count = recommendation_types.count('collaborative')

        return {
            'silhouette_score': round(silhouette, 3) if silhouette is not None else None,
            'average_similarity': round(avg_similarity, 3),
            'average_confidence': round(avg_confidence, 3),
            'price_cohesion': round(price_cohesion, 3),
            'diversity_score': round(diversity_score, 3),
            'collaborative_ratio': round(collaborative_count / len(recommendations), 3)
        }

    def _validate_input(self, product: str, qty: float, price: float) -> bool:
        """Check that a query is plausible for a known product.

        A query is rejected when the product is unknown, when ``price`` lies
        below half of the band's lower bound or above 1.5x its upper bound,
        or when ``qty`` is negative or more than ten times the product's
        average quantity.
        """
        if product not in self.item_purchase_patterns:
            return False

        pattern = self.item_purchase_patterns[product]
        price_range = self.price_ranges[product]

        if price < price_range[0] * 0.5 or price > price_range[1] * 1.5:
            return False

        if qty < 0 or qty > pattern['avg_qty'] * 10:
            return False

        return True

recommender = ProductRecommender("SALE DATA.csv")

# Get recommendations with evaluation metrics
recommendations, evaluation = recommender.get_recommendations(
    product="Odomos Cream Natural 50gm(62*72)",
    qty=400,
    price=32
)

# Print recommendations
print("\nRecommendations:")
for rec in recommendations:
    print(f"\nProduct: {rec['product']}")
    print(f"Similarity Score: {rec['similarity_score']}")
    print(f"Confidence Score: {rec['confidence_score']}")
    print(f"Recommendation Type: {rec['recommendation_type']}")
    # Price-based fallback entries may not carry a component breakdown, so
    # read the key defensively instead of raising KeyError mid-report.
    print(f"Similarity Components: {rec.get('similarity_components', {})}")
    print(f"Recommended Quantity: {rec['recommended_qty']}")
    print(f"Average Price: ₹{rec['avg_price']}")
    print(f"Purchase Frequency: {rec['purchase_frequency']}")

# Print evaluation metrics (an 'error' entry is printed when the input was
# rejected or nothing could be recommended)
print("\nEvaluation Metrics:")
for metric, value in evaluation.items():
    print(f"{metric}: {value}")


Recommendations:

Product: Dove BathingSoap 100gm+20gm(65*60)
Similarity Score: 0.664
Confidence Score: 0.597
Recommendation Type: collaborative
Similarity Components: {'price_similarity': 0.785, 'pattern_similarity': 0.999, 'copurchase_strength': 0.558, 'customer_overlap': 0.083}
Recommended Quantity: 600.0
Average Price: ₹42.72
Purchase Frequency: 9

Product: Odomos Cream Natural 100gm(115*36)
Similarity Score: 0.632
Confidence Score: 0.569
Recommendation Type: collaborative
Similarity Components: {'price_similarity': 0.542, 'pattern_similarity': 0.977, 'copurchase_strength': 0.683, 'customer_overlap': 0.2}
Recommended Quantity: 1350.0
Average Price: ₹61.89
Purchase Frequency: 2

Product: Pears Pure & Gentle Soap 120g(54*72)
Similarity Score: 0.611
Confidence Score: 0.55
Recommendation Type: collaborative
Similarity Components: {'price_similarity': 0.927, 'pattern_similarity': 0.977, 'copurchase_strength': 0.0, 'customer_overlap': 0.2}
Recommended Quantity: 36.0
Average Price: ₹36.1

# Because K-means performs poorly on high-dimensional, sparse data, we will use the Adaptive Fusion Weights with Multigraph algorithm as an alternative.

In [8]:
# Apart from K-means, try the Adaptive Weights method.
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

class ProductRecommender:
    """Item-based collaborative-filtering recommender.

    Purchases are aggregated into a customer x product interaction matrix
    (quantity, price and recency weighted); products are then compared by the
    cosine similarity of their customer columns. Recommendations are filtered
    by a price band around the queried price and de-duplicated by name.
    """

    def __init__(self):
        """Create an empty recommender; call ``load_and_process_data`` next."""
        self.purchase_history = pd.DataFrame()
        self.customer_encoder = LabelEncoder()
        self.product_encoder = LabelEncoder()
        self.customer_product_matrix = None
        self.product_similarity_matrix = None
        self.product_mapping = {}

    def parse_date(self, date_str):
        """Parse one date value, returning ``pd.NaT`` when it cannot be parsed."""
        try:
            return pd.to_datetime(date_str, errors='coerce')
        except Exception:
            # Narrowed from a bare ``except`` so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return pd.NaT

    def load_and_process_data(self, sales_file, customer_file):
        """Load the CSV files and build every derived structure.

        Args:
            sales_file: CSV with the columns ``Product Name``, ``Party``,
                ``Qty``, ``Price`` and ``Date``.
            customer_file: Customer CSV. It is read but its contents are not
                used by the recommender at present.

        Raises:
            Whatever ``pd.read_csv``, the encoders or the similarity
            computation raise; encoding errors are printed before being
            re-raised.
        """
        sales_df = pd.read_csv(sales_file, encoding='utf-8', on_bad_lines='skip')
        # Still read so that a missing or unreadable customer file fails here
        # exactly as before; the parsed table is currently unused.
        pd.read_csv(customer_file, encoding='utf-8', on_bad_lines='skip')

        # Clean data
        sales_df = sales_df.dropna(subset=['Product Name', 'Party', 'Qty', 'Price'])
        history = sales_df[['Product Name', 'Party', 'Qty', 'Price', 'Date']].copy()
        history['Date'] = history['Date'].apply(self.parse_date)
        history['Product Name'] = history['Product Name'].str.strip()
        history['Party'] = history['Party'].str.strip()
        self.purchase_history = history.drop_duplicates()

        # Encode customers and products as integer ids
        try:
            self.purchase_history['CustomerEncoded'] = self.customer_encoder.fit_transform(
                self.purchase_history['Party']
            )
            self.purchase_history['ProductEncoded'] = self.product_encoder.fit_transform(
                self.purchase_history['Product Name']
            )
        except Exception as e:
            print(f"Error during encoding: {e}")
            raise

        # A label's code is its position in ``classes_``.
        self.product_mapping = dict(enumerate(self.product_encoder.classes_))

        self.create_interaction_matrix()
        self.calculate_product_similarity()

    def create_interaction_matrix(self):
        """Build ``customer_product_matrix`` from the purchase history.

        Each (customer, product) cell holds
        ``log1p(total Qty) * mean Price * exp(-0.001 * days since last
        purchase)``; pairs without purchases are 0. On failure the error is
        printed and the matrix is set to ``None``.
        """
        try:
            interaction_df = self.purchase_history.groupby(
                ['CustomerEncoded', 'ProductEncoded']
            ).agg({
                'Qty': 'sum',
                'Price': 'mean',
                'Date': 'max'
            }).reset_index()
            current_date = pd.Timestamp.now()
            interaction_df['DaysSinceLastPurchase'] = (
                current_date - interaction_df['Date']
            ).dt.days
            # NOTE(review): pairs whose dates are all unparseable (NaT) get a
            # NaN weight and end up as 0 after fillna - confirm this is the
            # intended treatment of undated purchases.
            interaction_df['TimeWeight'] = np.exp(-0.001 * interaction_df['DaysSinceLastPurchase'])
            interaction_df['InteractionScore'] = (
                np.log1p(interaction_df['Qty']) *
                interaction_df['Price'] *
                interaction_df['TimeWeight']
            )
            # Pivot into matrix
            self.customer_product_matrix = interaction_df.pivot(
                index='CustomerEncoded',
                columns='ProductEncoded',
                values='InteractionScore'
            ).fillna(0)
        except Exception as e:
            print(f"Error creating interaction matrix: {e}")
            self.customer_product_matrix = None

    def calculate_product_similarity(self):
        """Compute the product x product cosine-similarity matrix.

        Errors are printed and re-raised.
        """
        try:
            self.product_similarity_matrix = cosine_similarity(self.customer_product_matrix.T)
        except Exception as e:
            print(f"Error during similarity calculation: {e}")
            raise

    def get_similar_products(self, current_product, n=5):
        """Return the ``n`` products most similar to ``current_product``.

        Returns:
            List of ``(product name, similarity)`` tuples, most similar first
            and never containing ``current_product`` itself; or a message
            string when the product is unknown.
        """
        if current_product not in self.product_encoder.classes_:
            return f"No data available for '{current_product}'"
        product_encoded = self.product_encoder.transform([current_product])[0]
        similarity_scores = self.product_similarity_matrix[product_encoded]

        # Descending order with a stable sort so ties resolve
        # deterministically (by encoded id).
        ranked = np.argsort(-similarity_scores, kind='stable')
        # Exclude the product explicitly: it is not guaranteed to rank first,
        # because another product can tie at similarity 1.0 or its own
        # column can be all zeros.
        top_indices = [idx for idx in ranked if idx != product_encoded][:max(n, 0)]
        return [(self.product_mapping[idx], similarity_scores[idx]) for idx in top_indices]

    def calculate_dynamic_price_range(self, price):
        """Return the accepted (low, high) band around ``price``.

        The band is +/-40% below 50, +/-30% below 200 and +/-20% otherwise.
        """
        if price < 50:
            return (0.6 * price, 1.4 * price)
        elif price < 200:
            return (0.7 * price, 1.3 * price)
        else:
            return (0.8 * price, 1.2 * price)

    def calculate_confidence_score(self, similarity, n_purchases):
        """Scale ``similarity`` by a purchase-count weight.

        The weight grows logarithmically with ``n_purchases`` and is capped
        at 1, which is reached at 100 purchases.
        """
        purchase_weight = min(1, np.log1p(n_purchases) / np.log1p(100))
        return similarity * purchase_weight

    def _build_recommendation(self, prod, prod_data, similarity):
        """Assemble one recommendation dict from a product's sales rows."""
        return {
            'product': prod,
            'similarity_score': similarity,
            'avg_price': prod_data['Price'].mean(),
            'typical_quantity': prod_data['Qty'].mean(),
            'confidence_score': self.calculate_confidence_score(
                similarity, prod_data.shape[0]
            )
        }

    def recommend_products(self, current_product, price, quantity, n_recommendations=5):
        """Recommend products for ``current_product``.

        Similar products whose average price lies in the band around
        ``price`` are taken first; remaining slots are filled with products
        sold in that band at a base similarity of 0.3. A candidate sharing
        two or more name words with an already selected product is skipped.

        Args:
            current_product: Name of the queried product.
            price: Unit price used to derive the accepted price band.
            quantity: Accepted for interface compatibility; not used.
            n_recommendations: Maximum number of recommendations.

        Returns:
            List of recommendation dicts sorted by confidence; a message
            string when the product is unknown; ``[]`` when an error occurs
            (the error is printed).
        """
        try:
            similar_products = self.get_similar_products(current_product, n_recommendations*5)
            if isinstance(similar_products, str):
                return similar_products

            price_range = self.calculate_dynamic_price_range(price)
            history = self.purchase_history
            filtered_recommendations = []
            seen_patterns = []  # word sets of the products selected so far

            def is_similar_to_existing(prod_name):
                """True if the name shares >= 2 words with a selected product."""
                prod_parts = set(prod_name.lower().split())
                return any(len(prod_parts & pattern) >= 2 for pattern in seen_patterns)

            def select(prod, prod_data, similarity):
                """Record a product as selected."""
                filtered_recommendations.append(
                    self._build_recommendation(prod, prod_data, similarity)
                )
                seen_patterns.append(set(prod.lower().split()))

            for prod, similarity in similar_products:
                if len(filtered_recommendations) >= n_recommendations:
                    break
                if is_similar_to_existing(prod):
                    continue

                prod_data = history[history['Product Name'] == prod]
                if prod_data.empty:
                    continue
                if price_range[0] <= prod_data['Price'].mean() <= price_range[1]:
                    select(prod, prod_data, similarity)

            # Not enough recommendations: fall back to products sold in the
            # price band, never offering the queried product itself.
            if len(filtered_recommendations) < n_recommendations:
                already_used = [r['product'] for r in filtered_recommendations]
                already_used.append(current_product)
                price_similar_products = history[
                    (history['Price'] >= price_range[0]) &
                    (history['Price'] <= price_range[1]) &
                    (~history['Product Name'].isin(already_used))
                ]['Product Name'].unique()

                for prod in price_similar_products:
                    if len(filtered_recommendations) >= n_recommendations:
                        break
                    if is_similar_to_existing(prod):
                        continue

                    prod_data = history[history['Product Name'] == prod]
                    if not prod_data.empty:
                        # Base similarity for price-based recommendations
                        select(prod, prod_data, 0.3)

            filtered_recommendations.sort(key=lambda x: x['confidence_score'], reverse=True)
            return filtered_recommendations[:n_recommendations]
        except Exception as e:
            print(f"Error during recommendation generation: {e}")
            return []

def main():
    """Run the demo query against the bundled CSV exports.

    Returns whatever ``recommend_products`` returns for the sample product,
    or ``None`` when any step raises (the error is printed).
    """
    try:
        engine = ProductRecommender()
        engine.load_and_process_data('SALE DATA.csv', 'CUSTOMER DATABASE.csv')
        # Sample query: product name, unit price and quantity.
        return engine.recommend_products(
            "Dabur Amla HairOil450ml(225*32)Offer", 127, 5000
        )
    except Exception as exc:
        print(f"Error in main execution: {exc}")
        return None

if __name__ == "__main__":
    recs = main()
    if isinstance(recs, str):
        # recommend_products reports an unknown product as a plain message;
        # print it instead of iterating over its characters.
        print(recs)
    elif recs:
        print("\nRecommended products:")
        for i, rec in enumerate(recs, 1):
            print(f"\n{i}. {rec['product']}")
            print(f"   Similarity Score: {rec['similarity_score']:.2f}")
            print(f"   Confidence Score: {rec['confidence_score']:.2f}")
            print(f"   Average Price: ₹{rec['avg_price']:.2f}")
            print(f"   Typical Quantity: {rec['typical_quantity']:.0f}")


Recommended products:

1. Lipton Green Tea Lemon- Honey(170*24)
   Similarity Score: 0.39
   Confidence Score: 0.39
   Average Price: ₹112.54
   Typical Quantity: 497

2. Vaseline  Deep Moisture 400ml (435*36)
   Similarity Score: 0.45
   Confidence Score: 0.39
   Average Price: ₹146.19
   Typical Quantity: 345

3. Lifebuoy Silver Shield125Gm PO5(155*30)
   Similarity Score: 0.37
   Confidence Score: 0.33
   Average Price: ₹107.63
   Typical Quantity: 696

4. Horlicks Chocolate Jar -500g (249*24)
   Similarity Score: 0.38
   Confidence Score: 0.32
   Average Price: ₹144.25
   Typical Quantity: 387

5. TATA TEA PRM LEAF 500GM(255*48)
   Similarity Score: 0.36
   Confidence Score: 0.30
   Average Price: ₹158.52
   Typical Quantity: 546
