In [1]:
import pandas as pd

df = pd.read_excel('D:/Hackathon_Subject2/DataFiles/output1.3.xlsx')

def recommend_products(customer_id, df, threshold=2, top_n=5):
    # Convert customer_id to string
    customer_id = str(customer_id)
    
    # Find products purchased by the specified customer
    customer_products = set(df[df['CustomerID'] == customer_id]['StockCode'])
    
    # Find customers who purchased the same products
    similar_customers = df[df['StockCode'].isin(customer_products)]['CustomerID'].unique()
    
    # Filter out the specified customer from the similar customers
    similar_customers = set(similar_customers) - set([customer_id])
    
    # Calculate the similarity score for each similar customer
    similarity_scores = {}
    for sim_customer in similar_customers:
        sim_customer_products = set(df[df['CustomerID'] == sim_customer]['StockCode'])
        similarity_scores[sim_customer] = len(customer_products.intersection(sim_customer_products))
    
    # Filter out customers below the similarity threshold
    similar_customers = [cid for cid, score in similarity_scores.items() if score >= threshold]
    
    if not similar_customers:
        # Fallback strategy: Recommend popular products
        popular_products = df['StockCode'].value_counts().index.tolist()
        recommendations = popular_products[:top_n]
    else:
        # Find other products purchased by similar customers
        other_products = df[df['CustomerID'].isin(similar_customers) & ~df['StockCode'].isin(customer_products)]['StockCode'].unique()
        
        # Aggregate the counts of these products
        product_counts = df[df['CustomerID'].isin(similar_customers) & df['StockCode'].isin(other_products)].groupby('StockCode').size().sort_values(ascending=False)
        
        # Recommend the top products
        recommendations = product_counts.head(top_n).index.tolist()
    
    # Map product codes to descriptions
    descriptions = df[df['StockCode'].isin(recommendations)][['StockCode', 'Description']].drop_duplicates().set_index('StockCode').to_dict()['Description']
    
    # Get descriptions for recommendations
    recommendations_with_description = [descriptions.get(code) for code in recommendations]
    
    return recommendations_with_description

# Example usage
customer_id = 15804
recommendations = recommend_products(customer_id, df, threshold=2, top_n=5)
print("Recommended products for customer", customer_id, ":", recommendations)


Recommended products for customer 15804 : ['CREAM HANGING HEART T-LIGHT HOLDER', 'REGENCY CAKESTAND 3 TIER', 'JUMBO BAG RED RETROSPOT', 'ASSORTED COLOUR BIRD ORNAMENT', 'PARTY BUNTING']
