<a href="https://colab.research.google.com/github/swati-0308/retail_recommendation_system/blob/main/Apriori_Cross.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip install mlxtend
%pip install pyodbc
%pip install psycopg2
%pip install sqlalchemy



In [2]:
import getpass
import os
from datetime import datetime

import numpy as np
import pandas as pd
import psycopg2
import pyodbc
from mlxtend.frequent_patterns import apriori, association_rules
from sqlalchemy import create_engine

# Connection parameters for the RDS PostgreSQL instance.
# Each value can be overridden with the matching libpq-style environment
# variable. The password is deliberately NOT stored in the notebook: it is read
# from PGPASSWORD, or prompted for interactively when that variable is unset.
db_config = {
    "host": os.environ.get(
        "PGHOST", "projects-database.cxgcu68ksihx.us-east-1.rds.amazonaws.com"
    ),                                                   # RDS endpoint
    "port": os.environ.get("PGPORT", "5432"),            # PostgreSQL port
    "dbname": os.environ.get("PGDATABASE", "postgres"),  # Database name
    "user": os.environ.get("PGUSER", "postgres"),        # Username
    "password": os.environ.get("PGPASSWORD")
    or getpass.getpass("PostgreSQL password: "),         # Password (never hardcoded)
}

# psycopg2's `with connection:` block only commits/rolls back the transaction;
# it does not close the connection. The handle is therefore tracked explicitly
# and closed in `finally`, so the "closed" message below is actually true.
connection = None
try:
    connection = psycopg2.connect(**db_config)
    with connection.cursor() as cursor:
        # Fetch sales data from the database
        cursor.execute("SELECT * FROM sales_data;")
        results = cursor.fetchall()

        # Extract column names from the cursor description
        column_names = [desc[0] for desc in cursor.description]

    # Create a DataFrame from the fetched results
    sales_data_df = pd.DataFrame(results, columns=column_names)

except Exception as error:
    print(f"Error connecting to PostgreSQL: {error}")
    # Re-raise: without the data every later cell would fail with a confusing
    # NameError on `sales_data_df` instead of the real cause.
    raise
else:
    print("Data retrieved successfully.")
finally:
    if connection is not None:
        connection.close()
        print("Database connection closed.")

# Display the first few rows of the DataFrame
print(sales_data_df.head())

Data retrieved successfully.
Database connection closed.
   index invoiceno stockcode                          description  quantity  \
0      1    536365     71053                  WHITE METAL LANTERN         6   
1      2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
2      3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
3      5    536365     22752         SET 7 BABUSHKA NESTING BOXES         2   
4      7    536366     22633               HAND WARMER UNION JACK         6   

          invoicedate unitprice customerid         country      category  \
0 2010-12-01 08:26:00      3.39      17850  United Kingdom       Lantern   
1 2010-12-01 08:26:00      2.75      17850  United Kingdom   Coat Hanger   
2 2010-12-01 08:26:00      3.39      17850  United Kingdom  Water Bottle   
3 2010-12-01 08:26:00      7.65      17850  United Kingdom   Nesting Box   
4 2010-12-01 08:28:00      1.85      17850  United Kingdom   Hand Warmer   

   column1 

In [8]:
def convert_to_string(input_data):
    """
    Convert a frozenset, set, or list into a comma-separated string.

    Sets and frozensets have no defined iteration order (and for strings it
    changes between interpreter runs because of hash randomisation), so their
    items are sorted after conversion to text to give a reproducible result.
    Lists keep their original order. Any other value is returned as `str(value)`.
    """
    if isinstance(input_data, (frozenset, set)):
        # Sort the textual form so mixed-type items cannot raise on comparison.
        return ', '.join(sorted(str(item) for item in input_data))
    if isinstance(input_data, list):
        return ', '.join(str(item) for item in input_data)
    return str(input_data)

def fetch_category(item, category_map):
    """
    Look up `item` in `category_map` and return the mapped category.

    Returns None when the product has no entry in the map.
    """
    category = category_map.get(item, None)
    return category

# Accumulator for the per-country cross-sell rule tables (one DataFrame each)
all_cross_sell_results = list()

  and should_run_async(code)


In [10]:
# Mining thresholds, named so they can be tuned in one place.
MIN_SUPPORT = 0.03  # minimum support passed to apriori
MIN_LIFT = 0.01     # minimum lift passed to association_rules

# Start from an empty list so that re-running this cell does not append a
# second copy of every country's rules to results left over from an earlier run.
all_cross_sell_results = []

# Product -> category lookup, built once from the full dataset.
# When a description occurs several times, the last occurrence wins.
category_map = dict(zip(sales_data_df['description'], sales_data_df['category']))


def _itemset_categories(itemset):
    """Return the sorted, comma-separated categories of the products in `itemset`.

    Products with no (or a missing) category are left out.
    """
    categories = {category_map.get(item) for item in itemset}
    return ', '.join(sorted(str(cat) for cat in categories if pd.notna(cat)))


# Iterate through each unique country in the dataset
for country in sales_data_df['country'].unique():
    # Filter data for the current country
    country_data = sales_data_df[sales_data_df['country'] == country]

    # Invoice x product matrix of summed quantities. After unstack() the index
    # is already `invoiceno`, so no reset_index/set_index round trip is needed.
    basket = (
        country_data.groupby(['invoiceno', 'description'])['quantity'].sum()
        .unstack(fill_value=0)
    )

    # Encode as booleans: True when the invoice contains the product with a
    # positive quantity (the same cells the original 1/0 encoding marked as 1).
    encoded_basket = basket > 0

    # Generate frequent itemsets
    frequent_itemsets = apriori(encoded_basket, min_support=MIN_SUPPORT, use_colnames=True)
    if frequent_itemsets.empty:
        # association_rules raises on an empty itemset table; nothing to mine here.
        continue

    # num_itemsets is the number of transactions (rows of the basket),
    # not the number of frequent itemsets that were found.
    rules = association_rules(
        frequent_itemsets,
        metric="lift",
        min_threshold=MIN_LIFT,
        num_itemsets=len(encoded_basket),
    )

    # Sort cross-sell rules by lift and confidence
    cross_sell_rules = rules.sort_values(by=['lift', 'confidence'], ascending=[False, False])

    # Add category information while the item sets are still frozensets, and on
    # the frame that is actually stored (not on the discarded `rules` frame).
    cross_sell_rules['antecedent_category'] = cross_sell_rules['antecedents'].apply(_itemset_categories)
    cross_sell_rules['consequent_category'] = cross_sell_rules['consequents'].apply(_itemset_categories)

    # Convert frozensets to strings for better readability
    cross_sell_rules['antecedents'] = cross_sell_rules['antecedents'].apply(convert_to_string)
    cross_sell_rules['consequents'] = cross_sell_rules['consequents'].apply(convert_to_string)

    # Add country information to the cross-sell rules
    cross_sell_rules['country'] = country

    # Append cross-sell rules for the current country to the results
    all_cross_sell_results.append(cross_sell_rules)


  and should_run_async(code)


In [13]:
# Join all cross-sell results into one final DataFrame.
# Empty per-country tables are dropped first: concatenating empty frames is
# deprecated in pandas (likely the warning this cell emitted), and pd.concat
# raises ValueError when given no frames at all.
display_columns = ['country', 'antecedents', 'support', 'consequents', 'confidence', 'lift']
non_empty_results = [rules_df for rules_df in all_cross_sell_results if not rules_df.empty]

if non_empty_results:
    final_cross_sell_results = pd.concat(non_empty_results, ignore_index=True)
else:
    # No country produced any rule: keep an empty table with the expected columns.
    final_cross_sell_results = pd.DataFrame(columns=display_columns)

# Display the results
print("Final Cross-Sell Rules with Country Labels:")
final_cross_sell_results[display_columns]

Final Cross-Sell Rules with Country Labels:


  and should_run_async(code)
  final_cross_sell_results = pd.concat(all_cross_sell_results, ignore_index=True)


Unnamed: 0,country,antecedents,support,consequents,confidence,lift
0,France,"DOLLY GIRL CHILDRENS CUP, SPACEBOY CHILDRENS BOWL",0.030556,"DOLLY GIRL CHILDRENS BOWL, SPACEBOY CHILDRENS CUP",0.846154,27.692308
1,France,"DOLLY GIRL CHILDRENS BOWL, SPACEBOY CHILDRENS CUP",0.030556,"DOLLY GIRL CHILDRENS CUP, SPACEBOY CHILDRENS BOWL",1.000000,27.692308
2,France,"DOLLY GIRL CHILDRENS CUP, SPACEBOY CHILDRENS BOWL",0.030556,SPACEBOY CHILDRENS CUP,0.846154,23.431953
3,France,SPACEBOY CHILDRENS CUP,0.030556,"DOLLY GIRL CHILDRENS CUP, SPACEBOY CHILDRENS BOWL",0.846154,23.431953
4,France,"DOLLY GIRL CHILDRENS CUP, SPACEBOY CHILDRENS B...",0.030556,SPACEBOY CHILDRENS CUP,0.846154,23.431953
...,...,...,...,...,...,...
2573,EIRE,SET OF 3 REGENCY CAKE TINS,0.038793,IVORY KITCHEN SCALES,0.281250,2.610000
2574,EIRE,ROSES REGENCY TEACUP AND SAUCER,0.043103,SET OF 3 REGENCY CAKE TINS,0.333333,2.416667
2575,EIRE,SET OF 3 REGENCY CAKE TINS,0.043103,ROSES REGENCY TEACUP AND SAUCER,0.312500,2.416667
2576,EIRE,SET OF 3 CAKE TINS PANTRY DESIGN,0.030172,ROSES REGENCY TEACUP AND SAUCER,0.304348,2.353623
