In [2]:
pip install psycopg2


Collecting psycopg2
  Downloading psycopg2-2.9.10.tar.gz (385 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m385.7/385.7 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: psycopg2
  Building wheel for psycopg2 (setup.py) ... [?25ldone
[?25h  Created wheel for psycopg2: filename=psycopg2-2.9.10-cp310-cp310-macosx_10_9_x86_64.whl size=133844 sha256=949d77ac17c9a8afdf38de2ae75895baabe8fe8f9100d0bfbcec99fca280cf23
  Stored in directory: /Users/delin/Library/Caches/pip/wheels/15/c2/53/680416c0eed380edec859de7db3a660a47257b174357c11f64
Successfully built psycopg2
Installing collected packages: psycopg2
Successfully installed psycopg2-2.9.10
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install psycopg2-binary


Collecting psycopg2-binary
  Downloading psycopg2-binary-2.9.10.tar.gz (385 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m385.8/385.8 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: psycopg2-binary
  Building wheel for psycopg2-binary (setup.py) ... [?25ldone
[?25h  Created wheel for psycopg2-binary: filename=psycopg2_binary-2.9.10-cp310-cp310-macosx_10_9_x86_64.whl size=133921 sha256=e08f685352f30824081263fdea9cf368c8d4c6b13e93b3f751f709787fd0ff3c
  Stored in directory: /Users/delin/Library/Caches/pip/wheels/10/26/2e/160fa82a7c017c38715f9103ba8737c3dc69cc48a973e5c5f4
Successfully built psycopg2-binary
Installing collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.10
Note: you may need to restart the kernel to use updated packages.


## Database Connection and Data Retrieval

In [11]:
import getpass
import os

import pandas as pd
import psycopg2
from mlxtend.frequent_patterns import apriori, association_rules

# Define RDS connection parameters.
# Non-secret settings keep their previous values as defaults and can be
# overridden through environment variables. The password is deliberately NOT
# stored in the notebook: it is read from DB_PASSWORD, or prompted for when
# that variable is unset.
host = os.environ.get("DB_HOST", "projects-database.cxgcu68ksihx.us-east-1.rds.amazonaws.com")
port = os.environ.get("DB_PORT", "5432")
dbname = os.environ.get("DB_NAME", "postgres")
user = os.environ.get("DB_USER", "postgres")
password = os.environ.get("DB_PASSWORD") or getpass.getpass("Database password: ")

# Pre-bind both handles so the cleanup in `finally` is safe even when
# connect() or cursor() raises before they are assigned.
conn = None
cur = None

# Establish connection and fetch data
try:
    conn = psycopg2.connect(
        host=host, port=port, database=dbname, user=user, password=password
    )
    cur = conn.cursor()
    cur.execute("SELECT * FROM sales_data;")
    data = cur.fetchall()
    # cursor.description holds one entry per result column; element 0 is its name
    columns = [desc[0] for desc in cur.description]
    ret = pd.DataFrame(data, columns=columns)

except Exception as err:
    print(f"Database connection error: {err}")
    # Re-raise: later cells need `ret`, so continuing silently would only
    # surface as a confusing NameError further down the notebook.
    raise
finally:
    if cur is not None:
        cur.close()
    if conn is not None:
        conn.close()

# Helper to convert frozensets to strings
def frozenset_to_string(fset):
    """Render an itemset as a comma-separated string.

    Parameters
    ----------
    fset : frozenset or any
        A frozenset of items, or any other value.

    Returns
    -------
    str
        For a frozenset, the string forms of its members joined by ``', '``
        in sorted order; for anything else, ``str(fset)``.
    """
    if isinstance(fset, frozenset):
        # frozenset iteration order is not stable between interpreter
        # sessions; sorting keeps the rendered text reproducible.
        return ', '.join(sorted(map(str, fset)))
    return str(fset)


## Basket Preparation and Apriori Rules

In [12]:
def prepare_basket(data):
    """Build an invoice-by-item presence matrix.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``invoiceno``, ``description`` and
        ``quantity``.

    Returns
    -------
    pandas.DataFrame
        One row per ``invoiceno`` and one column per ``description``. A cell
        is 1 when the summed quantity of that item on that invoice is greater
        than zero, otherwise 0 (including item/invoice pairs that never occur
        and pairs whose quantities sum to zero or less).
    """
    quantities = (
        data.groupby(['invoiceno', 'description'])['quantity'].sum()
        .unstack()
        .fillna(0)
    )
    # Vectorised replacement for the per-element applymap lambda
    # (applymap is deprecated in recent pandas releases).
    return quantities.gt(0).astype('int64')

def generate_apriori_rules(basket, min_support=0.03, min_confidence=0.01):
    """Mine association rules from a basket matrix with Apriori.

    Parameters
    ----------
    basket : pandas.DataFrame
        Transaction-by-item matrix of 0/1 (or boolean) values.
    min_support : float, default 0.03
        Minimum support passed to ``apriori``.
    min_confidence : float, default 0.01
        Minimum confidence passed to ``association_rules``.

    Returns
    -------
    pandas.DataFrame
        The rules produced by ``association_rules``; an empty frame with the
        core rule columns when no itemset reaches ``min_support``.
    """
    # mlxtend recommends boolean input; 0/1 integers convert losslessly.
    frequent_itemsets = apriori(
        basket.astype(bool), min_support=min_support, use_colnames=True
    )

    if frequent_itemsets.empty:
        # association_rules raises ValueError on an empty itemset frame;
        # hand back an empty result so callers can simply test `.empty`.
        return pd.DataFrame(
            columns=[
                'antecedents', 'consequents',
                'antecedent support', 'consequent support',
                'support', 'confidence', 'lift',
            ]
        )

    return association_rules(
        frequent_itemsets, metric="confidence", min_threshold=min_confidence
    )


## Upsell Filtering

In [16]:
def filter_upsell_rules(rules, data):
    """Keep only rules whose consequents are all pricier than every antecedent.

    A rule is kept when the highest ``unitprice`` seen for any antecedent
    item is strictly below the lowest ``unitprice`` seen for any consequent
    item. Items with no (non-missing) price in ``data`` are ignored; a rule
    whose antecedent or consequent side has no priced item at all is dropped.

    Parameters
    ----------
    rules : pandas.DataFrame
        Must contain ``antecedents`` and ``consequents`` columns holding
        iterables of item descriptions.
    data : pandas.DataFrame
        Must contain ``description`` and ``unitprice`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of the qualifying rows of ``rules`` (original index labels and
        columns preserved); empty when nothing qualifies.
    """
    # Compute each item's price extremes once instead of rescanning `data`
    # for every rule.
    prices_by_item = data.groupby('description')['unitprice']
    highest_price = prices_by_item.max().dropna().to_dict()
    lowest_price = prices_by_item.min().dropna().to_dict()

    def is_upsell(antecedents, consequents):
        antecedent_max = [highest_price[item] for item in antecedents if item in highest_price]
        consequent_min = [lowest_price[item] for item in consequents if item in lowest_price]
        if not antecedent_max or not consequent_min:
            return False
        return max(antecedent_max) < min(consequent_min)

    # Positional boolean mask (ndarray) so selection does not depend on the
    # index labels of `rules` and also works when `rules` has no rows.
    keep = pd.Series(
        [is_upsell(a, c) for a, c in zip(rules['antecedents'], rules['consequents'])],
        dtype=bool,
    ).to_numpy()

    # Copy so callers can add columns without touching `rules`.
    return rules.loc[keep].copy()


## Per-Country Processing and Final Output

In [22]:
# One frame of upsell rules per country that produced any
upsell_results = []

for country in ret['country'].unique():
    country_data = ret[ret['country'] == country]
    # Nothing to mine when the equality filter matches no rows
    if country_data.empty:
        continue

    # Prepare transactional data
    basket = prepare_basket(country_data)

    # Generate rules using Apriori
    rules = generate_apriori_rules(basket, min_support=0.03, min_confidence=0.01)
    if rules.empty:
        continue

    # Filter rules for upsell
    filtered_upsell = filter_upsell_rules(rules, country_data)
    if filtered_upsell.empty:
        continue

    # Tag the surviving rules with the country they were mined from
    filtered_upsell['country'] = country
    upsell_results.append(filtered_upsell)

if not upsell_results:
    print("No upsell recommendations identified.")
else:
    final_upsell_df = pd.concat(upsell_results, ignore_index=True)

    # Convert frozensets to strings for readability
    for rule_side in ('antecedents', 'consequents'):
        final_upsell_df[rule_side] = final_upsell_df[rule_side].apply(frozenset_to_string)

    # Strongest rules first: by lift, then by confidence
    final_upsell_df_sorted = final_upsell_df.sort_values(
        by=['lift', 'confidence'], ascending=False
    )

    print("Upsell Recommendations:")
    print(final_upsell_df_sorted[['country', 'antecedents', 'consequents', 'confidence', 'lift']])




Upsell Recommendations:
     country                                        antecedents  \
72    France                         PACK OF 6 SKULL PAPER CUPS   
48    France                         PACK OF 6 SKULL PAPER CUPS   
47    France                         PACK OF 6 SKULL PAPER CUPS   
111     EIRE                          COOK WITH WINE METAL SIGN   
240     EIRE  REGENCY TEA PLATE PINK, REGENCY MILK JUG PINK,...   
..       ...                                                ...   
34    France                            LUNCH BAG RED RETROSPOT   
78   Germany                         JUMBO BAG WOODLAND ANIMALS   
66    France                  STRAWBERRY LUNCH BOX WITH CUTLERY   
30    France                            LUNCH BAG RED RETROSPOT   
32    France                            LUNCH BAG RED RETROSPOT   

                                           consequents  confidence       lift  
72   PACK OF 20 SKULL PAPER NAPKINS, PACK OF 6 SKUL...    0.523810  14.505495  
48         