In [5]:
import pandas as pd
import sqlalchemy
import unicodedata
import itertools
from collections import Counter

# Basket Analysis

In [6]:
# Open the case-study SQLite database through SQLAlchemy.
engine = sqlalchemy.create_engine("sqlite:///C1_case_study.db")

# Read both source tables into DataFrames, in the same order as before:
# first the line-item fact table, then the item dimension table.
fact_line_items, dim_items = (
    pd.read_sql_table(table_name, engine)
    for table_name in ('fact_line_items', 'dim_items')
)


In [7]:
# Basket/combo analysis
#
# Builds one basket (list of item names) per transaction, then derives:
#   pair_counts            - Counter of item pairs that share a basket
#   single_in_multi_counts - Counter of items appearing in multi-item baskets
#   single_counts          - how often each top-pair item was bought alone
#   single_counts_df       - the same counts as an (Item, Single_Frequency) frame

# Join Fact Line Items on Dim Items (to get item_name).
# NOTE(review): assumes dim_items holds one row per item_id; duplicated
# item_ids would multiply line items in this inner join -- confirm.
line_items_named = fact_line_items.merge(dim_items, on='item_id')

# Group items by Transaction ID to get baskets
baskets = line_items_named.groupby('transaction_id')['item_name'].apply(list)

# Basket size is the number of line items, so a repeated item counts each time.
basket_sizes = baskets.apply(len)

# Filter for Multi-Item Baskets
multi_item_baskets = baskets[basket_sizes > 1]

# Count pairs and per-item appearances in a single pass over the baskets.
# Items are de-duplicated per basket; sorting gives every pair a canonical
# (alphabetical) orientation and makes tie order in most_common() reproducible.
pair_counts = Counter()
single_in_multi_counts = Counter()
for items in multi_item_baskets:
    unique_items = sorted(set(items))
    single_in_multi_counts.update(unique_items)
    # A basket holding several copies of one item contributes no pair.
    if len(unique_items) > 1:
        pair_counts.update(itertools.combinations(unique_items, 2))

# Display Top 10 Combinations
top_pairs = pd.DataFrame(pair_counts.most_common(10), columns=['Pair', 'Frequency'])
print("Top 10 Combinations:")
print(top_pairs)

# Display Top Singles that Appear in Multi-Item Baskets
top_singles_in_multi = pd.DataFrame(
    single_in_multi_counts.most_common(10), columns=['Item', 'Frequency']
)
print("\nTop 10 Single Items in Multi-Item Baskets:")
print(top_singles_in_multi)

# Find single-item baskets containing top pair items
#=====================================================

# Every distinct item that takes part in one of the top pairs
top_pair_items = set(itertools.chain.from_iterable(top_pairs['Pair']))

# Single-item baskets: transactions with exactly one line item
single_baskets = baskets[basket_sizes == 1]

# The lone item of each single basket, extracted once and reused below
single_item_names = single_baskets.apply(lambda items: items[0])
is_top_pair_item = single_item_names.isin(top_pair_items)

# Filter singles containing top pair items
single_relevant = single_baskets[is_top_pair_item]

# Count frequency per single item
single_counts = single_item_names[is_top_pair_item].value_counts()

# Name the columns while resetting the index. Passing columns=[...] to
# pd.DataFrame on an existing frame *selects* those labels instead of renaming,
# which previously produced a frame containing only NaN.
single_counts_df = single_counts.rename_axis('Item').reset_index(name='Single_Frequency')


Top 10 Combinations:
                                    Pair  Frequency
0               (ENTREE 4.29, SIDE 1.29)       3659
1                 (BACON 3 SLICES, EGGS)       2348
2    (ENT -PREMIUM SIDE 1.39, SIDE 1.29)       2195
3               (ENTREE 4.79, SIDE 1.29)       2171
4                     (EGGS, HOME FRIES)       1961
5                  (EGGS, SAUSAGE PATTY)       1548
6         (ADD A BEVERAGE $1, SIDE 1.29)       1197
7       (ADD A BEVERAGE $1, ENTREE 4.29)       1195
8                 (SIDE 1.29, SIDE 1.99)       1187
9  (ENT -PREMIUM SIDE 1.39, ENTREE 4.29)       1155

Top 10 Single Items in Multi-Item Baskets:
                                Item  Frequency
0                  ADD A BEVERAGE $1      10611
1                          SIDE 1.29      10332
2                               EGGS       6406
3                        ENTREE 4.29       5607
4             ENT -PREMIUM SIDE 1.39       4211
5                              FRIES       3881
6                     BACON

In [8]:
# Upsell estimate: profit gained if single-item baskets were turned into combos.
# For each top pair (A, B): every converted single-A basket earns B's unit
# profit, and every converted single-B basket earns A's unit profit.
# Unit profit per item_name = mean(price) * mean(margin) over its dim_items rows.
# NOTE(review): an item that belongs to several top pairs has its single-item
# baskets counted once per pair, so overall_lift can count the same basket
# more than once -- confirm this is intended.

discount_factor = 1.0  # multiplier applied to unit profit; 0.9 simulates a discount
conversion_rate = 0.5  # assumed share of single-item baskets that accept the upsell


def _unit_profit(avg_row):
    """Return price * margin for one averaged item row, or 0 when the price is 0."""
    if avg_row['price'] == 0:
        return 0
    return avg_row['price'] * avg_row['margin']


# item_name -> unit profit
avg_price_margin = dim_items.groupby('item_name').agg({'price': 'mean', 'margin': 'mean'})
item_profit = avg_price_margin.apply(_unit_profit, axis=1)

# Per-pair breakdown rows plus the running grand total
potential_details = []
overall_lift = 0.0

for pair, freq in pair_counts.most_common(10):
    a, b = pair
    # Items absent from single_counts / item_profit fall back to 0.
    singles_a, singles_b = (single_counts.get(item, 0) for item in pair)
    unit_profit_a, unit_profit_b = (
        item_profit.get(item, 0) * discount_factor for item in pair
    )

    # Singles of one item gain the *other* item's profit, scaled by uptake.
    lift_a_to_ab = singles_a * unit_profit_b * conversion_rate
    lift_b_to_ba = singles_b * unit_profit_a * conversion_rate

    total_lift = lift_a_to_ab + lift_b_to_ba
    overall_lift += total_lift

    detail_row = {
        'Pair': pair,
        'Singles of A': singles_a,
        'Singles of B': singles_b,
        'Unit Profit A': unit_profit_a,
        'Unit Profit B': unit_profit_b,
        'Lift from A to AB': lift_a_to_ab,
        'Lift from B to BA': lift_b_to_ba,
        'Total Lift': total_lift,
    }
    potential_details.append(detail_row)

potential_df = pd.DataFrame(potential_details)
print("\nEstimated Potential Profit Lift from Converting Singles to Combos:")
print(potential_df)
print(f"\nOverall Potential Profit Lift: ${overall_lift:.2f}")


Estimated Potential Profit Lift from Converting Singles to Combos:
                                    Pair  Singles of A  Singles of B  \
0               (ENTREE 4.29, SIDE 1.29)           693           623   
1                 (BACON 3 SLICES, EGGS)           128           257   
2    (ENT -PREMIUM SIDE 1.39, SIDE 1.29)           133           623   
3               (ENTREE 4.79, SIDE 1.29)           143           623   
4                     (EGGS, HOME FRIES)           257             0   
5                  (EGGS, SAUSAGE PATTY)           257             0   
6         (ADD A BEVERAGE $1, SIDE 1.29)           451           623   
7       (ADD A BEVERAGE $1, ENTREE 4.29)           451           693   
8                 (SIDE 1.29, SIDE 1.99)           623            70   
9  (ENT -PREMIUM SIDE 1.39, ENTREE 4.29)           133           693   

   Unit Profit A  Unit Profit B  Lift from A to AB  Lift from B to BA  \
0          1.716          0.516         178.793997         534.534

In [9]:


# Scenario analysis: the same upsell estimate, widened to more pairs and run
# at several conversion rates.
num_pairs = 50  # number of top pairs to evaluate (previously 10)
conversion_scenarios = {'Low': 0.3, 'Base': 0.5, 'High': 0.8}  # scenario -> uptake rate
discount_factor = 0.85  # 15% combo discount, applied as a multiplier on unit profit
volume_uplift = 1.2  # assumed volume multiplier accompanying the discount
# NOTE(review): discount_factor scales unit *profit* (item_profit), not price;
# a price discount with unchanged cost would cut profit by more -- confirm.

# Per-scenario detail rows and running totals, keyed by scenario name
potential_details = {name: [] for name in conversion_scenarios}
overall_lifts = dict.fromkeys(conversion_scenarios, 0.0)

for pair, freq in pair_counts.most_common(num_pairs):
    a, b = pair
    # Items absent from single_counts / item_profit fall back to 0.
    singles_a, singles_b = (single_counts.get(item, 0) for item in pair)
    unit_profit_a, unit_profit_b = (
        item_profit.get(item, 0) * discount_factor for item in pair
    )

    for scenario, rate in conversion_scenarios.items():
        # Singles of one item gain the other item's discounted profit.
        lift_a_to_ab = singles_a * unit_profit_b * rate * volume_uplift
        lift_b_to_ba = singles_b * unit_profit_a * rate * volume_uplift
        total_lift = lift_a_to_ab + lift_b_to_ba
        overall_lifts[scenario] += total_lift

        scenario_row = {
            'Pair': pair,
            'Singles of A': singles_a,
            'Singles of B': singles_b,
            'Total Lift': total_lift,
        }
        potential_details[scenario].append(scenario_row)

# Report each scenario: its per-pair table followed by its total
for scenario, scenario_rows in potential_details.items():
    print(f"\n{scenario} Scenario Potential Lift:")
    print(pd.DataFrame(scenario_rows))
    print(f"Overall {scenario} Lift: ${overall_lifts[scenario]:.2f}")


Low Scenario Potential Lift:
                                                Pair  Singles of A  \
0                           (ENTREE 4.29, SIDE 1.29)           693   
1                             (BACON 3 SLICES, EGGS)           128   
2                (ENT -PREMIUM SIDE 1.39, SIDE 1.29)           133   
3                           (ENTREE 4.79, SIDE 1.29)           143   
4                                 (EGGS, HOME FRIES)           257   
5                              (EGGS, SAUSAGE PATTY)           257   
6                     (ADD A BEVERAGE $1, SIDE 1.29)           451   
7                   (ADD A BEVERAGE $1, ENTREE 4.29)           451   
8                             (SIDE 1.29, SIDE 1.99)           623   
9              (ENT -PREMIUM SIDE 1.39, ENTREE 4.29)           133   
10                            (BURGER CHEESE, FRIES)             0   
11                             (EGGS, REFRIED BEANS)           257   
12             (ENT -PREMIUM SIDE 1.39, ENTREE 4.79)        