# ***Data Understanding***

In [None]:
#Upload file in colab
# Opens Colab's browser upload dialog and keeps the call's return value in `uploaded`
# for later cells.
# NOTE(review): the recorded output shows Colab saving "AR.xlsx" as "AR (2).xlsx",
# i.e. the on-disk name can differ from the original name when it is already taken.
from google.colab import files
uploaded = files.upload()

Saving AR.xlsx to AR (2).xlsx


In [None]:
#Load the dataset
import io

import pandas as pd

# Colab renames an upload whose name is already taken (the upload cell's output
# shows "AR.xlsx" being saved as "AR (2).xlsx"), so a hard-coded 'AR.xlsx' can
# silently load an older copy of the workbook. Prefer the content returned by
# files.upload(); fall back to the file on disk when nothing was uploaded in
# this session (e.g. the upload cell was skipped).
_uploaded_files = globals().get('uploaded') or {}
if _uploaded_files:
    # Per the Colab docs, files.upload() maps each file name to its raw bytes;
    # the first (normally the only) uploaded file is used.
    _workbook_bytes = next(iter(_uploaded_files.values()))
    df = pd.read_excel(io.BytesIO(_workbook_bytes))
else:
    df = pd.read_excel('AR.xlsx')

In [None]:
#Display first few rows of the dataset
# A bare last expression lets the notebook render the frame as a table instead of
# the wrapped plain-text dump that print() produces.
df.head()

  Invoice StockCode                          Description  Quantity  \
0  489434     85048  15CM CHRISTMAS GLASS BALL 20 LIGHTS        12   
1  489434    79323P                   PINK CHERRY LIGHTS        12   
2  489434    79323W                  WHITE CHERRY LIGHTS        12   
3  489434     22041         RECORD FRAME 7" SINGLE SIZE         48   
4  489434     21232       STRAWBERRY CERAMIC TRINKET BOX        24   

          InvoiceDate  Price  Customer ID         Country  
0 2009-12-01 07:45:00   6.95      13085.0  United Kingdom  
1 2009-12-01 07:45:00   6.75      13085.0  United Kingdom  
2 2009-12-01 07:45:00   6.75      13085.0  United Kingdom  
3 2009-12-01 07:45:00   2.10      13085.0  United Kingdom  
4 2009-12-01 07:45:00   1.25      13085.0  United Kingdom  


In [None]:
#Display last few rows of the dataset
# A bare last expression lets the notebook render the frame as a table instead of
# the wrapped plain-text dump that print() produces.
df.tail()

       Invoice StockCode                         Description  Quantity  \
525456  538171     22271                FELTCRAFT DOLL ROSIE         2   
525457  538171     22750        FELTCRAFT PRINCESS LOLA DOLL         1   
525458  538171     22751      FELTCRAFT PRINCESS OLIVIA DOLL         1   
525459  538171     20970  PINK FLORAL FELTCRAFT SHOULDER BAG         2   
525460  538171     21931              JUMBO STORAGE BAG SUKI         2   

               InvoiceDate  Price  Customer ID         Country  
525456 2010-12-09 20:01:00   2.95      17530.0  United Kingdom  
525457 2010-12-09 20:01:00   3.75      17530.0  United Kingdom  
525458 2010-12-09 20:01:00   3.75      17530.0  United Kingdom  
525459 2010-12-09 20:01:00   3.75      17530.0  United Kingdom  
525460 2010-12-09 20:01:00   1.95      17530.0  United Kingdom  


In [None]:
#Display Basic information about the dataset
# DataFrame.info() writes its report to stdout itself and returns None, so wrapping
# it in print() only appended a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525461 entries, 0 to 525460
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   Invoice      525461 non-null  object        
 1   StockCode    525461 non-null  object        
 2   Description  522533 non-null  object        
 3   Quantity     525461 non-null  int64         
 4   InvoiceDate  525461 non-null  datetime64[ns]
 5   Price        525461 non-null  float64       
 6   Customer ID  417534 non-null  float64       
 7   Country      525461 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 32.1+ MB
None


# ***Data Cleaning***

In [None]:
# Count the missing entries in every column.
missing_per_column = df.isna().sum()
print(missing_per_column)

Invoice             0
StockCode           0
Description      2928
Quantity            0
InvoiceDate         0
Price               0
Customer ID    107927
Country             0
dtype: int64


In [None]:
# Count rows that repeat an earlier row in every column (first occurrence not counted).
duplicate_row_count = df.duplicated(keep='first').sum()
print(duplicate_row_count)

6865


Exact duplicate rows (rows identical in every column: invoice, stock code, description, quantity, invoice date, price, customer ID and country) are removed so that each item within a transaction is represented only once. Since the analysis focuses on identifying item co-occurrence patterns (presence or absence of items), retaining duplicates would artificially inflate item counts without adding meaningful insight into association relationships.

In [None]:
# Drop rows that exactly repeat an earlier row, keeping the first occurrence.
df = df.drop_duplicates(keep='first')

In [None]:
# Drop every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

In [None]:
# Convert the customer identifier column to integers
# (the info() output earlier in the notebook lists it as float64).
customer_ids_as_int = df['Customer ID'].astype(int)
df['Customer ID'] = customer_ids_as_int

In [None]:
# Report the size of the cleaned frame as (rows, columns).
n_rows, n_cols = df.shape
print((n_rows, n_cols))

(410763, 8)


# ***Model Preprocessing***

In [None]:
# Build one basket per invoice: collect every Description sharing an Invoice value
# into a list, then expose the result as a two-column frame (Invoice, Items).
# NOTE(review): Invoice is an object-dtype column; if it also holds non-sale
# documents (e.g. cancellations) they become baskets too -- confirm against the data.
items_per_invoice = df.groupby(['Invoice'])['Description'].apply(list)
transactions = items_per_invoice.reset_index(name='Items')

In [None]:
#Install mlxtend module
!pip install mlxtend



In [None]:
from mlxtend.preprocessing import TransactionEncoder

# One-hot encode the baskets: one row per transaction, one column per distinct item.
te = TransactionEncoder()
te_array = te.fit_transform(transactions['Items'])
df_encoded = pd.DataFrame(data=te_array, columns=te.columns_)

# ***Model Implementation***

In [None]:
# Mine frequent itemsets with Apriori.
from mlxtend.frequent_patterns import apriori

frequent_itemsets = apriori(
    df_encoded,
    min_support=0.01,    # minimum support threshold passed to apriori
    use_colnames=True,   # report itemsets by item name instead of column index
)

In [None]:
# Derive association rules from the frequent itemsets, keeping rules with lift >= 1.0.
from mlxtend.frequent_patterns import association_rules

rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.0,
)

# ***Model Evaluation***

In [None]:
# Rank all rules by lift (highest first) and show the ten strongest.
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
top_rules = rules.sort_values('lift', ascending=False)
top_rules.loc[:, rule_columns].head(10)

Unnamed: 0,antecedents,consequents,support,confidence,lift
218,(POPPY'S PLAYHOUSE KITCHEN),(POPPY'S PLAYHOUSE BEDROOM ),0.010302,0.812709,70.997659
219,(POPPY'S PLAYHOUSE BEDROOM ),(POPPY'S PLAYHOUSE KITCHEN),0.010302,0.9,70.997659
233,(SET/10 PINK SPOTTY PARTY CANDLES),(SET/10 BLUE SPOTTY PARTY CANDLES),0.010599,0.688705,53.08657
232,(SET/10 BLUE SPOTTY PARTY CANDLES),(SET/10 PINK SPOTTY PARTY CANDLES),0.010599,0.816993,53.08657
142,"(KEY FOB , GARAGE DESIGN)","(KEY FOB , SHED)",0.010684,0.8,47.292231
143,"(KEY FOB , SHED)","(KEY FOB , GARAGE DESIGN)",0.010684,0.631579,47.292231
234,(SET/6 RED SPOTTY PAPER CUPS),(SET/6 RED SPOTTY PAPER PLATES),0.01132,0.758523,46.230686
235,(SET/6 RED SPOTTY PAPER PLATES),(SET/6 RED SPOTTY PAPER CUPS),0.01132,0.689922,46.230686
140,"(KEY FOB , SHED)","(KEY FOB , BACK DOOR )",0.010981,0.649123,43.251016
141,"(KEY FOB , BACK DOOR )","(KEY FOB , SHED)",0.010981,0.731638,43.251016


In [None]:
# Keep only rules that clear all three thresholds, then show the ten with highest lift.
meets_support = rules['support'] >= 0.005
meets_confidence = rules['confidence'] >= 0.4
meets_lift = rules['lift'] >= 1.2
strong_rules = rules[meets_support & meets_confidence & meets_lift]

strong_rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
strong_rules.sort_values(by='lift', ascending=False)[strong_rule_columns].head(10)

Unnamed: 0,antecedents,consequents,support,confidence,lift
218,(POPPY'S PLAYHOUSE KITCHEN),(POPPY'S PLAYHOUSE BEDROOM ),0.010302,0.812709,70.997659
219,(POPPY'S PLAYHOUSE BEDROOM ),(POPPY'S PLAYHOUSE KITCHEN),0.010302,0.9,70.997659
232,(SET/10 BLUE SPOTTY PARTY CANDLES),(SET/10 PINK SPOTTY PARTY CANDLES),0.010599,0.816993,53.08657
233,(SET/10 PINK SPOTTY PARTY CANDLES),(SET/10 BLUE SPOTTY PARTY CANDLES),0.010599,0.688705,53.08657
142,"(KEY FOB , GARAGE DESIGN)","(KEY FOB , SHED)",0.010684,0.8,47.292231
143,"(KEY FOB , SHED)","(KEY FOB , GARAGE DESIGN)",0.010684,0.631579,47.292231
235,(SET/6 RED SPOTTY PAPER PLATES),(SET/6 RED SPOTTY PAPER CUPS),0.01132,0.689922,46.230686
234,(SET/6 RED SPOTTY PAPER CUPS),(SET/6 RED SPOTTY PAPER PLATES),0.01132,0.758523,46.230686
140,"(KEY FOB , SHED)","(KEY FOB , BACK DOOR )",0.010981,0.649123,43.251016
141,"(KEY FOB , BACK DOOR )","(KEY FOB , SHED)",0.010981,0.731638,43.251016
