In [1]:
from StoredQueries import POS_transactions

In [2]:
# Load the point-of-sale data through the project's stored query.
# NOTE(review): presumably returns a pandas DataFrame with one row per sold line
# item; the cells below rely on its 'TransactionID' and 'PartName' columns.
POS_df = POS_transactions()

In [3]:
# Preview the first rows of the raw POS data before any grouping is applied
POS_df.head()

Unnamed: 0,PartRno,PartId,PartName,LineItemID,TransactionID,Seq,PartRno.1,PartPriceRno,Qty,PartMeasureQty,UnitPrice,ExtPrice,CreDtTm
0,202,P70831298,TAIL GATE,3,2,1,202,202.0,1,1.0,29.99,29.99,2021-03-19 09:51:01.643
1,122,I70831252,SEAT BUCKET MANUAL,4,3,1,122,122.0,2,1.0,15.99,31.98,2021-03-19 10:02:09.660
2,75,E70831154,COIL,5,4,1,75,75.0,1,1.0,4.99,4.99,2021-03-19 10:07:59.860
3,125,I70831255,SEAT BELT (EACH),6,5,1,125,125.0,2,1.0,5.99,11.98,2021-03-19 11:04:12.900
4,10,A70831176,CAM SHAFT,7,6,1,10,10.0,1,1.0,15.99,15.99,2021-03-19 11:05:31.170


In [4]:
# Build one row per transaction: collect every 'PartName' sold under the same
# 'TransactionID' into a list, then expose that list column as 'PartNames'.
grouped_df = (
    POS_df
    .groupby('TransactionID')['PartName']
    .apply(list)
    .reset_index()
    .rename(columns={'PartName': 'PartNames'})
)



In [5]:
# Show the first 20 transactions with their collected part-name lists
grouped_df.head(20)

Unnamed: 0,TransactionID,PartNames
0,2,[TAIL GATE]
1,3,[SEAT BUCKET MANUAL]
2,4,[COIL]
3,5,[SEAT BELT (EACH)]
4,6,[CAM SHAFT]
5,7,"[CONSOLE LID, CARPET]"
6,8,[RACK AND PINION MANUAL]
7,9,[COIL]
8,10,[WATER PUMP]
9,11,"[THROTTLE BODY, COIL, STARTER]"


In [6]:
# Canonical, hashable form of each basket: the part names in alphabetical order,
# frozen as a tuple so identical baskets compare equal regardless of scan order.
grouped_df['SortedPartNames'] = grouped_df['PartNames'].map(
    lambda names: tuple(sorted(names))
)

# Tally how many transactions share each canonical basket and label the basket
# column 'TransactionLists' for the exported report.
count_df = (
    grouped_df
    .groupby('SortedPartNames')
    .size()
    .reset_index(name='Count')
    .rename(columns={'SortedPartNames': 'TransactionLists'})
)

In [7]:
# Keep only baskets made up of more than one part; single-part transactions
# carry no co-purchase information.
filtered_count_df = count_df[
    count_df['TransactionLists'].apply(lambda basket: len(basket) > 1)
]

In [8]:
# Export the 500 most frequent multi-part baskets, without the index column
(
    filtered_count_df
    .nlargest(n=500, columns='Count')
    .to_csv(path_or_buf='POS_Transactions_Count.csv', index=False)
)

This section counts, for every pair of distinct parts, how often the two parts appear together in the same transaction.

In [9]:
from itertools import combinations
import pandas as pd

# One list of part names per transaction, as built in the grouping cell.
transactions = grouped_df["PartNames"]
unique_parts = set(part for sublist in transactions for part in sublist)

# Enumerate the pairs from the *sorted* part names. Iterating the raw set would
# make both the orientation of each pair and the row order of the output depend
# on string hash randomisation, i.e. differ from one kernel run to the next.
# NOTE(review): assumes all part names are mutually comparable (plain strings) -
# confirm there are no missing names in the data.
combs = list(combinations(sorted(unique_parts), 2))

# Start every possible pair at zero so that pairs which were never bought
# together still get a row in the output.
comb_counts = {comb: 0 for comb in combs}

for transaction in transactions:
    # set(): a part that occurs on several line items of one transaction must
    #        count once per transaction, and must not form a pair with itself.
    # sorted(): yields each pair in the same orientation as the dictionary keys,
    #        so a direct lookup is always valid.
    for comb in combinations(sorted(set(transaction)), 2):
        comb_counts[comb] += 1

comb_df = pd.DataFrame(list(comb_counts.items()), columns=['Combination', 'Count'])


In [10]:
# Store the counts as integers so the exported column sorts numerically
comb_df['Count'] = comb_df['Count'].astype(dtype=int)

# Write every part pair with its co-occurrence count, without the index column
comb_df.to_csv(path_or_buf='POS_Transactions_Combinations.csv', index=False)