In [1]:
from StoredQueries import POS_transactions

In [2]:
POS_df = POS_transactions()

In [3]:
# Create list of all transactions grouped by each transaction
POS_df.head()

In [4]:
# Collect the part names of every transaction into one list per TransactionID.
# The resulting frame has exactly two columns: 'TransactionID' and 'PartNames'.
grouped_df = (
    POS_df
    .groupby('TransactionID')['PartName']
    .apply(list)
    .rename('PartNames')   # name the list column before it becomes a frame column
    .reset_index()
)



In [5]:
grouped_df.head(20)

In [6]:
# Canonical basket key: the transaction's part names, sorted and frozen as a tuple
# so that baskets with the same parts in a different order compare equal.
def _as_sorted_tuple(part_names):
    ordered = list(part_names)
    ordered.sort()
    return tuple(ordered)

grouped_df['SortedPartNames'] = grouped_df['PartNames'].map(_as_sorted_tuple)

# Count how many transactions share each basket; the key column is published
# as 'TransactionLists' and the frequency as 'Count'.
count_df = (
    grouped_df
    .groupby('SortedPartNames')
    .size()
    .rename_axis('TransactionLists')
    .reset_index(name='Count')
)

In [7]:
# Keep only baskets that contain at least two line items.
has_multiple_items = count_df['TransactionLists'].map(len) >= 2
filtered_count_df = count_df.loc[has_multiple_items]

In [8]:
filtered_count_df.nlargest(500, 'Count').to_csv('POS_Transactions_Count.csv', index=False)

This section counts how often each pair of items appears together in the same transaction.

In [None]:
from itertools import combinations
import pandas as pd
# Per-transaction part lists built earlier in the notebook.
transactions = grouped_df["PartNames"]
unique_parts = set(part for sublist in transactions for part in sublist)

# Iterating a set of strings yields a different order on every kernel start
# (string hashing is randomised), which made the orientation of each pair tuple
# - and therefore the exported CSV - change between runs. Sorting first gives
# every pair one fixed orientation: (smaller name, larger name).
# The part names are already sorted per transaction earlier in the notebook,
# so they are known to be mutually comparable.
ordered_parts = sorted(unique_parts)

combs = list(combinations(ordered_parts, 2))

# Seed every possible pair with zero so pairs that are never sold together
# still appear in the result with Count == 0.
comb_counts = {comb: 0 for comb in combs}

for transaction in transactions:
    for first, second in combinations(transaction, 2):
        # A part listed twice in one transaction pairs with itself; such
        # self-pairs are not tracked.
        if first == second:
            continue
        # Canonical key: matches the sorted orientation used for seeding,
        # so one lookup replaces the try-both-orders check.
        key = (first, second) if first < second else (second, first)
        comb_counts[key] += 1

comb_df = pd.DataFrame(list(comb_counts.items()), columns=['Combination', 'Count'])


In [None]:
# Force integer counts so the ordering below is numeric, then rank the pairs
# from most to least frequent with a fresh 0..n-1 index.
comb_df['Count'] = comb_df['Count'].astype(int)
comb_df = (
    comb_df
    .sort_values(by="Count", ascending=False)
    .reset_index(drop=True)
)

# Export the ranked pair counts (row index omitted).
comb_df.to_csv(path_or_buf='POS_Transactions_Combinations.csv', index=False)

Create the second dataframe from the combination dataframe. Step 1: Search through all transactions and create an items-sold dataframe of the form (PartName, Qty, TotalPrice). Step 2: Loop through the combination dataframe and emit each combination twice, once as A/B and once as B/A. Step 3: Match counts from the items-sold dataframe to the combination dataframe to produce a ratio for each A/B and B/A record.

In [None]:
# Step 1: Search through all transactions and create a items sold dataframe of the form (PartName, Qty, TotalPrice).
part_count_df = POS_transactions()
part_count_df.head()

In [None]:
# Step 1: Keep only the columns needed for the per-part totals.
# .copy() makes this an independent frame, so adding TotalPrice below is an
# unambiguous write instead of an assignment onto a slice of the query result
# (which triggers SettingWithCopyWarning).
part_count_df = part_count_df[["PartName", "Qty", "UnitPrice"]].copy()

# Line total per row, coerced to float and rounded to two decimals.
part_count_df['TotalPrice'] = (
    (part_count_df['Qty'] * part_count_df['UnitPrice']).astype(float).round(2)
)

# Step 2: Count the number of rows (line items) on which each PartName appears.
# Naming the index and the value column explicitly avoids depending on the
# column labels value_counts().reset_index() produces, which differ between
# pandas versions. Despite its name, this is a two-column DataFrame.
part_count_series = (
    part_count_df['PartName']
    .value_counts()
    .rename_axis('PartName')
    .reset_index(name='PartCount')
)

# Step 3: Sum the quantity sold per PartName (also a two-column DataFrame).
total_qty_series = (
    part_count_df
    .groupby('PartName')['Qty']
    .sum()
    .reset_index(name='TotalPartQuantity')
)

# Step 4: Attach PartCount and TotalPartQuantity to every line-item row.
# Left joins keep one output row per original line item.
part_count_df = pd.merge(part_count_df, part_count_series, on='PartName', how='left')
part_count_df = pd.merge(part_count_df, total_qty_series, on='PartName', how='left')

In [None]:
part_count_df.head(10)

In [None]:
# One row per part: summed TotalPrice, plus
#   - PartCount: the number of line items on which the part appears, and
#   - TotalPartQuantity: the total quantity of the part sold across all transactions.
# NOTE: this rebinds `grouped_df`, which earlier in the notebook held the
# per-transaction part lists; cells that need those lists must run before this one.
grouped_df = (
    part_count_df
    .groupby('PartName')
    .agg({'TotalPrice': 'sum'})
    .reset_index()
    .merge(part_count_series, on='PartName', how='left')
    .merge(total_qty_series, on='PartName', how='left')
)

In [None]:
grouped_df.head()

In [None]:
# Step 2: Emit every combination twice, once as A/B and once as B/A.
def create_combination_pairs(row):
    """Return the two directed records for a single combination row.

    Parameters
    ----------
    row : mapping with a 'Combination' entry (a 2-tuple of part names) and a
        'Count' entry.

    Returns
    -------
    pandas.DataFrame with columns Part1, Part2, Count and two rows:
    (a, b, count) and (b, a, count).

    Kept for single-row use; the full expansion below no longer calls it,
    because building one DataFrame per row and concatenating them is very slow
    for large combination tables.
    """
    part1, part2 = row['Combination']
    return pd.DataFrame({
        'Part1': [part1, part2],
        'Part2': [part2, part1],
        'Count': [row['Count'], row['Count']]
    })


def _expand_combination_pairs(combination_counts):
    """Expand a (Combination, Count) frame into directed A/B and B/A rows.

    Produces the same rows, in the same order, as applying
    create_combination_pairs to every row and concatenating the results, but
    builds the output frame once. An empty input yields an empty frame with the
    three expected columns instead of raising from pd.concat.
    """
    part1, part2 = [], []
    for first, second in combination_counts['Combination']:
        part1 += (first, second)
        part2 += (second, first)

    return pd.DataFrame({
        'Part1': part1,
        'Part2': part2,
        # Each count is repeated for its A/B and B/A row.
        'Count': combination_counts['Count'].repeat(2).to_numpy(),
    })


# Build the directed pair table in a single pass.
expanded_df = _expand_combination_pairs(comb_df)


In [None]:
expanded_df.head()

In [None]:
# Step 3: Match counts of the items sold dataframe to the combo dataframe to produce a ratio for each A/B and B/A record.
# The statement order in this cell matters: which columns end up with the
# '_x' / '_y' suffixes depends on the first rename happening before the second merge.

# First merge on Part1: attach the per-part summary of Part1 to every directed pair.
# Left join, so every row of expanded_df is kept even without a summary match.
merged_df_part1 = pd.merge(expanded_df, grouped_df, left_on='Part1', right_on='PartName', how='left')
# Only 'TotalPrice' is renamed here. grouped_df as assembled earlier in this notebook
# carries PartName, TotalPrice, PartCount and TotalPartQuantity, so the 'Qty' key
# matches nothing (rename ignores missing labels by default).
merged_df_part1 = merged_df_part1.rename(columns={'Qty': 'Qty_Part1', 'TotalPrice': 'TotalPrice_Part1'})

# Second merge on Part2: attach the same per-part summary for Part2.
# Columns present on both sides (PartName, PartCount, TotalPartQuantity) receive
# pandas' default suffixes: '_x' = left side (Part1 values), '_y' = right side (Part2 values).
# 'TotalPrice' no longer collides because the Part1 copy was renamed above.
final_merged_df = pd.merge(merged_df_part1, grouped_df, left_on='Part2', right_on='PartName', how='left')
final_merged_df = final_merged_df.rename(columns={'Qty': 'Qty_Part2', 'TotalPrice': 'TotalPrice_Part2'})

# Remove extra columns: the join keys duplicate Part1 / Part2.
# (columns= already selects the axis; the axis argument is redundant here.)
final_merged_df = final_merged_df.drop(columns=['PartName_x', 'PartName_y'], axis='columns')

# Include part ratio: pair count divided by the number of line items of Part1,
# rounded to two decimals.
final_merged_df['PartSaleRatio'] = (final_merged_df['Count'] / final_merged_df['PartCount_x']).round(2)

# Include Average part price
# Coerce both operands to numeric first; values that cannot be parsed become NaN.
final_merged_df['TotalPrice_Part1'] = pd.to_numeric(final_merged_df['TotalPrice_Part1'], errors='coerce')
final_merged_df['PartCount_x'] = pd.to_numeric(final_merged_df['PartCount_x'], errors='coerce')
# NOTE(review): this divides Part1's summed line totals by its number of line items
# (PartCount_x), i.e. an average line total. If an average unit price is intended,
# TotalPartQuantity_x may be the right denominator - confirm.
final_merged_df['Part1_Average_Price'] = (final_merged_df['TotalPrice_Part1'] / final_merged_df['PartCount_x']).round(2)

# Display Head
final_merged_df.head(25)


In [None]:
final_merged_df.to_csv('POS_Transactions_Analysis.csv', index=False)

Step 1: Loop through the POS_transactions() query dataframe and create a dataframe of the form (Part1, Part2, Part1_Qty, Part2_Qty, PartRatio, Part1AvgPrice, Part2AvgPrice, AddedValue), where Part1_Qty and Part2_Qty are the summed quantities of each part in the transactions where it appears together with the other, PartRatio = Part2_Qty / Part1_Qty (the average amount of Part2 sold per unit of Part1 when both are in the same transaction), Part1AvgPrice and Part2AvgPrice are the average prices of each part across all sales, and AddedValue = PartRatio * Part2AvgPrice.

Step 2: Send to CSV

Step 3: Create Complete Added Value CSV, which includes each Part and its base value + the AddedValue from ALL of its other combinations.

In [2]:
from StoredQueries import POS_transactions

pos_df = POS_transactions()
pos_df.head()

In [3]:


# Step 1: Create the dataframe with combinations and calculations using PartName
def create_part_combinations_df(pos_df):
    """Build one row per pair of distinct parts sold in the same transaction.

    Parameters
    ----------
    pos_df : pandas.DataFrame
        Line items with the columns 'TransactionID', 'PartName', 'Qty' and
        'UnitPrice'.

    Returns
    -------
    pandas.DataFrame
        Columns Part1, Part2, Part1_Qty, Part2_Qty, PartRatio, Part1AvgPrice,
        Part2AvgPrice, AddedValue. Quantities and average prices are computed
        within the transaction the pair comes from. PartRatio is
        Part2_Qty / Part1_Qty (0 when Part1_Qty is 0) and AddedValue is
        PartRatio * Part2AvgPrice. An input without transactions yields an
        empty frame that still has these columns.

    Notes
    -----
    Each part is considered once per transaction, in order of first appearance.
    Previously a part listed on several lines produced self-pairs (A, A) and
    repeated (A, B) rows, which inflated any later sum over AddedValue.
    """
    columns = [
        'Part1', 'Part2', 'Part1_Qty', 'Part2_Qty',
        'PartRatio', 'Part1AvgPrice', 'Part2AvgPrice', 'AddedValue',
    ]
    part_combinations = []

    # Loop through each transaction
    for _, transaction in pos_df.groupby('TransactionID'):
        part_names = transaction['PartName']

        # Summarise every distinct part once (quantity sum, mean unit price)
        # instead of re-filtering the transaction for every pair.
        per_part = {}
        for part in part_names.drop_duplicates():
            part_rows = transaction[part_names == part]
            per_part[part] = (part_rows['Qty'].sum(), part_rows['UnitPrice'].mean())

        # All 2-part combinations of the distinct parts in this transaction
        for part1, part2 in combinations(per_part, 2):
            part1_qty, part1_avg_price = per_part[part1]
            part2_qty, part2_avg_price = per_part[part2]

            # Guard against division by zero when Part1's quantities net to 0.
            part_ratio = part2_qty / part1_qty if part1_qty != 0 else 0
            added_value = part_ratio * part2_avg_price

            part_combinations.append({
                'Part1': part1,
                'Part2': part2,
                'Part1_Qty': part1_qty,
                'Part2_Qty': part2_qty,
                'PartRatio': part_ratio,
                'Part1AvgPrice': part1_avg_price,
                'Part2AvgPrice': part2_avg_price,
                'AddedValue': added_value
            })

    # Explicit columns keep the schema stable even when no pair was found.
    part_combinations_df = pd.DataFrame(part_combinations, columns=columns)
    return part_combinations_df

# Build the pair table (keyed by PartName) from the POS rows loaded above.
part_combinations_df = create_part_combinations_df(pos_df=pos_df)

# Step 2: Write the pair table to disk without the row index.
part_combinations_df.to_csv(path_or_buf='part_combinations.csv', index=False)

# Step 3: Create Complete Added Value CSV using PartName
def create_complete_added_value_df(part_combinations_df):
    """Summarise base value and accumulated added value per part.

    Only parts that occur in the 'Part1' column are summarised, in order of
    first appearance. For each such part:

    * BaseValue       - mean of 'Part1AvgPrice' over its rows
    * TotalAddedValue - sum of 'AddedValue' over its rows
    * CompleteValue   - BaseValue + TotalAddedValue

    Returns a DataFrame with the columns Part, BaseValue, TotalAddedValue and
    CompleteValue (one row per part).
    """
    part1_column = part_combinations_df['Part1']

    def summarise(part):
        # All pair rows in which this part is the first member.
        rows_for_part = part_combinations_df[part1_column == part]
        base_value = rows_for_part['Part1AvgPrice'].mean()
        added_value_total = rows_for_part['AddedValue'].sum()
        return {
            'Part': part,
            'BaseValue': base_value,
            'TotalAddedValue': added_value_total,
            'CompleteValue': base_value + added_value_total,
        }

    return pd.DataFrame([summarise(part) for part in part1_column.unique()])

# Summarise, per PartName, the base value plus the added value of its pairs.
complete_added_value_df = create_complete_added_value_df(
    part_combinations_df=part_combinations_df
)

# Write the per-part summary to disk without the row index.
complete_added_value_df.to_csv(path_or_buf='complete_added_value.csv', index=False)


In [1]:

import pandas as pd
from itertools import combinations
from StoredQueries import POS_transactions

pos_df = POS_transactions()
pos_df.head()

# Step 1: Create the dataframe with combinations and calculations using PartName
def create_part_combinations_df(pos_df):
    """Aggregate co-sold quantities per unordered part pair across all transactions.

    Parameters
    ----------
    pos_df : pandas.DataFrame
        Line items with the columns 'TransactionID', 'PartName', 'Qty' and
        'UnitPrice'.

    Returns
    -------
    pandas.DataFrame
        One row per pair of distinct parts that share at least one transaction,
        with Part1 < Part2 (sorted order) and the columns Part1, Part2,
        Part1_Qty, Part2_Qty, PartRatio, Part1AvgPrice, Part2AvgPrice,
        AddedValue. The quantities are summed over the transactions containing
        both parts; the average prices are the mean 'UnitPrice' of each part
        over the whole of pos_df. PartRatio is Part2_Qty / Part1_Qty (0 when
        Part1_Qty is 0) and AddedValue is PartRatio * Part2AvgPrice. An input
        without transactions yields an empty frame that still has these columns.

    Notes
    -----
    Each part contributes once per transaction. Previously a part listed on
    several lines of one transaction had its quantities added to a pair once
    per line and also produced a self-pair (A, A).
    """
    columns = [
        'Part1', 'Part2', 'Part1_Qty', 'Part2_Qty',
        'PartRatio', 'Part1AvgPrice', 'Part2AvgPrice', 'AddedValue',
    ]
    part_combination_sums = {}

    # Loop through each transaction
    for _, transaction in pos_df.groupby('TransactionID'):
        part_names = transaction['PartName']

        # Quantity of every distinct part in this transaction, computed once.
        qty_by_part = {
            part: transaction.loc[part_names == part, 'Qty'].sum()
            for part in part_names.drop_duplicates()
        }

        # Sorting the names first means every generated pair is already in
        # canonical order, so A-B and B-A accumulate into the same key.
        for part1, part2 in combinations(sorted(qty_by_part), 2):
            sums = part_combination_sums.setdefault(
                (part1, part2), {'Part1_Qty': 0, 'Part2_Qty': 0}
            )
            sums['Part1_Qty'] += qty_by_part[part1]
            sums['Part2_Qty'] += qty_by_part[part2]

    all_part_names = pos_df['PartName']
    avg_price_by_part = {}

    def average_unit_price(part):
        # Mean unit price over all sales of the part, looked up once per part
        # instead of re-filtering the full frame for every pair.
        if part not in avg_price_by_part:
            avg_price_by_part[part] = pos_df.loc[all_part_names == part, 'UnitPrice'].mean()
        return avg_price_by_part[part]

    part_combinations = []
    # Loop through the accumulated quantities and perform calculations
    for (part1, part2), qtys in part_combination_sums.items():
        # Guard against division by zero when Part1's quantities net to 0.
        part_ratio = qtys['Part2_Qty'] / qtys['Part1_Qty'] if qtys['Part1_Qty'] != 0 else 0
        part1_avg_price = average_unit_price(part1)
        part2_avg_price = average_unit_price(part2)
        added_value = part_ratio * part2_avg_price

        part_combinations.append({
            'Part1': part1,
            'Part2': part2,
            'Part1_Qty': qtys['Part1_Qty'],
            'Part2_Qty': qtys['Part2_Qty'],
            'PartRatio': part_ratio,
            'Part1AvgPrice': part1_avg_price,
            'Part2AvgPrice': part2_avg_price,
            'AddedValue': added_value
        })

    # Explicit columns keep the schema stable even when no pair was found.
    part_combinations_df = pd.DataFrame(part_combinations, columns=columns)
    return part_combinations_df

# Build the aggregated pair table (keyed by PartName) from the POS rows loaded above.
part_combinations_df = create_part_combinations_df(pos_df=pos_df)

# Step 2: Write the pair table to disk without the row index.
part_combinations_df.to_csv(path_or_buf='part_combinations.csv', index=False)
