In [1]:
################################  Storage_Optimization.ipynb  ####################################
# Author: Sukhendu Sain
# Description: Analyse the Data
# Data: 23-Nov-2024
#################################################################################

In [2]:
# Import Necessary Libraries, Utils, and Config Files
import utils
from config import *
import pandas as pd
import numpy as np
import os, re
import matplotlib.pyplot as plt
# import importlib
# importlib.reload(utils)

# Data Import and Clean

In [31]:
#### Read FILE:: (AKINS FoMoCo_Piece_Sales_112222_YTD.xlsx) into Dataframe
df_Akins = utils.read_excel(AKINS_FOMO_FILE_PATH)
df_Akins['Original Part#'] = df_Akins['Part#']
df_Akins['Part#'] = df_Akins['Part#'].apply(lambda a: "".join(str(a).split('-')))
if print_df_after_import: utils.print_df(df_Akins, 200) # Print the Dataframe
# ~1-2secs

╒═════╤══════════════════╤══════════════════════╤═════════╤═════════════╤═════════╤════════════════════╕
│     │ Part#            │ Description          │    Cost │   Sold Pcs  │   Sold% │ Original Part#     │
╞═════╪══════════════════╪══════════════════════╪═════════╪═════════════╪═════════╪════════════════════╡
│   0 │ 5060             │ WASHER               │    0.1  │        1281 │    0.35 │ 5060               │
├─────┼──────────────────┼──────────────────────┼─────────┼─────────────┼─────────┼────────────────────┤
│   1 │ 888830010426     │ HOT SHOT CAP BOTTLE  │    7.8  │           3 │    0    │ 888830010426       │
├─────┼──────────────────┼──────────────────────┼─────────┼─────────────┼─────────┼────────────────────┤
│   2 │ 888830013748     │ 10/20 SLIDER LID     │    6    │           7 │    0    │ 888830013748       │
├─────┼──────────────────┼──────────────────────┼─────────┼─────────────┼─────────┼────────────────────┤
│   3 │ 888830014547     │ 30 MAG SLIDER LID    │    6 

In [None]:
#### Read FILE:: (GPARTS Part Measures.xlsx) into Dataframe
df_Gparts = utils.read_excel(GPARTS_FILE_PATH)
if print_df_after_import: utils.print_df(df_Gparts) # Print the Dataframe
# ~50-60secs

In [None]:
#### Read FILE:: (Wholesale JAN_Oct_Parts_Ranking_Counter_Invoices_All_Brands.xlsx) into Dataframe
df_Wholesale = utils.read_excel(WHOLESALE_FILE_PATH)

# Clean the Wholesale Dataframe
df_Wholesale = df_Wholesale.drop(columns=[col for col in df_Wholesale.columns if 'Unnamed' in col], inplace=False)
df_Wholesale = df_Wholesale[(df_Wholesale['Vendor'] == 'FOR') | (df_Wholesale['Vendor'] == 'CHR')].reset_index()

if print_df_after_import: utils.print_df(df_Wholesale) # Print the Dataframe
# ~15secs

In [None]:
#### Read FILE:: (Service JAN_Oct_Parts_Ranking_ROs_All_Brands.xlsx) into Dataframe
df_Service = utils.read_excel(SERVICE_FILE_PATH)

# Clean the Service Dataframe
df_Service = df_Service.drop(columns=[col for col in df_Service.columns if 'Unnamed' in col], inplace=False)
df_Service = df_Service[(df_Service['Vendor'] == 'FOR') | (df_Service['Vendor'] == 'CHR')].reset_index()

if print_df_after_import: utils.print_df(df_Service, 100) # Print the Dataframe
# ~5-6secs

In [9]:
#### Read FILE:: (Counter Pad) into Dataframe

# Data Analysis


### Akins File Analysis

In [None]:
## Sort 'Sold Pcs' Column Descending
df_Akins = df_Akins.sort_values('Sold Pcs ', ascending=False)
if print_df_data_analyse: utils.print_df(df_Akins, 100) # Print the Dataframe

In [None]:
## Sum up the 'Sold Pcs' for each unique 'Part Desc.'
part_type_sold_sum = df_Akins.groupby('Description')['Sold Pcs '].sum().reset_index()
part_type_sold_sum.columns = ['Part Desc.', 'Total Sold Pcs.']

# Sort the part_type_sold_sum by Total Sold Pcs. in descending order
part_type_sold_sum = part_type_sold_sum.sort_values('Total Sold Pcs.', ascending=False)

if print_df_data_analyse: utils.print_df(part_type_sold_sum, 5)

In [None]:
## Visualize above part_type_sold_sum using Bar, One Fig of Top 10 Most Sold Part Types and One for Top 50
top_10 = part_type_sold_sum.head(10)
top_50 = part_type_sold_sum.head(50)

# Create the bar chart for Top 10
plt.figure(figsize=(8, 6))  # Set the figure size
plt.bar(top_10['Part Desc.'], top_10['Total Sold Pcs.'], width=0.5)

# Customize the chart
plt.title('Top 10 Part Types by Total Sold Pieces (Akins)', fontsize=20)
plt.xlabel('Part Description', fontsize=16)
plt.ylabel('Total Sold Pieces', fontsize=16)
plt.xticks(rotation=90)

# Add value labels on top of each bar
for i, v in enumerate(top_10['Total Sold Pcs.']):
    plt.text(i, v, str(v), ha='center', va='bottom')

plt.tight_layout()
plt.show()


# Create the bar chart for Top 50
plt.figure(figsize=(9, 6))  # Set the figure size
plt.bar(top_50['Part Desc.'], top_50['Total Sold Pcs.'], width=0.5)

# Customize the chart
plt.title('Top 50 Part Types by Total Sold Pieces (Akins)', fontsize=20)
plt.xlabel('Part Description', fontsize=16)
plt.ylabel('Total Sold Pieces', fontsize=16)
plt.xticks(rotation=90)
plt.grid(axis='x', linestyle='--', alpha=1)
plt.grid(axis='y', linestyle='--', alpha=1)

plt.tight_layout()
plt.show()

### GParts File Analysis

In [None]:
## Analyze 'Active' Column of GParts
total_rows = df_Gparts.shape[0]
active = df_Gparts[df_Gparts["Is Active?"] == 'Yes'].shape[0]
active_percent = ( df_Gparts[df_Gparts["Is Active?"] == 'Yes'].shape[0]/total_rows ) * 100

# Print the Counts/Percentage
print(f"Active Parts: {active}; Active Percentage: {active_percent}%")
print(f"Not Active Parts: {total_rows - active}; Not Active Percentage: {100 - active_percent}%")

# Visualize
labels = ['Active', 'Not Active'] # Create labels and sizes for the pie chart
sizes = [active_percent, 100 - active_percent]

plt.figure(figsize=(10, 8)) # Create the pie chart
plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, textprops={'fontsize': 23})

plt.show() # Show the chart

In [None]:
## Find Rows with 0 in either Dimensions
# Depth (Column Here) = Length (In Docs)

# Count no. of Rows with 0 in either Dimensions; no_all share count with other 3
no_depth = df_Gparts[df_Gparts["Prod Att - Length"] == 0].shape[0]
no_width = df_Gparts[df_Gparts["Prod Att- Width"] == 0].shape[0]
no_height = df_Gparts[df_Gparts["Prod Att - Height"] == 0].shape[0]
no_all = df_Gparts[(df_Gparts["Prod Att - Height"] == 0) & (df_Gparts["Prod Att- Width"] == 0) & (df_Gparts["Prod Att - Length"] == 0)].shape[0]

# Calculate percentages
total_rows = df_Gparts.shape[0]
percent_no_depth = (no_depth / total_rows) * 100
percent_no_width = (no_width / total_rows) * 100
percent_no_height = (no_height / total_rows) * 100
percent_no_all = (no_all / total_rows) * 100

# Count 'Active' Parts having No Dimensions and Dimensions
No_Dim_Active = df_Gparts[(df_Gparts["Prod Att - Height"] == 0) & (df_Gparts["Is Active?"] == 'Yes')].shape[0]
No_Dim_Not_Active = df_Gparts[(df_Gparts["Prod Att - Height"] == 0) & (df_Gparts["Is Active?"] == 'No')].shape[0]
No_Dim_Active_Percent = (No_Dim_Active/no_all) * 100
No_Dim_Not_Active_Percent = (No_Dim_Not_Active/no_all) * 100
With_Dim_Active = df_Gparts[(df_Gparts["Prod Att - Height"] != 0) & (df_Gparts["Is Active?"] == 'Yes')].shape[0]
With_Dim_Not_Active = df_Gparts[(df_Gparts["Prod Att - Height"] != 0) & (df_Gparts["Is Active?"] == 'No')].shape[0]
With_Dim_Active_Percent = (With_Dim_Active/(total_rows - no_all)) * 100
With_Dim_Not_Active_Percent = (With_Dim_Not_Active/(total_rows - no_all)) * 100

# Print the Counts/Percentages
print(f"No Length/Depth: {round(no_depth/total_rows*100, 2)}%; No Width: {round(no_width/total_rows*100, 2)}%; No Height: {round(no_height/total_rows*100, 2)}%; No Dimensions: {round(percent_no_all, 2)}%;")
print(f"Active Parts of No Dimensions: {No_Dim_Active}; Percentage with respect to Parts without Dims: {round(No_Dim_Active_Percent, 2)}%")
print(f"Not Active Parts of No Dimensions: {No_Dim_Not_Active}; Percentage with respect to Parts without Dims: {round(No_Dim_Not_Active_Percent, 2)}%")
print(f"Active Parts with Dimensions: {With_Dim_Active}; Percentage with respect to Parts with Dims: {round(With_Dim_Active_Percent, 2)}%")
print(f"Not Active Parts with Dimensions: {With_Dim_Not_Active}; Percentage with respect to Parts with Dims: {round(With_Dim_Not_Active_Percent, 2)}%")

# Here we find out that a row, if containing 0 in 1 dimension, has 0 in all, or
# A row has either all or none dimensions
# 16% of Rows has 0 Dimensions

In [None]:
# Visualize
labels = ['Active', 'Not Active'] # Create labels and sizes for the pie chart
sizes = [No_Dim_Active_Percent, No_Dim_Not_Active_Percent]

plt.figure(figsize=(10, 8)) # Create the pie chart
plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, textprops={'fontsize': 20})

plt.show() # Show the chart

In [None]:
## Visualize above No Dimensions Data

# Create the bar graph
plt.figure(figsize=(10, 6)) 
bars = ['Total Rows', 'No Length/Depth', 'No Width', 'No Height']
heights = [100, percent_no_depth, percent_no_width, percent_no_height]

plt.bar(bars, heights)
plt.title(f'Distribution of Rows with No Dimensions', fontsize=16)
plt.ylabel('Percentage of Rows')
plt.xticks(rotation=45, fontsize=16)

# Add labels to each bar
for i, v in enumerate(heights):
    plt.text(i, v, str(round(v, 2)) + '%', ha='center', va='bottom')

# Show the legend and display the chart
plt.tight_layout()
plt.show()

In [None]:
## Visualize everything Analyzed above in a single Stacked Bar Chart

categories = ['Total Rows', 'Dimensional Rows', 'No Dimensional Rows']
values1 = [active, With_Dim_Active, No_Dim_Active]
values2 = [total_rows - active, With_Dim_Not_Active, No_Dim_Not_Active]

# Create figure and axis objects
fig, ax = plt.subplots(figsize=(8, 6))

# Plot the stacked bar chart
ax.bar(categories, values1, label='Active', width = 0.3)
ax.bar(categories, values2, bottom=values1, label='Not Active', width = 0.3)

for i, v in enumerate([total_rows, With_Dim_Active+With_Dim_Not_Active, No_Dim_Active+No_Dim_Not_Active]):
    plt.text(i, v, f'Total: {str(v)}', ha='center', va='bottom')

# Customize the plot
ax.set_ylabel('Values')
ax.set_title('Bar Chart of Active and Non-Active Parts (GParts)')
ax.legend()

ax.set_yticks(np.linspace(0, 231045, 20))  # Set more ticks on y-axis

plt.grid(axis='x', linestyle='--', alpha=1)
plt.grid(axis='y', linestyle='--', alpha=1)

# Display the chart
plt.tight_layout()
plt.show()

### Wholesale Files Analysis

In [None]:
## Sort 'Sold' Column Descending
df_Wholesale = df_Wholesale.sort_values('Sold', ascending=False)
if print_df_data_analyse: utils.print_df(df_Wholesale, 100) # Print the Dataframe

In [None]:
## Count the number of negative sold pcs.
neg_sold_count = df_Wholesale[df_Wholesale['Sold'] < 0].shape[0]
print(f"Number of Negative Sold Values: {neg_sold_count}")

In [None]:
## Sum up the 'Gross Profit' for each unique 'Part Desc.'
part_type_profit_sum = df_Wholesale.groupby('Description')['Gross Profit'].sum().reset_index()
part_type_profit_sum.columns = ['Part Desc.', 'Total Gross Profit']
part_type_profit_sum['Total Gross Profit'] = pd.to_numeric(part_type_profit_sum['Total Gross Profit'], errors='coerce').round()

# Sort the part_type_sold_sum by Total Sold Pcs. in descending order
part_type_profit_sum = part_type_profit_sum.sort_values('Total Gross Profit', ascending=False)
part_type_profit_sum['Total Gross Profit'] = part_type_profit_sum['Total Gross Profit'].round(-1)

if print_df_data_analyse: utils.print_df(part_type_profit_sum, 5)

# Visualize using Bar of Top 10
top_10 = part_type_profit_sum.head(10)

# Create the bar chart for Top 10
plt.figure(figsize=(8, 6))  # Set the figure size
plt.bar(top_10['Part Desc.'], top_10['Total Gross Profit'], width=0.5)

# Customize the chart
plt.title('Top 10 Part Types by Total Gross Profit (Wholesale)', fontsize=20)
plt.xlabel('Part Description', fontsize=16)
plt.ylabel('Total Gross Profit', fontsize=16)
plt.xticks(rotation=90)

# Add value labels on top of each bar
for i, v in enumerate(top_10['Total Gross Profit']):
    plt.text(i, v, str(v), ha='center', va='bottom')

plt.tight_layout()
plt.show()

In [None]:
## Sum up the 'Sold' for each unique 'Part Desc.'
part_type_sold_sum = df_Wholesale.groupby('Description')['Sold'].sum().reset_index()
part_type_sold_sum.columns = ['Part Desc.', 'Total Sold Pcs.']

# Sort the part_type_sold_sum by Total Sold Pcs. in descending order
part_type_sold_sum = part_type_sold_sum.sort_values('Total Sold Pcs.', ascending=False)

if print_df_data_analyse: utils.print_df(part_type_sold_sum, 5)

In [None]:
## Visualize above part_type_sold_sum using Bar, One Fig of Top 10 Most Sold Part Types and One for Top 50
top_10 = part_type_sold_sum.head(10)
top_50 = part_type_sold_sum.head(50)

# Create the bar chart for Top 10
plt.figure(figsize=(8, 6))  # Set the figure size
plt.bar(top_10['Part Desc.'], top_10['Total Sold Pcs.'], width=0.5)

# Customize the chart
plt.title('Top 10 Part Types by Total Sold Pieces (Wholesale)', fontsize=20)
plt.xlabel('Part Description', fontsize=16)
plt.ylabel('Total Sold Pieces', fontsize=16)
plt.xticks(rotation=90)

# Add value labels on top of each bar
for i, v in enumerate(top_10['Total Sold Pcs.']):
    plt.text(i, v, str(v), ha='center', va='bottom')

plt.tight_layout()
plt.show()


# Create the bar chart for Top 50
plt.figure(figsize=(9, 6))  # Set the figure size
plt.bar(top_50['Part Desc.'], top_50['Total Sold Pcs.'], width=0.5)

# Customize the chart
plt.title('Top 50 Part Types by Total Sold Pieces (Wholesale)', fontsize=20)
plt.xlabel('Part Description', fontsize=16)
plt.ylabel('Total Sold Pieces', fontsize=16)
plt.xticks(rotation=90)
plt.grid(axis='x', linestyle='--', alpha=1)
plt.grid(axis='y', linestyle='--', alpha=1)

plt.tight_layout()
plt.show()

### Service File Analysis

In [None]:
## Count the number of negative sold pcs.
neg_sold_count = df_Service[df_Service['Qty Sold'] < 0].shape[0]
print(f"Number of Negative Sold Values: {neg_sold_count}")

In [None]:
## Sum up the 'Gross Profit' for each unique 'Part Desc.'
part_type_profit_sum = df_Service.groupby('Description')['Gross Profit'].sum().reset_index()
part_type_profit_sum.columns = ['Part Desc.', 'Total Gross Profit']
part_type_profit_sum['Total Gross Profit'] = pd.to_numeric(part_type_profit_sum['Total Gross Profit'], errors='coerce').round()

# Sort the part_type_sold_sum by Total Sold Pcs. in descending order
part_type_profit_sum = part_type_profit_sum.sort_values('Total Gross Profit', ascending=False)
part_type_profit_sum['Total Gross Profit'] = part_type_profit_sum['Total Gross Profit'].round(-1)

if print_df_data_analyse: utils.print_df(part_type_profit_sum, 5)

# Visualize using Bar of Top 10
top_10 = part_type_profit_sum.head(10)

# Create the bar chart for Top 10
plt.figure(figsize=(8, 6))  # Set the figure size
plt.bar(top_10['Part Desc.'], top_10['Total Gross Profit'], width=0.5)

# Customize the chart
plt.title('Top 10 Part Types by Total Gross Profit (Service)', fontsize=20)
plt.xlabel('Part Description', fontsize=16)
plt.ylabel('Total Gross Profit', fontsize=16)
plt.xticks(rotation=90)

# Add value labels on top of each bar
for i, v in enumerate(top_10['Total Gross Profit']):
    plt.text(i, v, str(v), ha='center', va='bottom')

plt.tight_layout()
plt.show()

In [None]:
## Sum up the 'Sold' for each unique 'Part Desc.'
part_type_sold_sum = df_Service.groupby('Description')['Qty Sold'].sum().reset_index()
part_type_sold_sum.columns = ['Part Desc.', 'Total Sold Pcs.']

# Sort the part_type_sold_sum by Total Sold Pcs. in descending order
part_type_sold_sum = part_type_sold_sum.sort_values('Total Sold Pcs.', ascending=False)

if print_df_data_analyse: utils.print_df(part_type_sold_sum, 5)

In [None]:
## Visualize above part_type_sold_sum using Bar, One Fig of Top 10 Most Sold Part Types and One for Top 50
top_10 = part_type_sold_sum.head(10)
top_50 = part_type_sold_sum.head(50)

# Create the bar chart for Top 10
plt.figure(figsize=(8, 6))  # Set the figure size
plt.bar(top_10['Part Desc.'], top_10['Total Sold Pcs.'], width=0.5)

# Customize the chart
plt.title('Top 10 Part Types by Total Sold Pieces (Service)', fontsize=20)
plt.xlabel('Part Description', fontsize=16)
plt.ylabel('Total Sold Pieces', fontsize=16)
plt.xticks(rotation=90)

# Add value labels on top of each bar
for i, v in enumerate(top_10['Total Sold Pcs.']):
    plt.text(i, v, str(v), ha='center', va='bottom')

plt.tight_layout()
plt.show()


# Create the bar chart for Top 50
plt.figure(figsize=(9, 6))  # Set the figure size
plt.bar(top_50['Part Desc.'], top_50['Total Sold Pcs.'], width=0.5)

# Customize the chart
plt.title('Top 50 Part Types by Total Sold Pieces (Service)', fontsize=20)
plt.xlabel('Part Description', fontsize=16)
plt.ylabel('Total Sold Pieces', fontsize=16)
plt.xticks(rotation=90)
plt.grid(axis='x', linestyle='--', alpha=1)
plt.grid(axis='y', linestyle='--', alpha=1)

plt.tight_layout()
plt.show()

### Find Matching Part Numbers between Gparts and each of other Files

In [32]:
## Find Number of Matching Part Numbers between GParts and Akins

dfs_to_match = [df_Akins, df_Gparts]

# Part# Column Name of DFs to Match
part_number_columns = ['Part#', 'Svc Part Number']

# Find common part numbers
common_part_numbers = set(dfs_to_match[0][part_number_columns[0]])
for i in range(1, len(dfs_to_match)):
    common_part_numbers &= set(dfs_to_match[i][part_number_columns[i]])

print(f"Num of Part numbers common to all DataFrames: {len(common_part_numbers)}")
print(f"Matching Part Numbers: {common_part_numbers}")
# 10283 Matching

Num of Part numbers common to all DataFrames: 10283
Any 10 Matching Part Numbers: {'BB5Z15790A', 'CK4Z17F774FC', 'BC3Z16C826B', '9OO439387', 'CP9Z8K556B', 'CK4Z9B593B', 'FL3Z9F472A', 'LC3Z7G007A', 'FL3Z17C754AA', 'BL3Z1561203AA', '6C2Z8D060A', '7T4Z17K835CCP', '6R3Z19N236A', 'W704277S438', '3U2Z14S411YNA', 'HC3Z4616A', 'FB5Z7841018AA', '6C2Z8146AA', 'FL3Z17D742AA', 'DP5Z16103A', 'LX6Z1125M', 'W715877S300', 'FL3Z1501670D', 'L1MZ3B478A', '5C3Z9VE527BRM', 'LL3Z1130B', '7T4Z6K254BA', '8S4Z15K601A', 'BB5Z17757A', '2L3Z6C324AA', 'F81Z6N653BA', 'CV6Z5422404DA', 'W717186S900', 'BL3Z9439A', 'HL3Z7B399C', 'JX6Z6675C', 'W709856S439', '6U5Z17D696D', 'FT4Z5842528A', 'ML3Z9928261C', '3C3Z7A248AA', 'HL3Z7A095A', 'W505264S442', 'HK4Z6362900CF', 'AG1Z5425766C', 'JC3Z7000HRM', 'LL3Z15264B28A', 'BE5Z3079A', 'ML3Z15K861AA', 'LK4Z19712A', 'W714639S439', 'BC3Z8200ECP', 'AE8Z7H103A', 'W716341S439', 'CK4Z61280K96AC', 'F5RZ6518B', '6L2Z78218A42B', 'FB5Z7820879AB', 'JL3Z17626C', 'AE5Z16103B', 'ES7Z19G490A', 'HC

In [33]:
## Find Number of Matching Part Numbers between GParts and Wholesale

dfs_to_match = [df_Wholesale, df_Gparts]

# Part# Column Name of DFs to Match
part_number_columns = ['Part Number', 'Svc Part Number']

# Find common part numbers
common_part_numbers = set(dfs_to_match[0][part_number_columns[0]])
for i in range(1, len(dfs_to_match)):
    common_part_numbers &= set(dfs_to_match[i][part_number_columns[i]])

print(f"Num of Part numbers common to all DataFrames: {len(common_part_numbers)}")
print(f"Matching Part Numbers: {common_part_numbers}")
# 20763 Matching

Num of Part numbers common to all DataFrames: 20763
Matching Part Numbers: {'CK4Z17F774FC', 'BC3Z16C826B', 'CP9Z8K556B', 'MA1Z17C882C', 'ML3Z5500A', 'FL3Z17C754AA', '6R3Z6343262B', '7T4Z17K835CCP', '1L2Z6020AA', 'HP5Z58278B50A', '5C3Z9VA543BRM', '3U2Z14S411YNA', 'FL3Z17D742AA', 'M2DZ7825325B', 'FL3Z1660044AC', 'ML3Z99292A22CA', '7L3Z6019A', 'W712825S307', '7L1Z1130D', 'F4TZ6L080A', 'FK4Z16112C30BA', 'M1PZ8327A', 'FT4Z5842528A', '2S7Z6379AA', 'F37Z17788A', 'HL3Z7A095A', 'LJ8Z13008AE', 'LL3Z15264B28A', 'L1MZ1125A', 'BC3Z8678B', 'ML3Z15K861AA', 'W714639S439', 'HC3Z3A674G', 'CK4Z61280K96AC', 'JL3Z17626C', 'HC3Z8B434B', '1C3Z9D477AA', '3C3Z5798AA', 'W716375S900', '9L3Z17906ACP', 'AL8Z2C215A', 'KT4Z58611B09AH', 'LC3Z7000DB', 'W304106', 'FR3Z5B759J', 'FR3Z1104G', 'FR3Z6327840A', 'HC3Z17D742BAPTM', 'W710604S439', 'KL3Z6320001AA', '2C2Z8678AA', 'EJ7Z7821596A', 'ML3Z17682SB', 'K2GZ6L092B', '9OO1356374081', 'LC3Z9030S', 'HC3Z4067D', 'FL3Z13008ACP', 'DU2Z14S411BA', 'HL3Z6750B', '7L3Z6A666A', 'GN1Z

In [34]:
## Find Number of Matching Part Numbers between GParts and Service

dfs_to_match = [df_Service, df_Gparts]

# Part# Column Name of DFs to Match
part_number_columns = ['* indicates a superseded part\nPart Number', 'Svc Part Number']

# Find common part numbers
common_part_numbers = set(dfs_to_match[0][part_number_columns[0]])
for i in range(1, len(dfs_to_match)):
    common_part_numbers &= set(dfs_to_match[i][part_number_columns[i]])

print(f"Num of Part numbers common to all DataFrames: {len(common_part_numbers)}")
print(f"Matching Part Numbers: {common_part_numbers}")
# 12926 Matching

Num of Part numbers common to all DataFrames: 12926
Matching Part Numbers: {'BB5Z15790A', '9OO439387', 'PC3Z2604338DA', 'CK4Z9B593B', 'FL3Z9F472A', 'LC3Z7G007A', 'FC4Z9A031B', 'JL1Z76237A02A', '6R3Z19N236A', '5C3Z9VA543BRM', 'PC3Z9920402A', 'D5AZ4216A', 'FB5Z7841018AA', 'FL1Z17E811BC', 'LB5Z78500A18A', 'LC3Z6571A', 'LX6Z1125M', 'BT4Z9C888CA', 'FT4Z6L266A', 'L1MZ3B478A', 'FL3Z1501670D', 'GU2Z14S411TB', 'LB5Z1104B', '5C3Z9VE527BRM', 'ML3Z99292A22CA', 'HC3Z16039BPTM', 'LB5Z6L092B', 'LV4Z1503050B', 'MB3Z4141E', '7T4Z6K254BA', 'MB3Z14A626B', '8S4Z15K601A', 'NL3Z9942528AA', 'W720710S439', 'W717870S300', 'JT4Z8527A', 'JX6Z10346N', '9OO72021317', 'CV6Z5422404DA', 'CV6Z18N344C', 'HL3Z18A927A', 'BL3Z9439A', 'HL3Z7B399C', 'JR3Z4209B', 'M1PZ8327A', 'JX6Z6675C', 'W718416S450B', 'LC3Z14C022A', '6U5Z17D696D', 'FT4Z5842528A', 'E7DZ7Z302AA', 'J1GZ7869B', 'HL3Z7A095A', 'W505264S442', 'GN1Z17528EA', 'KB3Z15A862BAPTM', 'LL3Z15264B28A', 'JC3Z7000HRM', 'K2GZ9J279B', 'JL1Z78255A34B', 'ML3Z15K861AA', 'LK4Z197

In [35]:
## Find Number of Matching Part Numbers between Akins and Wholesale

dfs_to_match = [df_Akins, df_Wholesale]

# Part# Column Name of DFs to Match
part_number_columns = ['Part#', 'Part Number']

# Find common part numbers
common_part_numbers = set(dfs_to_match[0][part_number_columns[0]])
for i in range(1, len(dfs_to_match)):
    common_part_numbers &= set(dfs_to_match[i][part_number_columns[i]])

print(f"Num of Part numbers common to all DataFrames: {len(common_part_numbers)}")
print(f"Matching Part Numbers: {common_part_numbers}")
# 6884 Matching

Num of Part numbers common to all DataFrames: 6884
Matching Part Numbers: {'BB5Z15790A', 'CK4Z17F774FC', 'BC3Z16C826B', 'CP9Z8K556B', 'CK4Z9B593B', 'FL3Z17C754AA', 'BL3Z1561203AA', '7T4Z17K835CCP', '3U2Z14S411YNA', 'HC3Z4616A', 'FL3Z17D742AA', 'DP5Z16103A', 'LX6Z1125M', '5C3Z9VE527BRM', '7T4Z6K254BA', '8S4Z15K601A', 'BB5Z17757A', '2L3Z6C324AA', 'F81Z6N653BA', 'CV6Z5422404DA', 'W717186S900', 'HL3Z7B399C', '6U5Z17D696D', 'FT4Z5842528A', '3C3Z7A248AA', 'HL3Z7A095A', 'HK4Z6362900CF', 'JC3Z7000HRM', 'LL3Z15264B28A', 'ML3Z15K861AA', 'W714639S439', 'BC3Z8200ECP', 'CK4Z61280K96AC', 'F5RZ6518B', '6L2Z78218A42B', 'FB5Z7820879AB', 'JL3Z17626C', 'AE5Z16103B', 'ES7Z19G490A', 'KS7Z15266CF', '1C3Z9D477AA', 'FT4Z5821453C', 'LB5Z9E731AB', 'BL3Z8K153AB', '9L3Z17906ACP', 'W716375S900', 'F81Z9G270BA', 'BM5Z17810ACP', '6G7Z63044A74A', 'AL8Z2C215A', 'BL3Z16006B', 'FR3Z1104G', 'HC3Z17D742BAPTM', 'DS7Z8527A', 'DS7Z8349B', '6C3Z6C646A', 'HC3Z4067D', 'FL3Z13008ACP', '7L3Z6A666A', 'W701259S300', '9L3Z17755A', 'G