In [None]:
import pandas as pd

# Load the raw parts/service export.
data = pd.read_csv('Parts_Recommendation_CaseStudy.csv')

# Drop exact duplicate rows (compared across every column of the raw file).
data = data.drop_duplicates()

# Keep only the columns used downstream. The explicit .copy() makes the result
# an independent frame, so the column assignments below are not made on a
# column subset of another frame (which pandas can flag with
# SettingWithCopyWarning).
data = data[['SERVICE', 'PART_NAME','INVOICE_ID','INVOICE_DATE','PART_NUMBER','SALES_MODEL']].copy()

# Normalise the two text keys: lowercase for case-insensitive matching, then
# trim leading/trailing whitespace.
for text_column in ('SERVICE', 'PART_NAME'):
    data[text_column] = data[text_column].str.lower().str.strip()

# Drop rows with a missing value in any of the retained columns.
data = data.dropna()

# Preview the preprocessed data.
print(data.head())



                     SERVICE                      PART_NAME  \
0  drivetrain-brakes-rebuild                 screen filters   
1  drivetrain-brakes-rebuild                        gaskets   
2  drivetrain-brakes-rebuild    seals, gaskets, and o-rings   
3    drivetrain-axle-rebuild               bolts and screws   
5  drivetrain-brakes-rebuild  kit - drivetrain and steering   

                   INVOICE_ID INVOICE_DATE PART_NUMBER SALES_MODEL  
0  U370/9670114009-3350006692   2020-06-17      5O7126        289Y  
1        M060/M060-3073812939   2020-04-30      3N6968        289Y  
2  U370/9670558168-3300504198   2021-07-20      1U0931        289Y  
3            W700/CTS33550363   2020-08-14      0L0181        488V  
5           E050/003503232507   2019-01-25     3090952        289Y  


In [None]:
# Summarise the cleaned frame: index range, per-column non-null counts, dtypes
# and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 93603 entries, 0 to 94626
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   SERVICE       93603 non-null  object
 1   PART_NAME     93603 non-null  object
 2   INVOICE_ID    93603 non-null  object
 3   INVOICE_DATE  93603 non-null  object
 4   PART_NUMBER   93603 non-null  object
 5   SALES_MODEL   93603 non-null  object
dtypes: object(6)
memory usage: 5.0+ MB


In [None]:
# Build the service x part count table: one row per SERVICE, one column per
# PART_NAME, each cell counting the rows of `data` that contain that pair.
# Any missing cell is set to 0.
user_item_matrix = pd.crosstab(
    index=data['SERVICE'],
    columns=data['PART_NAME'],
).fillna(0)

# Preview the first ten services.
print(user_item_matrix.head(10))

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows (services) of the count table.
similarity_matrix = cosine_similarity(user_item_matrix)

# Wrap the raw array in a DataFrame labelled by service on both axes so that
# similarities can be looked up by service name.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

# Show only the first row of the similarity table.
print(similarity_df.head(1))


In [None]:
# Summarise the service-by-service similarity table (index, columns, dtypes,
# memory usage).
similarity_df.info()

In [None]:
# Number of neighbours to keep for each service.
neighborhood_size = 5

# Map every service to its `neighborhood_size` most similar *other* services.
# The service's own entry is removed by label before ranking. Relying on it
# being the first element of nlargest() breaks when another service ties with
# it (or edges ahead through floating-point rounding): the real nearest
# neighbour would be discarded and the service would appear in its own
# neighbourhood.
neighborhood = {}
for service in similarity_df.index:
    other_services = similarity_df.loc[service].drop(labels=service)
    similar_services = other_services.nlargest(neighborhood_size).index.tolist()
    neighborhood[service] = similar_services

# Print the neighborhood for each service
for service, similar_services in neighborhood.items():
    print(f"Service: {service}, Neighborhood: {similar_services}")


Service: drivetrain-axle-rebuild, Neighborhood: ['drivetrain-brakes-repair', 'engine-turbo group-rebuild', 'engine-injector group-rebuild', 'drivetrain-brakes-rebuild', 'engine-engine cooling system-replacement']
Service: drivetrain-brakes-rebuild, Neighborhood: ['drivetrain-brakes-repair', 'engine-turbo group-rebuild', 'engine-injector group-rebuild', 'engine-engine cooling system-replacement', 'drivetrain-axle-rebuild']
Service: drivetrain-brakes-repair, Neighborhood: ['drivetrain-brakes-rebuild', 'engine-injector group-rebuild', 'engine-turbo group-rebuild', 'drivetrain-axle-rebuild', 'engine-engine cooling system-replacement']
Service: drivetrain-differential-repair, Neighborhood: ['engine-engine cooling system-replacement', 'drivetrain-brakes-rebuild', 'drivetrain-axle-rebuild', 'drivetrain-brakes-repair', 'engine-turbo group-rebuild']
Service: engine-engine cooling system-replacement, Neighborhood: ['engine-turbo group-rebuild', 'drivetrain-brakes-rebuild', 'drivetrain-brakes-rep

In [None]:
# Candidate parts: only these names may appear in the recommendations.
given_parts = ['accumulators and accumulator components', 'accumulators, receivers, and dryers', 'air cooler  alarm', 'bars plates and strips']

# For each service, recommend the candidate parts that its neighbouring
# services use but that the service itself does not, most-used first.
recommendations = {}

for service in user_item_matrix.index:
    similar_services = neighborhood[service]  # Get the similar services from the neighborhood

    # Total usage count of every part across the neighbouring services.
    parts_used_by_similar_services = user_item_matrix.loc[similar_services].sum()

    # Keep parts that at least one neighbour actually uses (count > 0) and that
    # the service itself has never used. Without the count > 0 condition a part
    # that no neighbour uses would still be recommended.
    used_by_neighbors = parts_used_by_similar_services > 0
    used_by_service = user_item_matrix.loc[service].astype(bool)
    parts_not_used_by_service = parts_used_by_similar_services[used_by_neighbors & ~used_by_service]

    # Restrict to the candidate list.
    recommended_parts = parts_not_used_by_service[parts_not_used_by_service.index.isin(given_parts)]

    # Rank by how often the neighbours use each part, most frequent first.
    recommended_parts = recommended_parts.sort_values(ascending=False)

    # Store the ranked part names for the service.
    recommendations[service] = recommended_parts.index.tolist()

# Print the recommendations for each service
for service, recommended_parts in recommendations.items():
    print(f"Service: {service}, Recommended Parts: {recommended_parts}")


Service: drivetrain-axle-rebuild, Recommended Parts: ['accumulators and accumulator components', 'accumulators, receivers, and dryers', 'bars plates and strips']
Service: drivetrain-brakes-rebuild, Recommended Parts: []
Service: drivetrain-brakes-repair, Recommended Parts: ['bars plates and strips']
Service: drivetrain-differential-repair, Recommended Parts: ['accumulators and accumulator components', 'accumulators, receivers, and dryers', 'bars plates and strips']
Service: engine-engine cooling system-replacement, Recommended Parts: ['accumulators and accumulator components', 'accumulators, receivers, and dryers', 'bars plates and strips']
Service: engine-injector group-rebuild, Recommended Parts: ['accumulators and accumulator components', 'accumulators, receivers, and dryers', 'bars plates and strips']
Service: engine-turbo group-rebuild, Recommended Parts: ['accumulators and accumulator components', 'accumulators, receivers, and dryers', 'bars plates and strips']
Service: engine-wa

In [None]:
# Parts already chosen; these are excluded from the suggestions.
input_parts = ['accumulators and accumulator components', 'kit - maintenance', 'air cooler', 'bars plates and strips']

# For each service, rank the parts its neighbouring services use, leaving out
# the parts that are already in the input list.
recommendations = {}

for service in user_item_matrix.index:
    similar_services = neighborhood[service]  # Get the similar services from the neighborhood

    # Total usage count of every part across the neighbouring services.
    parts_used_by_similar_services = user_item_matrix.loc[similar_services].sum()

    # Keep parts that at least one neighbour actually uses (count > 0) and that
    # are not already in the input list. Without the count > 0 condition, a
    # service whose neighbours use none of the remaining parts would still
    # contribute an arbitrary zero-count part as its "top" pick.
    used_by_neighbors = parts_used_by_similar_services > 0
    already_in_input = parts_used_by_similar_services.index.isin(input_parts)
    parts_not_in_input = parts_used_by_similar_services[used_by_neighbors & ~already_in_input]

    # Rank by how often the neighbours use each part, most frequent first.
    recommended_parts = parts_not_in_input.sort_values(ascending=False)

    # Store the ranked part names for the service.
    recommendations[service] = recommended_parts.index.tolist()

# Collect the single top-ranked part of every service; the set removes repeats.
fifth_part_recommendations = set()
for service, recommended_parts in recommendations.items():
    if recommended_parts:
        fifth_part_recommendations.add(recommended_parts[0])  # Consider only the top recommended part

print("Recommendations for the fifth part:")
print(fifth_part_recommendations)


Recommendations for the fifth part:
{'seals, gaskets, and o-rings'}
