# In [None]:
import re
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the products data from CSV into a DataFrame
products_table = pd.read_csv(filepath_or_buffer='Products Table.csv')


# Clean Product Names
def clean_product_name(name):
    """Normalize a product name.

    Removes every character that is not an ASCII letter, a digit, or
    whitespace, then converts the result to lowercase.

    Args:
        name: Raw product name string.

    Returns:
        The cleaned, lowercased name. Whitespace is not collapsed or
        stripped (``\\s`` is part of the allowed character set).

    Raises:
        TypeError: If ``name`` is not a string (raised by ``re.sub``).
    """
    cleaned_name = re.sub(r'[^a-zA-Z0-9\s]', '', name).lower()
    return cleaned_name


# Normalize the 'Product Name' column in place with clean_product_name
products_table['Product Name'] = products_table['Product Name'].apply(clean_product_name)


# Extract the first word of each cleaned name for initial grouping.
# Names are already lowercase after cleaning, so no extra lower() is needed.
def _first_word(name):
    """Return the first whitespace-delimited word of *name*, or '' if it has none."""
    words = name.split()
    # A name that is empty or whitespace-only after cleaning has no words;
    # fall back to '' instead of raising IndexError on words[0].
    return words[0] if words else ''


products_table['First_Word'] = products_table['Product Name'].apply(_first_word)


# Assign a supplier ID to every unique (first word, category) pair.
# IDs start at 1 and are handed out in order of first appearance in the table.
supplier_dict = {}
product_keys = list(zip(products_table['First_Word'], products_table['Category']))
for key in product_keys:
    # setdefault leaves already-seen pairs untouched; a new pair gets the
    # next ID, which is always one more than the number of pairs seen so far.
    supplier_dict.setdefault(key, len(supplier_dict) + 1)

# Next unused supplier ID (same final value the counter-based loop produced)
supplier_id = len(supplier_dict) + 1


# Map supplier IDs back to the products dataframe, one ID per row in row order
products_table['Supplier_ID'] = [supplier_dict[key] for key in product_keys]


# Supplier IDs 1-49 remain fixed; products with higher IDs are reassigned to
# the remaining supplier IDs (up to 150) based on category distribution.
# Constants are defined here for easy adjustment.
TOTAL_SUPPLIERS = 150
FIXED_SUPPLIERS = 49


# Split the products by whether their supplier ID falls in the fixed range.
# .copy() gives each part its own data, so assigning a column to one of them
# later does not raise pandas' SettingWithCopyWarning.
fixed_suppliers_table = products_table[products_table['Supplier_ID'] <= FIXED_SUPPLIERS].copy()
remaining_products_table = products_table[products_table['Supplier_ID'] > FIXED_SUPPLIERS].copy()


# Work out how many of the non-fixed supplier IDs each remaining category gets
remaining_suppliers_needed = TOTAL_SUPPLIERS - FIXED_SUPPLIERS
categories = remaining_products_table['Category'].unique()
category_distribution = {}
for position, category in enumerate(categories):
    # Every category gets an even share; the first (needed % n) categories
    # take one extra so the shares sum to exactly remaining_suppliers_needed.
    share = remaining_suppliers_needed // len(categories)
    if position < remaining_suppliers_needed % len(categories):
        share += 1
    category_distribution[category] = share


# Hand each category a consecutive block of supplier IDs, starting right
# after the fixed range
supplier_category_map = {}
new_supplier_id = FIXED_SUPPLIERS + 1
for category, quota in category_distribution.items():
    block_end = new_supplier_id + quota
    supplier_category_map[category] = list(range(new_supplier_id, block_end))
    new_supplier_id = block_end

# Function to assign a supplier ID, preserving fixed suppliers and distributing others by category
def assign_supplier(row):
    """Choose the supplier ID for a single product row.

    A row whose current 'Supplier_ID' is at most FIXED_SUPPLIERS keeps that
    ID. Any other row receives an ID picked at random from the list stored
    in supplier_category_map for the row's 'Category'.

    Args:
        row: Mapping-like product record with 'Supplier_ID' and 'Category'.

    Returns:
        The supplier ID to use for this row.
    """
    current_id = row['Supplier_ID']
    # Guard clause: IDs in the fixed range are returned unchanged
    if current_id <= FIXED_SUPPLIERS:
        return current_id
    return random.choice(supplier_category_map[row['Category']])


# Reassign supplier IDs for the non-fixed products. The apply is skipped when
# there are none: DataFrame.apply(axis=1) on an empty frame does not return a
# Series, so assigning its result to a single column would fail.
if not remaining_products_table.empty:
    # assign() returns a new frame, so no slice of products_table is mutated
    remaining_products_table = remaining_products_table.assign(
        Supplier_ID=remaining_products_table.apply(assign_supplier, axis=1)
    )

# Recombine both parts and restore the original row order
final_products_table = pd.concat([fixed_suppliers_table, remaining_products_table]).sort_index()

# Drop the helper column and the stray index column. 'Unnamed: 0' only exists
# when the CSV was saved with its index; because the file is rewritten below
# with index=False, it is absent on a re-run, so missing columns are ignored
# instead of raising KeyError.
final_products_table = final_products_table.drop(columns=['First_Word', 'Unnamed: 0'], errors='ignore')


# NOTE: this overwrites the same file the products were loaded from
final_products_table.to_csv('Products Table.csv', index=False)












