In [1]:
!pip install openai==0.28
!pip install python-dotenv

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.0
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [2]:
import openai
import os
import pandas as pd
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Location of the CSV whose categories get mapped further down.
csv_loc = "/content/transformcategory.csv"
if os.path.exists(csv_loc):
    print(f"File exists at: {csv_loc}")
else:
    print(f"File not found at: {csv_loc}")

# Resolve the OpenAI API key: try the Colab secret store first, then fall back
# to the OPENAI_API_KEY environment variable (which load_dotenv() may have
# populated). This keeps the cell usable outside Colab as well.
api_key = None
try:
    from google.colab import userdata
    api_key = userdata.get('OPENAI_API_KEY')
except Exception as exc:  # not running in Colab, or the secret is missing/inaccessible
    print(f"Colab secret not available ({type(exc).__name__}); trying the environment variable instead.")

if not api_key:
    api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    raise ValueError("API key not found. Please set the OPENAI_API_KEY environment variable.")

openai.api_key = api_key




# Function to convert list values to dictionary keys
def list_to_dict(keys_list, default_value=None):
    """Build a dict whose keys are the items of ``keys_list``.

    Every key is associated with the same ``default_value`` object
    (``None`` unless specified). Duplicate items collapse into a single key
    and first-seen order is kept.
    """
    return dict.fromkeys(keys_list, default_value)

def query_response(model_, prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        model_: Name of the chat model passed to ``openai.ChatCompletion.create``.
        prompt: Text placed in the ``content`` of the one user message.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    user_message = {"role": "user", "content": prompt}
    response = openai.ChatCompletion.create(model=model_, messages=[user_message])
    first_choice = response.choices[0]
    return first_choice.message.content

def categorize_item(item_name, model_):
    """Ask the chat model ``model_`` to assign ``item_name`` to one parent category.

    The prompt lists the allowed categories and asks for exactly one of them.
    The reply is then normalised: surrounding whitespace, quotes, backticks
    and periods are removed and, when the result matches an allowed category
    regardless of letter case, the canonical spelling is returned. This keeps
    labels such as "dairy." and "Dairy" from ending up as different values.

    Args:
        item_name: Item (or dataset category) to classify.
        model_: Model name forwarded to ``query_response``.

    Returns:
        The canonical category name when the reply matches one; otherwise the
        cleaned reply text. An empty or ``None`` reply is returned unchanged.
    """
    categories = [
        "Fresh Produce",
        "Meat/Poultry/Seafood",
        "Dairy",
        "Frozen Foods",
        "Bakery",
        "Snacks/Candy",
        "Prepared/Ready-Made Foods",
        "Beverages",
        "Alcoholic Beverages",
        "Pasta/Grains",
        "Canned/Jarred Goods",
        "Household",
        "Personal Care",
        "Baby",
        "Pet",
        "Other"
    ]
    category_list = ', '.join(categories)

    # Create a more focused and concise prompt
    prompt = f'''Reply only what you are asked, nothing else. Provide a category of a given item by choosing only one value. Categorize this item: '{item_name}'. Choose one category from the list: {category_list}'''
    reply = query_response(model_, prompt)
    if not reply:
        return reply

    # Drop decoration the model sometimes adds around the answer.
    cleaned = reply.strip(" \t\r\n'\"`.")
    # Map case-insensitively onto the canonical spelling; keep unknown replies as-is.
    canonical = {name.lower(): name for name in categories}
    return canonical.get(cleaned.lower(), cleaned)


# Load the dataset whose 'category' values will be mapped to parent categories.
df = pd.read_csv(csv_loc)
# Only the first 3 rows are kept, which limits the number of categories sent to the LLM.
df = df.head(3)
# Pass the shape itself: the previous nested print() call returned None,
# so the message ended with "None" instead of the shape.
print("Dataset loaded with shape: ", df.shape)

# Get unique values from the 'category' column and use them as dictionary keys
# (every value starts out as None).
unique_categories = df['category'].unique().tolist()
val = list_to_dict(unique_categories)
# val = {'vegetables': None, 'water': None, 'chicken': None, 'chocolates': None, 'cooked meals': None, 'desserts': None, 'toys': None, 'wine': None, 'games': None, 'chips': None, 'snacks': None, 'potatoes': None, 'ham': None, 'books': None, 'drinks': None, 'champagne': None, 'cheese': None, 'butter': None, 'sausages': None, 'food': None, 'laundry detergent in capsules': None, 'juice': None, 'meat': None, 'candy': None, 'blond beer': None, 'smoked salmon': None, 'yogurt': None, 'coffee': None, 'cookies': None, 'bread': None, 'duck foie gras': None, 'whiskey': None, 'toothpaste': None, 'pizza': None, 'washing-up liquid': None, 'ice': None, 'rum': None, 'sauces': None, 'chocolate': None, 'frozen': None, 'coffee capsules': None, 'beer': None, 'prepared dishes': None, 'cleaners': None, 'cream': None, 'shower gel': None, 'toilet paper': None, 'pasta': None, 'salmon': None, 'shampoo': None, 'kitchen accessories': None, 'beef': None, 'milk chocolate': None, 'red wine': None, 'nougat': None, 'olives': None, 'came': None, 'sausage': None, 'softener': None, 'diapers': None, 'liqueur': None, 'milk': None, 'white wine': None, 'broth': None, 'tuna': None, 'compresses': None, 'burgers': None, 'dishwashing detergent': None, 'cat food': None, 'yoghurt': None, 'dog food': None, 'pate for dogs': None, 'refreshments': None, 'geneva': None, 'smartphones': None, 'salads': None, 'rice': None, 'turkey breast': None, 'preserves': None, 'ice creams': None, 'jam': None, 'cereals': None, 'gel': None, 'seafood': None, 'combi refrigerator': None, 'shrimp': None, 'detergent': None, 'ready meals': None, 'liquid detergent': None, 'deodorant': None, 'energy drink': None, 'light tuna': None, 'fried tomato': None, 'olive oil': None, 'wipes': None, 'extra virgin olive oil': None, 'mayonnaise': None, 'apples': None, 'kitchen paper': None, 'smart tv': None, 'ground coffee': None, 'washing machines': None, 'tomato puree': None, "cat's food": None, 'ready-made foods': None, 'baked ham': None, 'dash 
detergent': None, 'the': None, 'face cream': None, 'fresh pasta': None, 'dried fruit': None, 'food for dogs': None, 'dishwasher detergent': None, 'low-fat milk': None, 'degreaser': None, 'mortadella': None, 'pesto': None, 'piadine': None, 'chicken breast': None, 'alcohol-free drinks': None, 'waterfall': None, 'absorbent lines': None, 'soft drinks': None, 'bresaola': None, 'pharmacy': None, 'games for children': None, 'fabric softener': None, 'fruit juices': None, 'sauce': None, 'hair care': None, 'spreadable cheese': None, 'bacon': None, 'bubbly wine': None, 'perfumes': None, 'french fries': None, 'soups': None, 'prosecco': None, 'salami': None, 'croissant': None, 'cod fillets': None, 'parmesan': None, 'christmas baskets': None, 'detergents': None, 'laundry detergent': None, 'dish detergent': None, 'paper rolls': None, 'semolina pasta': None, 'candies': None, 'dessert': None, 'pastry shop': None, 'anchovies': None, 'frozen foods': None, 'mozzarella cheese': None, 'herbal teas': None, 'hamburger': None, 'salad': None, 'stuffed pasta': None, 'gorgonzola': None, 'peeled tomatoes': None, 'egg': None, 'panettone': None, 'speck': None, 'pecorino cheese': None, 'housecleaning': None, 'raw ham': None, 'air fresheners': None, 'floor cleaning': None, 'lasagna': None, 'flour': None, 'face cleaning': None, 'liquid soap': None, 'pasta sauces': None, 'bathroom cleaning': None, 'ricotta': None, 'crackers': None, 'rusks': None, 'minestrone': None, 'napkins': None, 'razor': None, 'cured meat': None, 'medicine': None, 'body care': None, 'spirits': None, 'skin care': None, 'vitamins': None, 'supplements': None, 'laptops': None, 'tyres': None, 'medicines': None, 'absorbent': None, 'biscuit and biscuit': None, 'wines': None, 'powder detergent': None, 'sunscreen': None, 'noodle': None, 'juices': None, 'tomato sauce': None, 'soap': None, 'cheeses': None}
# Snapshot of the dictionary keys to loop over.
keys = list(val.keys())

# Chat model used to map each dataset category onto a parent category.
model = "gpt-3.5-turbo"

# Query the LLM only for keys that still have no value; entries that are
# already filled are skipped, which avoids paying for them again.
for i, key in enumerate(keys):
    if val[key] is not None:
        continue
    val[key] = categorize_item(key, model)


# Persist the mapping as one "key: value" pair per line.
with open('category_llm.txt', 'w') as file:
    for key, value in val.items():
        file.write(f"{key}: {value}\n")

File exists at: /content/transformcategory.csv
(3, 3)
Dataset loaded with shape:  None


In [4]:
# Read category.csv into `df`; this rebinds the name that earlier held the
# frame loaded from transformcategory.csv.
category_csv = '/content/category.csv'
df = pd.read_csv(category_csv)

In [18]:
def replace_categories(df, column_name, dictionary_file):
    """Remap one DataFrame column using a "key: value" text file.

    Each line of ``dictionary_file`` is split at its first ':'; the text
    before it (stripped) is the value to look for and the text after it
    (stripped) is the replacement. Lines without a ':' are ignored, and a
    later line overrides an earlier one with the same key.

    Args:
        df: DataFrame to update. The column is reassigned on this same object.
        column_name: Name of the column whose values are remapped.
        dictionary_file: Path of the mapping text file.

    Returns:
        The same ``df`` object, with values found in the mapping replaced and
        all other values left untouched.
    """
    lookup = {}
    with open(dictionary_file, "r") as handle:
        for raw_line in handle:
            source, separator, target = raw_line.strip().partition(":")
            if not separator:
                # No ':' on this line, so it carries no mapping.
                continue
            lookup[source.strip()] = target.strip()

    def translate(value):
        # Fall back to the original value when no mapping exists for it.
        return lookup.get(value, value)

    df[column_name] = df[column_name].map(translate)

    return df


# Apply the mapping stored in category_llm.txt to the 'category' column,
# then preview the first two rows.
df = replace_categories(
    df,
    column_name='category',
    dictionary_file='category_llm.txt',
)
df.head(2)

Unnamed: 0,product_name,category,product_brand
0,Flotador Infantil Para Brazo,Frozen Foods,Bestway
1,Alimento Para Perro Cachoro Y Adulto,Pet,Buen Can


In [19]:
# Display `df` after its 'category' column has been remapped.
df

Unnamed: 0,product_name,category,product_brand
0,Flotador Infantil Para Brazo,Frozen Foods,Bestway
1,Alimento Para Perro Cachoro Y Adulto,Pet,Buen Can
2,Suavizante,Household,Perla
3,Sanduchera,Household,Hometech
4,Crema Dental Triple Accion,Personal Care,Colgate
...,...,...,...
66,Servilleta Familia,Household,Acolchamax
67,Shampoo Anticaspa,Personal Care,Head & Shoulders
68,Olla Arrocera,Pasta/Grains,Hometech
69,Lavadoras 17 Kg,Household,Whirlpool
