In [29]:
import openai
import pandas as pd
import numpy as np
import json
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix

#### **Helper Function & Prompt Templates**

In [43]:
def request_completion(prompt, completion_model="gpt-3.5-turbo"):
    completion_response = openai.ChatCompletion.create(
        model=completion_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0,
        max_tokens=20,
        top_p=1
    )
    return completion_response

def classify_description(data, prompt):
    prompt = prompt.replace('DESCRIPTION', data['Description'])
    classification = request_completion(prompt).choices[0].message.content.replace('\n','')
    return classification

def test_sample(data):
  test = data.sample(n = 200, random_state = 42) # 200 random rows for evaluation of model performance
  return test

def compute_metrics(test):
  print("Count of actual categories in test data: ")
  print(test['Categories'].value_counts())
  print(" ")

  print("Count of predicted categories in test data: ")
  print(test["Classification"].value_counts())
  print(" ")

  accuracy = accuracy_score(test['Categories'], test['Classification'])
  print(f"Accuracy of classification model: {accuracy:.2f}")
  print(" ")

  category_accuracies = test.groupby('Categories').apply(
        lambda x: accuracy_score(x['Categories'], x['Classification']))
  print("Accuracy for each category: ")
  print(category_accuracies)


In [40]:
zero_shot_prompt = '''You are a data annotation expert working for an E-commerce company.
You are analysing E-commerce item descriptions and classifying them into one of four categories.
The four categories are
1. Household
2. Books
3. Electronics
4. Clothing & Accessories.
If you can't tell what it is, say Could not classify. Do not make up categories that are not any of the above four.

Item Description:

Description: DESCRIPTION_TEXT

Your final answer should be only one of the four categories: Household, Books, Electronics, Clothing & accessories. Do not include any words, numberings or phrases before and after the classification answer.'''

In [60]:
few_shot_prompt = '''You are a data annotation expert working for an E-commerce company.
You are analysing E-commerce item descriptions and classifying them into one of four categories.
The four categories are
1. Household
2. Books
3. Electronics
4. Clothing & Accessories.
If you can't tell what it is, say Could not classify. Do not make up categories that are not any of the above four.

Item Description:

Description: DESCRIPTION_TEXT

Here are a few examples on how to classify the item descriptions:
1. SAF 'Floral' Framed Painting (Wood, 30 inch x 10 inch, Special Effect UV Print Textured, SAO297) Painting made up in synthetic frame with UV textured print which gives multi effects and attracts towards it. This is an special series of paintings which makes your wall very beautiful and gives a royal touch (A perfect gift for your special ones). --> Household
2. Am Wooden Embroidery Hoop/Frame for Crafters & Designers, Set Of 5-Different Sizes 4, 6, 8, 10, 12 Inches Wooden Embroidery Hoops For Crafting A Number Of Designs. Available In A Variety Of Sizes You Can Select The Shape And Size Of The Embroidery Pattern As Desired. --> Household
3. KraftInn Bamboo Floor Lamp (Brown, 28-inch) --> Household
4. AmazonBasics RJ45 Cat7 Network Ethernet Patch/LAN Cable - 15 Feet (White) --> Electronics
5. Panasonic SR-WA10 450-Watt Automatic Cooker without Warmer (White) Compact and Energy-Efficient Cooker Dish out a variety of rice delicacies like biryanis and pulaos, using the Panasonic SR-WA10 rice cooker. This automatic rice cooker cooks rice quickly and also retains its nutrient value. The anodised aluminium cooking pan of this cooker absorbs heat fast and saves cooking time, thus proving to be energy-efficient. Being compact in size, this electric rice cooker can be placed conveniently on your kitchen platform or over the counter. Featuring automatic power options, the cooker switches off automatically after the rice is cooked, making it convenient and safe for use.Keeps Rice Warm for Hours The Panasonic automatic rice cooker keeps rice warm for up to four hours after it has been cooked, so you can enjoy piping hot rice during your meal time. It has heat-proof bridge handles, ensuring a comfortable grip while holding the rice cooker. With a capacity of 1L, this Panasonic electric rice cooker is suitable for preparing rice for a small family. The Panasonic 550w rice cooker is also equipped with a scoop holder, which keeps the scoop in place and make cooking less messy. This cooker comes with a manufacturer's warranty for 2 years.Brand: Panasonic Anodised aluminium cooking pan Automatic power functions Equipped with a scoop holder Power: 550W Capacity: 1L Manufacturer's warranty: 2 years --> Household
6. It (Signet) Amazon.com Review They were seven teenagers when they first stumbled upon the horror. Now they were grown-up men and women who had gone out into the big world to gain success and happiness. But none of them could withstand the force that drew them back to Derry, Maine to face the nightmare without an end, and the evil without a name. What was it? Read It and find out...if you dare! Review “It will overwhelm you…Characters so real you feel you are reading about yourself…scenes to be read in a well-lit room only.”—Los Angeles Times“King’s most mature work.”—St. Petersburg Times“King is our great storyteller.”—Los Angeles Herald-Examiner See all Product description --> Books
7. Artis BT99 Wireless Portable Bluetooth Speaker with USB/FM/AUX in (Brown) --> Electronics
8. Marcato Atlas Pasta Machine, Made in Italy, Chrome, Includes Pasta Cutter, Hand Crank, and Instructions Style:Atlas 150 Pasta Machine Atlas 150 Pasta MachineMake fresh beautiful pasta with the Marcato Multi-Pasta Set that doesn t even resemble those dry sticks from the grocery store. Cooks up tender and flavorful in just a few minutes and topped with your signature sauce your home made pasta will be the centerpiece of many memorable meals. Atlas hand crank pasta machine includes all the tools attachments needed such as: Lasagne Spaghetti and Fettuccine. Crank the handle and watch golden fresh pasta emerge ready to drop in boiling water. This is a machine you ll pass down - very heavy very sturdy beautifully made. How to clean Just brush off the rollers and store. Includes narrow and wide cutters for fettuccine and vermicelli crank clamp recipes and instructions. All Marcato products are manufactured using only the finest materials available and are subject to continuous quality control. --> Household
9. iCraft Heat Gun for Embossing, Multi-Purpose Hot Air Gun Heat Tool, Heat Shrink Gun DIY Crafts Rated Voltage-230V, Rated Frequency-50Hz, Rated Power Inout -300W HOW TO USE THE HEAT TOOL: 1.Plug in the heat toll. 2.Hold the product as shown on the picture,Do not touch the hot nozzle. 3.Keep 5-6cm distance between the heat tool and item. 4.Do not use more than 10 minutes continuously to prevent overheated. --> Household
10. Chillaxplus 13mm Impact Drill Machine Kit with 101 Pieces Tool Accessories Do you know how handy it is to own a professional drill? The CHILLAXPLUS Impact Drill 13mm Professional is one of the ideal home improvement products that you can buy to meet any kind of day to day fixing requirements. This 13mm Professional Drill has a 220V-240V range with a 600W powerful motor that enables you to drill in easily. Since its weight is only 1.7 kg, you can therefore carry it conveniently from one place to another. This portable kit is packed in a carrying case made of high-quality plastic that has different compartments to store the various components so that you can easily access them. It has a forward-reverse switch that helps to run and drill the screws. With the help of a switch, you can also choose between the options of drilling and hammering. The egronomic shape of the drill lets you have a good grip of the same. It offers you a 6 month-warranty against any kind of manufacturing defects. The powerful drill allows you to easily drill in wood, metal, concrete, tiles etc. The 10 piece screwdriver bit set with 13mm impact drill also allows you to set the screws effortlessly and the 14 drill bits - 5 for metal and concrete and 4 for wood makes the drilling job faster. Overall, this 100 piece tool kit is excellent for any kind of fixing or repairing job. So, shop online for this CHILLAXPLUS Impact Drill 13mm Professional and see how easy it is handle your day to day repairing jobs without requiring to follow up the carpenter for days. --> Household

Your final answer should be only one of the four categories: Household, Books, Electronics, Clothing & accessories. Do not include any words, numberings or phrases before and after the classification answer.'''

In [113]:
few_shot_prompt_with_cot = '''You are a data annotation expert working for an E-commerce company.
You are analysing E-commerce item descriptions and classifying them into one of four categories.
The four categories are
1. Household
2. Books
3. Electronics
4. Clothing & Accessories.

Item Description:

Description: DESCRIPTION_TEXT

Here are a few examples on how to classify the item descriptions:
1. SAF 'Floral' Framed Painting (Wood, 30 inch x 10 inch, Special Effect UV Print Textured, SAO297) Painting made up in synthetic frame with UV textured print which gives multi effects and attracts towards it. This is an special series of paintings which makes your wall very beautiful and gives a royal touch (A perfect gift for your special ones). --> Household
2. Am Wooden Embroidery Hoop/Frame for Crafters & Designers, Set Of 5-Different Sizes 4, 6, 8, 10, 12 Inches Wooden Embroidery Hoops For Crafting A Number Of Designs. Available In A Variety Of Sizes You Can Select The Shape And Size Of The Embroidery Pattern As Desired. --> Household
3. KraftInn Bamboo Floor Lamp (Brown, 28-inch) --> Household
4. AmazonBasics RJ45 Cat7 Network Ethernet Patch/LAN Cable - 15 Feet (White) --> Electronics
5. Panasonic SR-WA10 450-Watt Automatic Cooker without Warmer (White) Compact and Energy-Efficient Cooker Dish out a variety of rice delicacies like biryanis and pulaos, using the Panasonic SR-WA10 rice cooker. This automatic rice cooker cooks rice quickly and also retains its nutrient value. The anodised aluminium cooking pan of this cooker absorbs heat fast and saves cooking time, thus proving to be energy-efficient. Being compact in size, this electric rice cooker can be placed conveniently on your kitchen platform or over the counter. Featuring automatic power options, the cooker switches off automatically after the rice is cooked, making it convenient and safe for use.Keeps Rice Warm for Hours The Panasonic automatic rice cooker keeps rice warm for up to four hours after it has been cooked, so you can enjoy piping hot rice during your meal time. It has heat-proof bridge handles, ensuring a comfortable grip while holding the rice cooker. With a capacity of 1L, this Panasonic electric rice cooker is suitable for preparing rice for a small family. The Panasonic 550w rice cooker is also equipped with a scoop holder, which keeps the scoop in place and make cooking less messy. This cooker comes with a manufacturer's warranty for 2 years.Brand: Panasonic Anodised aluminium cooking pan Automatic power functions Equipped with a scoop holder Power: 550W Capacity: 1L Manufacturer's warranty: 2 years --> Household
6. It (Signet) Amazon.com Review They were seven teenagers when they first stumbled upon the horror. Now they were grown-up men and women who had gone out into the big world to gain success and happiness. But none of them could withstand the force that drew them back to Derry, Maine to face the nightmare without an end, and the evil without a name. What was it? Read It and find out...if you dare! Review “It will overwhelm you…Characters so real you feel you are reading about yourself…scenes to be read in a well-lit room only.”—Los Angeles Times“King’s most mature work.”—St. Petersburg Times“King is our great storyteller.”—Los Angeles Herald-Examiner See all Product description --> Books
7. Artis BT99 Wireless Portable Bluetooth Speaker with USB/FM/AUX in (Brown) --> Electronics
8. Marcato Atlas Pasta Machine, Made in Italy, Chrome, Includes Pasta Cutter, Hand Crank, and Instructions Style:Atlas 150 Pasta Machine Atlas 150 Pasta MachineMake fresh beautiful pasta with the Marcato Multi-Pasta Set that doesn t even resemble those dry sticks from the grocery store. Cooks up tender and flavorful in just a few minutes and topped with your signature sauce your home made pasta will be the centerpiece of many memorable meals. Atlas hand crank pasta machine includes all the tools attachments needed such as: Lasagne Spaghetti and Fettuccine. Crank the handle and watch golden fresh pasta emerge ready to drop in boiling water. This is a machine you ll pass down - very heavy very sturdy beautifully made. How to clean Just brush off the rollers and store. Includes narrow and wide cutters for fettuccine and vermicelli crank clamp recipes and instructions. All Marcato products are manufactured using only the finest materials available and are subject to continuous quality control. --> Household
9. iCraft Heat Gun for Embossing, Multi-Purpose Hot Air Gun Heat Tool, Heat Shrink Gun DIY Crafts Rated Voltage-230V, Rated Frequency-50Hz, Rated Power Inout -300W HOW TO USE THE HEAT TOOL: 1.Plug in the heat toll. 2.Hold the product as shown on the picture,Do not touch the hot nozzle. 3.Keep 5-6cm distance between the heat tool and item. 4.Do not use more than 10 minutes continuously to prevent overheated. --> Household
10. Chillaxplus 13mm Impact Drill Machine Kit with 101 Pieces Tool Accessories Do you know how handy it is to own a professional drill? The CHILLAXPLUS Impact Drill 13mm Professional is one of the ideal home improvement products that you can buy to meet any kind of day to day fixing requirements. This 13mm Professional Drill has a 220V-240V range with a 600W powerful motor that enables you to drill in easily. Since its weight is only 1.7 kg, you can therefore carry it conveniently from one place to another. This portable kit is packed in a carrying case made of high-quality plastic that has different compartments to store the various components so that you can easily access them. It has a forward-reverse switch that helps to run and drill the screws. With the help of a switch, you can also choose between the options of drilling and hammering. The egronomic shape of the drill lets you have a good grip of the same. It offers you a 6 month-warranty against any kind of manufacturing defects. The powerful drill allows you to easily drill in wood, metal, concrete, tiles etc. The 10 piece screwdriver bit set with 13mm impact drill also allows you to set the screws effortlessly and the 14 drill bits - 5 for metal and concrete and 4 for wood makes the drilling job faster. Overall, this 100 piece tool kit is excellent for any kind of fixing or repairing job. So, shop online for this CHILLAXPLUS Impact Drill 13mm Professional and see how easy it is handle your day to day repairing jobs without requiring to follow up the carpenter for days. --> Household
11. Cherokee by Unlimited Boys' Cotton Sweater --> Clothing & Accessories
12. Iuhan Toddler Kids Baby Girls Outfit Clothes Button Knitted Sweater Cardigan Coat Tops ❤️Size:2T --- Label Size:90 ----- Bust:69cm/27.2" --- Length:35cm/13.8" --- Height:90CM ❤️Size:3T --- Label Size:100 --- Bust:72cm/28.3" --- Length:37cm/14.6" --- Height:100CM ❤️Size:4T --- Label Size:110 --- Bust:75cm/29.5" --- Length:39cm/15.4" --- Height:110CM ❤️Size:5T --- Label Size:120 --- Bust:78cm/30.7" --- Length:41cm/16.1" --- Height:120CM ❤️Size:6T --- Label Size:130 --- Bust:81cm/31.9" --- Length:43cm/16.9" --- Height:130CM ❤️Size:7T --- Label Size:140 --- Bust:84cm/33.1" --- Length:45cm/17.7" --- Height:140CM ❤️Size:8T --- Label Size:150 --- Bust:87cm/34.3" --- Length:48cm/18.9" --- Height:150CM --> Clothing & Accessories

Go through the following thought process when classifying the item descriptions:
1. Take a deep breath and read through the entire item description.
2. Sieve out key words and phrases that are important in determining the item category such as size of item (eg. S, M, L), material of item, use cases, where it is meant to be used.
3. Determine if the item is meant to be worn (eg. shirt, blazer, jacket), if it is, then it is under Clothing & Accessories. If gender is found in the item description, then it is under Clothing & Accessories.
4. Determine the category that the item belongs to based on the above.


Your final answer should be only one of the four categories: Household, Books, Electronics, Clothing & accessories. Do not make up any other categories. Do not include any words, numberings or phrases before and after the classification answer.'''

#### **Load Data**

In [4]:
data = pd.read_csv("/content/data.csv", engine="python", delimiter=",")

data.columns = ["Categories", "Description"]
data.head()

Unnamed: 0,Categories,Description
0,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
1,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
2,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
3,Household,Incredible Gifts India Wooden Happy Birthday U...
4,Household,Pitaara Box Romantic Venice Canvas Painting 6m...


In [10]:
data['Categories'].value_counts()

Unnamed: 0_level_0,count
Categories,Unnamed: 1_level_1
Household,19312
Books,11820
Electronics,10621
Clothing & Accessories,8671


### **Zero-Shot Prompting (GPT-3.5)**

In [15]:
sample = data.iloc[0]
prompt = zero_shot_prompt.replace('DESCRIPTION_TEXT', sample['Description'])
completion_response = request_completion(prompt)
print(completion_response['choices'][0]['message']['content'].strip())

Household


In [41]:
test = test_sample(data)
test['Classification'] = test.apply(lambda x: classify_description(x, zero_shot_prompt), axis = 1)

In [44]:
compute_metrics(test)

Count of actual categories in test data: 
Categories
Household                 71
Books                     51
Electronics               46
Clothing & Accessories    32
Name: count, dtype: int64
 
Count of predicted categories in test data: 
Classification
Electronics               73
Books                     49
Household                 42
Clothing & Accessories    33
Could not classify.        3
Name: count, dtype: int64
 
Accuracy of classification model: 0.82
 
Accuracy for each category: 
Categories
Books                     0.941176
Clothing & Accessories    0.968750
Electronics               0.978261
Household                 0.563380
dtype: float64


  category_accuracies = test.groupby('Categories').apply(


- The model performed well in terms of accuracy for all categories except for household.

In [45]:
incorrect = test[test['Categories'] != test['Classification']]
incorrect

Unnamed: 0,Categories,Description,Classification
8760,Household,"Glitz LED Spot Light, Bed Side Flexible arm, 3...",Electronics
17922,Household,Monarch Bituminous Waterproofing Coating For S...,Could not classify.
38114,Clothing & Accessories,Perfect Match (Emma and James) Review A touchi...,Books
13661,Household,Eveready RM1001 900-Watt Roti Maker (Black) Wi...,Electronics
18508,Household,Taparia 1621-8 Combination Plier Taparia combi...,Electronics
17233,Household,Prettyia 35mm Precise Cabinet Door Hinge Jig H...,Electronics
7831,Household,Le Craf Men's Brown Genuine Leather RFID Block...,Clothing & Accessories
20574,Books,YaYa Cafe™Birthday Gifts for Family Photo Fram...,Household
12659,Household,Morphy Richards Voyager 300 0.5-Litre Stainles...,Electronics
49065,Electronics,Niks Car Air Outlet Vent Internal Cleaner Keyb...,Household


- Most incorrect household categories were classified as electronics.

### **Few-Shot Prompting (GPT-3.5)**
In this section, we give a few examples to the model, focusing more on classifying household items correctly instead of classifying them as electronics.

In [61]:
test = test_sample(data)
test['Classification'] = test.apply(lambda x: classify_description(x, few_shot_prompt), axis = 1)

In [62]:
compute_metrics(test)

Count of actual categories in test data: 
Categories
Household                 71
Books                     51
Electronics               46
Clothing & Accessories    32
Name: count, dtype: int64
 
Count of predicted categories in test data: 
Classification
Electronics               60
Household                 57
Books                     50
Clothing & Accessories    32
Could not classify.        1
Name: count, dtype: int64
 
Accuracy of classification model: 0.88
 
Accuracy for each category: 
Categories
Books                     0.960784
Clothing & Accessories    0.968750
Electronics               0.934783
Household                 0.732394
dtype: float64


  category_accuracies = test.groupby('Categories').apply(


- With few-shot prompting, accuracy for classification of household items has significantly improved.

In [63]:
incorrect = test[test['Categories'] != test['Classification']]
incorrect

Unnamed: 0,Categories,Description,Classification
8760,Household,"Glitz LED Spot Light, Bed Side Flexible arm, 3...",Electronics
38114,Clothing & Accessories,Perfect Match (Emma and James) Review A touchi...,Books
13661,Household,Eveready RM1001 900-Watt Roti Maker (Black) Wi...,Electronics
7831,Household,Le Craf Men's Brown Genuine Leather RFID Block...,Clothing & Accessories
20574,Books,YaYa Cafe™Birthday Gifts for Family Photo Fram...,Household
12659,Household,Morphy Richards Voyager 300 0.5-Litre Stainles...,Electronics
49065,Electronics,Niks Car Air Outlet Vent Internal Cleaner Keyb...,Household
18972,Household,Stanley 69GR20B Gluepro Trigger Feed Hot Melt ...,Electronics
3850,Household,Hello Dream Imported Digital Smart Backlight A...,Electronics
13047,Household,"SAYSHA 7 Egg-Electric Boiler (1L, Multicolour)...",Electronics


### **Few-Shot Prompting with Chain-of-Thought Reasoning**

In [114]:
test = test_sample(data)
test['Classification'] = test.apply(lambda x: classify_description(x, few_shot_prompt_with_cot), axis = 1)

In [115]:
compute_metrics(test)

Count of actual categories in test data: 
Categories
Household                 71
Books                     51
Electronics               46
Clothing & Accessories    32
Name: count, dtype: int64
 
Count of predicted categories in test data: 
Classification
Electronics               60
Household                 58
Books                     51
Clothing & Accessories    31
Name: count, dtype: int64
 
Accuracy of classification model: 0.89
 
Accuracy for each category: 
Categories
Books                     0.960784
Clothing & Accessories    0.937500
Electronics               0.978261
Household                 0.760563
dtype: float64


  category_accuracies = test.groupby('Categories').apply(


In [116]:
incorrect = test[test['Categories'] != test['Classification']]
incorrect

Unnamed: 0,Categories,Description,Classification
8760,Household,"Glitz LED Spot Light, Bed Side Flexible arm, 3...",Electronics
38114,Clothing & Accessories,Perfect Match (Emma and James) Review A touchi...,Books
13661,Household,Eveready RM1001 900-Watt Roti Maker (Black) Wi...,Electronics
7831,Household,Le Craf Men's Brown Genuine Leather RFID Block...,Clothing & Accessories
20574,Books,YaYa Cafe™Birthday Gifts for Family Photo Fram...,Household
12659,Household,Morphy Richards Voyager 300 0.5-Litre Stainles...,Electronics
3850,Household,Hello Dream Imported Digital Smart Backlight A...,Electronics
32404,Clothing & Accessories,Splash - Baby (Unisex) Winter wear Set - Upper...,Household
13047,Household,"SAYSHA 7 Egg-Electric Boiler (1L, Multicolour)...",Electronics
15269,Household,Dyson V10 Absolute Pro Cord-Free Vacuum (Coppe...,Electronics
