## Imports

In [1]:
import weaviate
from weaviate.classes.config import DataType, Property, Configure

In [2]:
client = weaviate.connect_to_local()
print(client.is_ready())

True


## The fields are 
id	gender	masterCategory	subCategory	articleType	baseColour	season	year	usage	productDisplayName	description	averageRating numberOfRatings	price

## Defining the collection / class / schema

In [3]:
print(client.collections.exists("CleanedProducts"))

True


In [4]:
if client.collections.exists("CleanedProducts"):
    client.collections.delete("CleanedProducts")


client.collections.create(
    name="CleanedProducts",
    properties=[
        Property(name="productDisplayName", data_type=DataType.TEXT), 
        Property(name="season", data_type=DataType.TEXT),
        Property(name="description", data_type=DataType.TEXT),
        Property(name="gender", data_type=DataType.TEXT),
        Property(name="masterCategory", data_type=DataType.TEXT),
        Property(name="subCategory", data_type=DataType.TEXT),
        Property(name="price", data_type=DataType.TEXT),
        Property(name="numberOfRatings", data_type=DataType.INT),
        Property(name="averageRating", data_type=DataType.NUMBER),
        Property(name="productId", data_type=DataType.INT),
        Property(name="articleType", data_type=DataType.TEXT),
        Property(name="baseColour", data_type=DataType.TEXT),
        Property(name="year", data_type=DataType.INT),
        Property(name="usage", data_type=DataType.TEXT),
    ],
    vectorizer_config=[
        # Set a named vector
        Configure.NamedVectors.text2vec_transformers(  # Use the "text2vec-cohere" vectorizer
            name="name_master_sub_art_col_use_seas_gender", source_properties=["productDisplayName","masterCategory", "subCategory", "articleType", "baseColour", "usage", "season", 'gender']       # Set the source property(ies)
        ),
        # Configure.NamedVectors.text2vec_transformers(  # Use the "text2vec-openai" vectorizer
        #     name="name_master_sub_col", source_properties=["productDisplayName","masterCategory", "subCategory", "baseColour"] # Set the source property(ies)
        # ),
        # Configure.NamedVectors.text2vec_transformers(  # Use the "text2vec-openai" vectorizer
        #     name= "name_color_seas", source_properties=["productDisplayName", "baseColour", "season"] # Set the source property(ies)
        # ),
    ],
    
)

<weaviate.collections.collection.sync.Collection at 0x1161ef130>

## Importing and cleaning the data

In [7]:
import pandas as pd 

df = pd.read_csv(input("Enter the path to the csv file: "))

# include these fields id	gender	masterCategory	subCategory	articleType	baseColour	season	year	usage	productDisplayName	description	averageRating numberOfRatings	price
df = df[['productId', 'gender', 'masterCategory', 'subCategory', 'articleType', 'baseColour', 'season', 'year', 'usage', 'productDisplayName', 'description', 'averageRating', 'numberOfRatings', 'price']]

# print if na values are present
print(df.isna().sum())

# convert to dictionary
df_dict = df.to_dict(orient='records')
print(df_dict[:5])


productId             0
gender                0
masterCategory        0
subCategory           0
articleType           0
baseColour            0
season                0
year                  0
usage                 0
productDisplayName    0
description           0
averageRating         0
numberOfRatings       0
price                 0
dtype: int64
[{'productId': 15970, 'gender': 'Men', 'masterCategory': 'Apparel', 'subCategory': 'Topwear', 'articleType': 'Shirts', 'baseColour': 'Navy Blue', 'season': 'Fall', 'year': 2011.0, 'usage': 'Casual', 'productDisplayName': 'Turtle Check Men Navy Blue Shirt', 'description': 'Turtle Check Men Navy Blue Shirt is a Navy Blue Shirts for Men in the Fall season.', 'averageRating': 3.2, 'numberOfRatings': 909, 'price': '₹ 999'}, {'productId': 39386, 'gender': 'Men', 'masterCategory': 'Apparel', 'subCategory': 'Bottomwear', 'articleType': 'Jeans', 'baseColour': 'Blue', 'season': 'Summer', 'year': 2012.0, 'usage': 'Casual', 'productDisplayName': 'Peter En

In [8]:
collection = client.collections.get("CleanedProducts")

In [9]:
# write a function to insert data in batches of 1000, the dataset has about 31000 rows
def insert_data(data):
    try: 
        for i in range(0, len(data), 1000):
            print(f"Inserting data from {i} to {i+1000}")
            collection.data.insert_many(data[i:i+1000])
    except Exception as e:
        print(e)
    finally:
        print("Data Inserted Successfully!")

insert_data(df_dict)


Inserting data from 0 to 1000
Inserting data from 1000 to 2000
Inserting data from 2000 to 3000
Inserting data from 3000 to 4000
Inserting data from 4000 to 5000
Inserting data from 5000 to 6000
Inserting data from 6000 to 7000
Inserting data from 7000 to 8000
Inserting data from 8000 to 9000
Inserting data from 9000 to 10000
Inserting data from 10000 to 11000
Inserting data from 11000 to 12000
Inserting data from 12000 to 13000
Inserting data from 13000 to 14000
Inserting data from 14000 to 15000
Inserting data from 15000 to 16000
Inserting data from 16000 to 17000


{'message': 'Failed to send 1 objects in a batch of 1000. Please inspect the errors variable of the returned object for more information.', 'errors': {583: ErrorObject(message="invalid integer property 'year' on class 'CleanedProducts': requires an integer, the given value is '2012.80649663463'", object_=BatchObject(collection='CleanedProducts', properties={'productId': 53781, 'gender': 'Men', 'masterCategory': 'Apparel', 'subCategory': 'Topwear', 'articleType': 'Tshirts', 'baseColour': 'Blue', 'season': 'Unknown', 'year': 2012.80649663463, 'usage': 'Sports', 'productDisplayName': 'Puma Men Blue Sless Round Neck T-shirt', 'description': 'Puma Men Blue Sless Round Neck T-shirt is a Blue Tshirts for Men in the Unknown season.', 'averageRating': 4.4, 'numberOfRatings': 506, 'price': '₹ 1499'}, references=None, uuid='f1fa36e7-97be-4da2-8b8b-a165c7fa3dcf', vector=None, tenant=None, index=583, retry_count=0), original_uuid='f1fa36e7-97be-4da2-8b8b-a165c7fa3dcf')}}


Inserting data from 17000 to 18000
Inserting data from 18000 to 19000
Inserting data from 19000 to 20000
Inserting data from 20000 to 21000
Inserting data from 21000 to 22000
Inserting data from 22000 to 23000
Inserting data from 23000 to 24000
Inserting data from 24000 to 25000
Inserting data from 25000 to 26000
Inserting data from 26000 to 27000
Inserting data from 27000 to 28000
Inserting data from 28000 to 29000
Inserting data from 29000 to 30000
Inserting data from 30000 to 31000
Inserting data from 31000 to 32000
Data Inserted Successfully!


In [28]:
from weaviate.classes.query import MetadataQuery

response = collection.query.near_text(
    query = "mujhe father ko kuch gift karna hai",
    limit = 100,
    target_vector = "name_master_sub_art_col_use_seas_gender",
    return_metadata=MetadataQuery(distance=True)
)

In [29]:
# Display the query results
print(f"Found {len(response.objects)} results")
for i, obj in enumerate(response.objects[:100]):
    print(f"\nResult {i+1}:")
    print(f"Product: {obj.properties['productDisplayName']}")
    print(f"Distance: {obj.metadata.distance}")
    print(f"Category: {obj.properties['masterCategory']} > {obj.properties['subCategory']} > {obj.properties['articleType']}")
    print(f"Price: {obj.properties['price']}")

Found 100 results

Result 1:
Product: Lino Perros Men Formal Pink Accessory Gift Set
Distance: 0.6268256902694702
Category: Accessories > Accessories > Accessory Gift Set
Price: ₹ 3999

Result 2:
Product: Lino Perros Men Formal Yellow Accessory Gift Set
Distance: 0.6309895515441895
Category: Accessories > Accessories > Accessory Gift Set
Price: ₹ 999

Result 3:
Product: Cabarelli Men Accessory Gift Set
Distance: 0.632706880569458
Category: Accessories > Accessories > Accessory Gift Set
Price: ₹ 1499

Result 4:
Product: Lino Perros Men Formal Red Accessory Gift Set
Distance: 0.6333981156349182
Category: Accessories > Accessories > Accessory Gift Set
Price: ₹ 7499

Result 5:
Product: Lino Perros Men Formal Blue Accessory Gift Set
Distance: 0.6340821385383606
Category: Accessories > Accessories > Accessory Gift Set
Price: ₹ 1999

Result 6:
Product: Lino Perros Men Formal Black Accessory Gift Set
Distance: 0.6355754733085632
Category: Accessories > Accessories > Accessory Gift Set
Price: ₹