In [1]:
import pandas as pd
import numpy as np

from IPython.display import display
from src.embedding_model import EmbeddingModel
from src.utils.timing import calculate_token_processing_speed

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Demo queries, grouped by the kind of column each one is meant to hit.
queries = (
    # Numeric lookups against `Delivery_distance`
    [
        "maximum delivery distance",
        "minimum delivery distance",
        "most frequent delivery distance",
        "distances greater than 200",
        "distances less than 150",
        "distance between 100 and 300",
    ]
    # Categorical lookups against `Product_Category` or `Priority`
    + [
        "apparel product",
        "electronics items",
        "highest priority",
        "low priority shipments",
        "clothing product",
        "all medium priority orders",
    ]
    # Date lookups against `Date`
    + [
        "on date 2023-07-01",
        "before date 2023-08-01",
        "after date 2023-07-01",
        "between dates 2023-07-01 and 2023-07-08",
    ]
)

# Columns the demo works with; everything else in the CSV is dropped.
columns = ["Date", "Priority", "Product_Category", "Delivery_distance"]

# Read the semicolon-delimited file, parse `Date` (values that cannot be
# parsed become NaT because of errors='coerce'), then keep only `columns`.
data = (
    pd.read_csv('data.csv', sep=';')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'))
    [columns]
)

# Build the search model over the trimmed frame.
embedding_model = EmbeddingModel(data)


In [3]:
# Run every demo query through the model: print how the query was interpreted
# (column / value / action), then show the matching rows or a fallback message.
for query in queries:
    print(f"\nQuery: {query}")
    search_result = embedding_model.search(query)
    print(
        f"Column: {search_result['column_name']}, "
        f"Value: {search_result['value']}, "
        f"Action: {search_result['action']}"
    )

    # Nothing matched: report it and move on to the next query.
    if not search_result["row_ids"]:
        print("No matching results found.")
        continue

    # `row_ids` are used as positional indices into `data`.
    result_df = data.iloc[search_result["row_ids"]]
    display(result_df)


Query: maximum delivery distance
Column: Delivery_distance, Value: None, Action: maximum


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
89,2023-07-01,Low,Electronics,9900
90,2023-07-02,Low,Electronics,9900
91,2023-07-03,Low,Electronics,9900
92,2023-07-04,Low,Electronics,9900
93,2023-07-05,Low,Electronics,9900
94,2023-07-06,Low,Electronics,9900
95,2023-07-07,Low,Electronics,9900
96,2023-07-08,Low,Electronics,9900



Query: minimum delivery distance
Column: Delivery_distance, Value: None, Action: minimum


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
82,2023-07-01,High,Groceries,100
83,2023-07-02,High,Groceries,100
84,2023-07-03,High,Groceries,100
85,2023-07-04,High,Groceries,100
86,2023-07-05,High,Groceries,100
87,2023-07-06,High,Groceries,100
88,2023-07-07,High,Groceries,100



Query: most frequent delivery distance
Column: Delivery_distance, Value: None, Action: most frequent


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
65,2023-07-01,Low,Toys & Games,1400
66,2023-07-02,Low,Toys & Games,1400
67,2023-07-03,Low,Toys & Games,1400
68,2023-07-04,Low,Toys & Games,1400
69,2023-07-05,Low,Toys & Games,1400
70,2023-07-06,Low,Toys & Games,1400
71,2023-07-07,Low,Toys & Games,1400
72,2023-07-08,Low,Toys & Games,1400
73,2023-07-09,Low,Toys & Games,1400
74,2023-07-10,Low,Toys & Games,1400



Query: distances greater than 200
Column: Delivery_distance, Value: 200.0, Action: greater than


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
0,2023-07-01,Low,Apparel,300
1,2023-07-02,Low,Apparel,300
2,2023-07-03,Low,Apparel,300
3,2023-07-04,Low,Apparel,300
4,2023-07-01,Medium,Cosmetics & Personal Care,500
...,...,...,...,...
95,2023-07-07,Low,Electronics,9900
96,2023-07-08,Low,Electronics,9900
97,2023-07-01,High,Groceries,5700
98,2023-07-02,High,Groceries,5700



Query: distances less than 150
Column: Delivery_distance, Value: 150.0, Action: less than


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
82,2023-07-01,High,Groceries,100
83,2023-07-02,High,Groceries,100
84,2023-07-03,High,Groceries,100
85,2023-07-04,High,Groceries,100
86,2023-07-05,High,Groceries,100
87,2023-07-06,High,Groceries,100
88,2023-07-07,High,Groceries,100



Query: distance between 100 and 300
Column: Delivery_distance, Value: (100.0, 300.0), Action: between


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
0,2023-07-01,Low,Apparel,300
1,2023-07-02,Low,Apparel,300
2,2023-07-03,Low,Apparel,300
3,2023-07-04,Low,Apparel,300
45,2023-07-01,Medium,Apparel,200
46,2023-07-02,Medium,Apparel,200
47,2023-07-03,Medium,Apparel,200
48,2023-07-04,Medium,Apparel,200
49,2023-07-05,Medium,Apparel,200
50,2023-07-06,Medium,Apparel,200



Query: apparel product
Column: Product_Category, Value: Apparel, Action: None


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
0,2023-07-01,Low,Apparel,300
1,2023-07-02,Low,Apparel,300
2,2023-07-03,Low,Apparel,300
3,2023-07-04,Low,Apparel,300
14,2023-07-01,High,Apparel,1800
15,2023-07-02,High,Apparel,1800
16,2023-07-03,High,Apparel,1800
17,2023-07-04,High,Apparel,1800
18,2023-07-05,High,Apparel,1800
19,2023-07-06,High,Apparel,1800



Query: electronics items
Column: Product_Category, Value: Electronics, Action: None


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
89,2023-07-01,Low,Electronics,9900
90,2023-07-02,Low,Electronics,9900
91,2023-07-03,Low,Electronics,9900
92,2023-07-04,Low,Electronics,9900
93,2023-07-05,Low,Electronics,9900
94,2023-07-06,Low,Electronics,9900
95,2023-07-07,Low,Electronics,9900
96,2023-07-08,Low,Electronics,9900



Query: highest priority
Column: Priority, Value: High, Action: None


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
14,2023-07-01,High,Apparel,1800
15,2023-07-02,High,Apparel,1800
16,2023-07-03,High,Apparel,1800
17,2023-07-04,High,Apparel,1800
18,2023-07-05,High,Apparel,1800
19,2023-07-06,High,Apparel,1800
20,2023-07-07,High,Apparel,1800
21,2023-07-08,High,Apparel,1800
22,2023-07-09,High,Apparel,1800
23,2023-07-10,High,Apparel,1800



Query: low priority shipments
Column: Priority, Value: Low, Action: None


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
0,2023-07-01,Low,Apparel,300
1,2023-07-02,Low,Apparel,300
2,2023-07-03,Low,Apparel,300
3,2023-07-04,Low,Apparel,300
9,2023-07-01,Low,Groceries,500
10,2023-07-02,Low,Groceries,500
11,2023-07-03,Low,Groceries,500
12,2023-07-04,Low,Groceries,500
13,2023-07-05,Low,Groceries,500
51,2023-07-01,Low,Groceries,7000



Query: clothing product
Column: Product_Category, Value: Apparel, Action: None


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
0,2023-07-01,Low,Apparel,300
1,2023-07-02,Low,Apparel,300
2,2023-07-03,Low,Apparel,300
3,2023-07-04,Low,Apparel,300
14,2023-07-01,High,Apparel,1800
15,2023-07-02,High,Apparel,1800
16,2023-07-03,High,Apparel,1800
17,2023-07-04,High,Apparel,1800
18,2023-07-05,High,Apparel,1800
19,2023-07-06,High,Apparel,1800



Query: all medium priority orders
Column: Priority, Value: Medium, Action: None


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
4,2023-07-01,Medium,Cosmetics & Personal Care,500
5,2023-07-02,Medium,Cosmetics & Personal Care,500
6,2023-07-03,Medium,Cosmetics & Personal Care,500
7,2023-07-04,Medium,Cosmetics & Personal Care,500
8,2023-07-05,Medium,Cosmetics & Personal Care,500
39,2023-07-01,Medium,Toys & Games,800
40,2023-07-02,Medium,Toys & Games,800
41,2023-07-03,Medium,Toys & Games,800
42,2023-07-04,Medium,Toys & Games,800
43,2023-07-05,Medium,Toys & Games,800



Query: on date 2023-07-01
Column: Date, Value: 2023-07-01, Action: on date


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
0,2023-07-01,Low,Apparel,300
4,2023-07-01,Medium,Cosmetics & Personal Care,500
9,2023-07-01,Low,Groceries,500
14,2023-07-01,High,Apparel,1800
28,2023-07-01,High,Groceries,1200
33,2023-07-01,High,Apparel,500
39,2023-07-01,Medium,Toys & Games,800
45,2023-07-01,Medium,Apparel,200
51,2023-07-01,Low,Groceries,7000
58,2023-07-01,High,Groceries,600



Query: before date 2023-08-01
Column: Date, Value: 2023-08-01, Action: before date


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
0,2023-07-01,Low,Apparel,300
1,2023-07-02,Low,Apparel,300
2,2023-07-03,Low,Apparel,300
3,2023-07-04,Low,Apparel,300
4,2023-07-01,Medium,Cosmetics & Personal Care,500
...,...,...,...,...
95,2023-07-07,Low,Electronics,9900
96,2023-07-08,Low,Electronics,9900
97,2023-07-01,High,Groceries,5700
98,2023-07-02,High,Groceries,5700



Query: after date 2023-07-01
Column: Date, Value: 2023-07-01, Action: after date


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
1,2023-07-02,Low,Apparel,300
2,2023-07-03,Low,Apparel,300
3,2023-07-04,Low,Apparel,300
5,2023-07-02,Medium,Cosmetics & Personal Care,500
6,2023-07-03,Medium,Cosmetics & Personal Care,500
...,...,...,...,...
94,2023-07-06,Low,Electronics,9900
95,2023-07-07,Low,Electronics,9900
96,2023-07-08,Low,Electronics,9900
98,2023-07-02,High,Groceries,5700



Query: between dates 2023-07-01 and 2023-07-08
Column: Date, Value: (datetime.date(2023, 7, 1), datetime.date(2023, 7, 8)), Action: between dates


Unnamed: 0,Date,Priority,Product_Category,Delivery_distance
0,2023-07-01,Low,Apparel,300
1,2023-07-02,Low,Apparel,300
2,2023-07-03,Low,Apparel,300
3,2023-07-04,Low,Apparel,300
4,2023-07-01,Medium,Cosmetics & Personal Care,500
...,...,...,...,...
95,2023-07-07,Low,Electronics,9900
96,2023-07-08,Low,Electronics,9900
97,2023-07-01,High,Groceries,5700
98,2023-07-02,High,Groceries,5700


In [4]:
# Benchmark: measure the token processing speed over the demo queries
# N_TIMING_RUNS times and report the mean of the individual measurements.
#
# A list comprehension is used instead of a cell-level `for _ in ...` loop so
# the throwaway loop variable stays scoped to the comprehension; a top-level
# loop would rebind `_` in the notebook namespace, which IPython uses for the
# most recent cell output.
N_TIMING_RUNS = 100  # number of repeated measurements to average over

token_speeds = [
    calculate_token_processing_speed(queries, embedding_model)
    for _ in range(N_TIMING_RUNS)
]

print("Average token processing speed:", np.mean(token_speeds))


Average token processing speed: 299.62297978606495
