In [1]:
from searchlite.document import Document
import pandas as pd
import os

In [2]:
# Load the synthetic corpus; the first CSV column is used as the row index.
df = pd.read_csv(
    os.path.join(os.getcwd(), "../data/synthetic_data.csv"),
    index_col=0,
)

In [3]:
# Split the frame into the two inputs used to build a Document below:
# one dict of {"id", "category"} per row, and the raw text column.
metadata = df[["id", "category"]].to_dict(orient="records")
texts = df["text"]

In [4]:
# Preview the "text" column selected above (a pandas Series, one entry per CSV row).
texts

0     Experience unparalleled sound quality with the...
1     In a world ravaged by climate change, a group ...
2     The city council approved the new public trans...
3     Preheat the oven to 375°F. Mix flour, sugar, a...
4     Discover the hidden gems of Kyoto, from tranqu...
5     This study investigates the effects of micropl...
6     An evocative tale of love and loss, 'The Silen...
7     Looking for a skilled software engineer profic...
8     To reset your device, hold the power button fo...
9     The Berlin Wall, constructed in 1961, symboliz...
10    The blender exceeded my expectations with its ...
11    Regular cardio workouts not only improve heart...
12    This agreement is entered into by and between ...
13    Q: Does this jacket have waterproof capabiliti...
14    Photosynthesis is the process by which green p...
Name: text, dtype: object

In [5]:
# Preview the per-row metadata: a list of {'id', 'category'} dicts in row order.
metadata

[{'id': 1, 'category': 'Product Description'},
 {'id': 2, 'category': 'Movie Synopsis'},
 {'id': 3, 'category': 'News Article'},
 {'id': 4, 'category': 'Recipe'},
 {'id': 5, 'category': 'Travel Guide'},
 {'id': 6, 'category': 'Scientific Abstract'},
 {'id': 7, 'category': 'Book Review'},
 {'id': 8, 'category': 'Job Posting'},
 {'id': 9, 'category': 'User Manual'},
 {'id': 10, 'category': 'Historical Event'},
 {'id': 11, 'category': 'Customer Review'},
 {'id': 12, 'category': 'Health & Fitness'},
 {'id': 13, 'category': 'Legal Document'},
 {'id': 14, 'category': 'E-commerce FAQ'},
 {'id': 15, 'category': 'Educational Content'}]

In [6]:
# Build a Document directly from the in-memory texts and their metadata records.
doc = Document(texts=texts, metadata=metadata)

In [7]:
# Inspect the texts as stored on the Document built from in-memory inputs.
doc.texts

['Experience unparalleled sound quality with the EchoSphere wireless earbuds, featuring noise cancellation, 12-hour battery life, and an ergonomic design perfect for workouts.',
 'In a world ravaged by climate change, a group of unlikely heroes embarks on a perilous journey to save humanity from extinction.',
 'The city council approved the new public transportation plan yesterday, aiming to reduce traffic congestion and lower carbon emissions by 2030.',
 'Preheat the oven to 375°F. Mix flour, sugar, and eggs in a bowl, then fold in fresh blueberries. Bake for 25 minutes or until golden brown.',
 'Discover the hidden gems of Kyoto, from tranquil temples to bustling markets, and experience authentic Japanese culture like never before.',
 'This study investigates the effects of microplastic pollution on marine ecosystems, revealing significant impacts on coral reef health and biodiversity.',
 "An evocative tale of love and loss, 'The Silent Horizon' beautifully captures the complexities 

In [8]:
# Alternative construction path: let Document read the same CSV itself,
# naming which column holds the text and which columns become metadata.
doc_from_csv = Document.from_csv(
    os.path.join(os.getcwd(), "../data/synthetic_data.csv"),
    text_columns="text",
    metadata_columns=["id", "category"],
)

In [9]:
# Inspect the texts as stored on the Document built via Document.from_csv.
doc_from_csv.texts

['Experience unparalleled sound quality with the EchoSphere wireless earbuds, featuring noise cancellation, 12-hour battery life, and an ergonomic design perfect for workouts.',
 'In a world ravaged by climate change, a group of unlikely heroes embarks on a perilous journey to save humanity from extinction.',
 'The city council approved the new public transportation plan yesterday, aiming to reduce traffic congestion and lower carbon emissions by 2030.',
 'Preheat the oven to 375°F. Mix flour, sugar, and eggs in a bowl, then fold in fresh blueberries. Bake for 25 minutes or until golden brown.',
 'Discover the hidden gems of Kyoto, from tranquil temples to bustling markets, and experience authentic Japanese culture like never before.',
 'This study investigates the effects of microplastic pollution on marine ecosystems, revealing significant impacts on coral reef health and biodiversity.',
 "An evocative tale of love and loss, 'The Silent Horizon' beautifully captures the complexities 

In [10]:
# Inspect the metadata records stored on the Document built via Document.from_csv.
doc_from_csv.metadata

[{'id': 1, 'category': 'Product Description'},
 {'id': 2, 'category': 'Movie Synopsis'},
 {'id': 3, 'category': 'News Article'},
 {'id': 4, 'category': 'Recipe'},
 {'id': 5, 'category': 'Travel Guide'},
 {'id': 6, 'category': 'Scientific Abstract'},
 {'id': 7, 'category': 'Book Review'},
 {'id': 8, 'category': 'Job Posting'},
 {'id': 9, 'category': 'User Manual'},
 {'id': 10, 'category': 'Historical Event'},
 {'id': 11, 'category': 'Customer Review'},
 {'id': 12, 'category': 'Health & Fitness'},
 {'id': 13, 'category': 'Legal Document'},
 {'id': 14, 'category': 'E-commerce FAQ'},
 {'id': 15, 'category': 'Educational Content'}]

In [11]:
# Compare the text lists directly instead of as sets. List equality is
# order-sensitive and keeps duplicates, so this also confirms both construction
# paths store the texts in the same positions (texts are paired with metadata
# by position, and the metadata check is an ordered list comparison too).
doc.texts == doc_from_csv.texts

True

In [12]:
# Check that both construction paths yield the same metadata records in the same order.
doc.metadata == doc_from_csv.metadata

True

In [13]:
# Run the embedding step on the in-memory Document.
# NOTE(review): assumes query() requires embed() to have been called first — confirm against searchlite.
doc.embed()

In [14]:
# Free-text search on the in-memory Document; each hit is shown as its metadata
# plus the matched 'text' and a 'similarity score'.
doc.query("wireless earphone")

[{'id': 1,
  'category': 'Product Description',
  'text': 'Experience unparalleled sound quality with the EchoSphere wireless earbuds, featuring noise cancellation, 12-hour battery life, and an ergonomic design perfect for workouts.',
  'similarity score': 0.22940706653595777},
 {'id': 15,
  'category': 'Educational Content',
  'text': 'Photosynthesis is the process by which green plants convert sunlight into chemical energy, producing oxygen as a byproduct.',
  'similarity score': 0.0},
 {'id': 14,
  'category': 'E-commerce FAQ',
  'text': 'Q: Does this jacket have waterproof capabilities? A: Yes, it is made with breathable waterproof fabric suitable for heavy rain.',
  'similarity score': 0.0}]

In [15]:
# Run the embedding step on the CSV-built Document.
# NOTE(review): assumes query() requires embed() to have been called first — confirm against searchlite.
doc_from_csv.embed()

In [16]:
# Same query against the CSV-built Document, for side-by-side comparison with
# the results from the in-memory Document.
doc_from_csv.query("wireless earphone")

[{'id': 1,
  'category': 'Product Description',
  'text': 'Experience unparalleled sound quality with the EchoSphere wireless earbuds, featuring noise cancellation, 12-hour battery life, and an ergonomic design perfect for workouts.',
  'similarity score': 0.22940706653595777},
 {'id': 15,
  'category': 'Educational Content',
  'text': 'Photosynthesis is the process by which green plants convert sunlight into chemical energy, producing oxygen as a byproduct.',
  'similarity score': 0.0},
 {'id': 14,
  'category': 'E-commerce FAQ',
  'text': 'Q: Does this jacket have waterproof capabilities? A: Yes, it is made with breathable waterproof fabric suitable for heavy rain.',
  'similarity score': 0.0}]

In [17]:
# Re-display the stored metadata after querying.
# NOTE(review): presumably checks that query() did not add 'text' / 'similarity score'
# keys to the stored dicts — confirm intent.
doc.metadata

[{'id': 1, 'category': 'Product Description'},
 {'id': 2, 'category': 'Movie Synopsis'},
 {'id': 3, 'category': 'News Article'},
 {'id': 4, 'category': 'Recipe'},
 {'id': 5, 'category': 'Travel Guide'},
 {'id': 6, 'category': 'Scientific Abstract'},
 {'id': 7, 'category': 'Book Review'},
 {'id': 8, 'category': 'Job Posting'},
 {'id': 9, 'category': 'User Manual'},
 {'id': 10, 'category': 'Historical Event'},
 {'id': 11, 'category': 'Customer Review'},
 {'id': 12, 'category': 'Health & Fitness'},
 {'id': 13, 'category': 'Legal Document'},
 {'id': 14, 'category': 'E-commerce FAQ'},
 {'id': 15, 'category': 'Educational Content'}]

In [18]:
# Re-display the CSV-built Document's metadata after querying.
# NOTE(review): presumably checks that query() did not add 'text' / 'similarity score'
# keys to the stored dicts — confirm intent.
doc_from_csv.metadata

[{'id': 1, 'category': 'Product Description'},
 {'id': 2, 'category': 'Movie Synopsis'},
 {'id': 3, 'category': 'News Article'},
 {'id': 4, 'category': 'Recipe'},
 {'id': 5, 'category': 'Travel Guide'},
 {'id': 6, 'category': 'Scientific Abstract'},
 {'id': 7, 'category': 'Book Review'},
 {'id': 8, 'category': 'Job Posting'},
 {'id': 9, 'category': 'User Manual'},
 {'id': 10, 'category': 'Historical Event'},
 {'id': 11, 'category': 'Customer Review'},
 {'id': 12, 'category': 'Health & Fitness'},
 {'id': 13, 'category': 'Legal Document'},
 {'id': 14, 'category': 'E-commerce FAQ'},
 {'id': 15, 'category': 'Educational Content'}]