In [9]:
import pandas as pd

# Sample data
identifiers = [f'ID_{i+1}' for i in range(20)]
names = [f'Name_{i+1}' for i in range(20)]
childhood_paragraphs = [
    "Name_1 grew up in a small town surrounded by nature. As a child, they loved exploring the woods and building forts with friends. Their parents encouraged creativity, often taking them to local art fairs and teaching them how to paint.",
    "Name_2 had a lively childhood filled with music and laughter. They spent weekends at their grandmother's house, where they learned to play the piano. Family gatherings were filled with sing-alongs and stories from older relatives.",
    "Name_3 was a curious child, always asking questions about the world. They loved reading books and often spent hours at the library, imagining adventures in far-off lands. Their inquisitive nature led them to become a passionate learner.",
    "Name_4 grew up in a bustling city, where they enjoyed the vibrancy of urban life. They loved visiting local parks and museums, fostering a love for history and art from a young age. Friends were always nearby for spontaneous adventures.",
    "Name_5 was an adventurous spirit, often seen climbing trees or riding bikes around the neighborhood. They had a close-knit group of friends, and together they created their own little world filled with imagination and laughter.",
    "Name_6 was raised in a family that valued education. They often accompanied their parents to various cultural events, sparking an early interest in learning about different cultures and traditions from around the world.",
    "Name_7 had a playful childhood filled with sports and outdoor activities. They spent most weekends playing soccer with friends and dreaming of becoming a professional athlete, inspired by local heroes.",
    "Name_8 enjoyed a quiet childhood in the countryside, surrounded by fields and animals. They learned responsibility at an early age by helping on the family farm and developed a deep appreciation for nature.",
    "Name_9 was a bit of a dreamer, often lost in their thoughts. They loved writing stories and would create elaborate tales inspired by their daily life and imagination, much to the delight of their teachers.",
    "Name_10 grew up with a passion for science, often conducting small experiments at home. Their parents supported their curiosity, and they frequently visited science museums and planetariums.",
    "Name_11 had a multicultural upbringing, celebrating various traditions from their parents' backgrounds. This exposure enriched their childhood and fostered a sense of inclusivity and respect for diversity.",
    "Name_12 was always surrounded by books, as their family had a small library at home. This love for reading opened up a world of imagination and creativity, influencing their future pursuits.",
    "Name_13 enjoyed a whimsical childhood, filled with imaginary friends and adventures. They would spend hours creating stories and acting them out, laying the groundwork for a future in storytelling.",
    "Name_14 was passionate about art from a young age, often found sketching or painting. They received encouragement from their family, who recognized their talent and nurtured it through classes and workshops.",
    "Name_15 grew up in a busy household, where laughter and chaos were the norms. They learned to navigate challenges and find joy in everyday moments, which shaped their resilient character.",
    "Name_16 had a nurturing childhood, marked by family gatherings and strong connections. They learned the importance of community and kindness, values that continue to guide them.",
    "Name_17 spent many summers at their uncle's farm, where they learned about hard work and the value of nature. This experience instilled a sense of responsibility and a love for the outdoors.",
    "Name_18 enjoyed a tech-savvy upbringing, growing up alongside the rise of the internet. They were always fascinated by computers and would often tinker with gadgets, paving the way for a future in technology.",
    "Name_19 had a compassionate upbringing, where helping others was emphasized. Volunteering at local charities with their family instilled a sense of empathy and a desire to make a positive impact.",
    "Name_20 had a unique childhood, growing up in a traveling family. They experienced different cultures and landscapes, which broadened their horizons and fostered a deep appreciation for diversity."
]

# Create the DataFrame
df = pd.DataFrame({
    'Identifier': identifiers,
    'Name': names,
    'Childhood Description': childhood_paragraphs
})

#writing to csv
df.to_csv('childhood_memories.csv', index=False)

In [10]:
from langchain_community.document_loaders.csv_loader import CSVLoader

file_path = ('childhood_memories.csv')

loader = CSVLoader(file_path=file_path)
data = loader.load()

for record in data[:2]:
    print(record)

page_content='Identifier: ID_1
Name: Name_1
Childhood Description: Name_1 grew up in a small town surrounded by nature. As a child, they loved exploring the woods and building forts with friends. Their parents encouraged creativity, often taking them to local art fairs and teaching them how to paint.' metadata={'source': 'childhood_memories.csv', 'row': 0}
page_content='Identifier: ID_2
Name: Name_2
Childhood Description: Name_2 had a lively childhood filled with music and laughter. They spent weekends at their grandmother's house, where they learned to play the piano. Family gatherings were filled with sing-alongs and stories from older relatives.' metadata={'source': 'childhood_memories.csv', 'row': 1}


In [18]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)

texts=text_splitter.split_documents(data)

for text in texts[:20]:
    print(text)


page_content='Identifier: ID_1
Name: Name_1' metadata={'source': 'childhood_memories.csv', 'row': 0}
page_content='Childhood Description: Name_1 grew up in a small town surrounded by nature. As a child, they loved' metadata={'source': 'childhood_memories.csv', 'row': 0}
page_content='a child, they loved exploring the woods and building forts with friends. Their parents encouraged' metadata={'source': 'childhood_memories.csv', 'row': 0}
page_content='parents encouraged creativity, often taking them to local art fairs and teaching them how to paint.' metadata={'source': 'childhood_memories.csv', 'row': 0}
page_content='Identifier: ID_2
Name: Name_2' metadata={'source': 'childhood_memories.csv', 'row': 1}
page_content='Childhood Description: Name_2 had a lively childhood filled with music and laughter. They spent' metadata={'source': 'childhood_memories.csv', 'row': 1}
page_content='They spent weekends at their grandmother's house, where they learned to play the piano. Family' metadata={'

In [19]:
from langchain_core.embeddings import FakeEmbeddings

embeddings = FakeEmbeddings(size=4096)

In [21]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="my_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)

In [23]:
from uuid import uuid4
uuids = [str(uuid4()) for _ in range(len(data))]


vector_store.add_documents(documents=data, ids=uuids)

['e0710d99-8784-4fde-bee9-667cd23e3c8e',
 '6ffae748-8cc1-4bcd-8ef9-2c18819593f2',
 '17620c13-8223-4e40-9859-ac6120deb8ec',
 'ee8496ab-a68b-4e6b-bc33-8d48d5c522ea',
 '29c1cb37-5983-44b2-bb94-6adfeb49ea39',
 '96ffa700-db3a-4b81-a481-f4e23f157ca1',
 'adce377f-1bb6-4255-8d63-a7f897446a98',
 '2e8d2c85-abec-44b4-a2cb-d762a2e86797',
 '727d366b-f36a-478a-8539-d41aa36b46fe',
 '4b97b1f2-2334-481a-9232-88ca5619cb50',
 '326429be-8e54-412c-babc-c98162c64734',
 '65008ff9-6b3b-404f-9f82-65b125b093bf',
 'bb46bac1-f25b-4d59-aa9a-1d6c062162ce',
 '4a46e120-c98c-465a-bf4e-e283ab8386d4',
 '9ead8e98-dd14-43b2-ab8d-a29f3db62316',
 'c2398a2f-408c-462d-bd83-7470d319efe2',
 'f9db2ebd-d140-4846-b025-e68515484a2e',
 '025054f2-6892-4e7f-83f6-ffef8f2c7a73',
 '68dd2f18-0bb3-45ba-a4c3-87a075098dc9',
 '8c9aa074-6867-4128-9b84-81b94528a0e3']