## Setting up the development environment

In [1]:
!pip install cohere tiktoken openai
!pip install langchain

Collecting cohere
  Downloading cohere-4.31-py3-none-any.whl (48 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 48.2/48.2 kB 1.4 MB/s eta 0:00:00
Collecting tiktoken
  Downloading tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 26.6 MB/s eta 0:00:00
Collecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 77.0/77.0 kB 8.3 MB/s eta 0:00:00
Collecting backoff<3.0,>=2.0 (from cohere)
  Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Collecting fastavro==1.8.2 (from cohere)
  Downloading fastavro-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.7/2.7 MB 72.1 MB/s eta 0:00:00
Installing collecte

## Loading data

In [2]:
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0


In [3]:
import os

import openai
from dotenv import find_dotenv, load_dotenv

# Load settings from the .env file that find_dotenv() resolves; the return
# value is bound to `_` because it is not needed afterwards.
_ = load_dotenv(find_dotenv())

# Raises KeyError if OPENAI_API_KEY is not present in the environment.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [4]:
import pandas as pd

# Do not truncate long cell text when DataFrames are displayed.
pd.options.display.max_colwidth = None

# CSV file read here; `file_name` is reused by the document loader further down.
file_name = "conference_session_info.csv"
df = pd.read_csv(file_name)

# (rows, columns) of the loaded table
df.shape

(30, 8)

In [5]:
# Show the first five rows of the session table.
df.head(5)

Unnamed: 0,Start Date,End Date,Session Name,Session Description,Session Track,Industry,Speaker Name,Room Name
0,07/27/2020 02:00 PM,07/27/2020 03:30 PM,3D Printing for the Non-Tech Minded,This is 3D Printing 101 for those in the makerspace that don’t consider themselves technically astute.,3D Printing and Design,Technology,Jeffery Lowe & Marysa Balma,Room 101
1,07/27/2020 02:00 PM,07/27/2020 03:00 PM,3D Printing with Clay,"Clay has historically been a hands-on medium for over 20,000 years, both to create practical items for day-to-day living, and art for day-to-day beauty. Now with the advent of commercially available 3D clay printers, artists and engineers alike are creating inspirational pieces that were previously unimaginable.",3D Printing and Design,Education,Julie Parker,Room 201
2,07/27/2020 02:00 PM,07/27/2020 03:30 PM,Art in the Age of Automation,"There are some people who don’t believe that art can be “art” if it is made by a machine. The most intriguing and sometimes surprising beautiful art is made by non-sentient robots, based on data and interpretations of that data. So what are artists afraid of?",Ethics and Environment,Technology,Jamill Waters & Jess Abbott,Room 103
3,07/27/2020 02:00 PM,07/27/2020 03:30 PM,Augmented Real(ity) Estate,"Imagine if your company is moving you to a state too far away to spend time looking for a new place to live. Wouldn't it be nice to be able to meet an agent, walk through a house, open doors, go up steps, and check out the neighborhood from the comfort of your couch? Check out the latest innovations in augmented reality in the real estate market, and discuss its economic benefits.",Virtual and Augmented Reality,Technology,Grant Jacobson,Room 104
4,07/27/2020 02:00 PM,07/27/2020 03:00 PM,Hands-On Hacks,Join your fellow makers in learning their favorite hacks in popular maker categories.,Education and Training,Education,David Powlowski,Grand View Hall


### Load a CSV file into a list of Documents

Each document represents one row of the CSV file. Every column in the row is converted into a `key: value` pair, and each pair is written on its own line in the document’s `page_content`.

Reference: https://api.python.langchain.com/en/latest/document_loaders/langchain.document_loaders.csv_loader.CSVLoader.html

In [6]:
from langchain.document_loaders.csv_loader import CSVLoader

# Build one Document per CSV row from the same file pandas read above.
# The encoding is given explicitly: the data contains non-ASCII punctuation
# (e.g. ’ and “ ”), and without it the file would be decoded with the platform
# default, which is not UTF-8 everywhere.
loader = CSVLoader(file_path=file_name, encoding="utf-8")
docs = loader.load()

In [7]:
# Number of Documents returned by the loader.
len(docs)

30

In [8]:
# Preview at most the first 500 characters of the first document's text.
print(docs[0].page_content[:500])

Start Date: 07/27/2020 02:00 PM
End Date: 07/27/2020 03:30 PM
Session Name: 3D Printing for the Non-Tech Minded
Session Description: This is 3D Printing 101 for those in the makerspace that don’t consider themselves technically astute.
Session Track: 3D Printing and Design
Industry: Technology
Speaker Name: Jeffery Lowe & Marysa Balma
Room Name: Room 101


### Split documents

In [9]:
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

# Sizing shared by both splitters; lengths are measured with len().
chunk_size = 512
chunk_overlap = 32

# Recursive splitter; add_start_index adds a 'start_index' entry to each
# chunk's metadata.
r_text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    length_function=len,
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)

# Character splitter configured with a single space as its separator.
c_text_splitter = CharacterTextSplitter(
    length_function=len,
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
    separator=" ",
)

In [10]:
# Split with the CharacterTextSplitter for comparison only. The result is kept
# under its own name so that re-running this cell never replaces `pages`, the
# list produced by the recursive splitter in the next cell.
c_pages = c_text_splitter.split_documents(docs)

# Show the first two chunks.
print(c_pages[0])
print(c_pages[1])

page_content='Start Date: 07/27/2020 02:00 PM\nEnd Date: 07/27/2020 03:30 PM\nSession Name: 3D Printing for the Non-Tech Minded\nSession Description: This is 3D Printing 101 for those in the makerspace that don’t consider themselves technically astute.\nSession Track: 3D Printing and Design\nIndustry: Technology\nSpeaker Name: Jeffery Lowe & Marysa Balma\nRoom Name: Room 101' metadata={'source': 'conference_session_info.csv', 'row': 0}
page_content='Start Date: 07/27/2020 02:00 PM\nEnd Date: 07/27/2020 03:00 PM\nSession Name: 3D Printing with Clay\nSession Description: Clay has historically been a hands-on medium for over 20,000 years, both to create practical items for day-to-day living, and art for day-to-day beauty. Now with the advent of commercially available 3D clay printers, artists and engineers alike are creating inspirational pieces that were previously unimaginable.\nSession Track: 3D Printing and Design\nIndustry: Education\nSpeaker Name: Julie' metadata={'source': 'confere

In [11]:
# Chunk the loaded documents with the recursive splitter.
pages = r_text_splitter.split_documents(docs)

# Print the first two chunks, one per line.
print(pages[0], pages[1], sep="\n")

page_content='Start Date: 07/27/2020 02:00 PM\nEnd Date: 07/27/2020 03:30 PM\nSession Name: 3D Printing for the Non-Tech Minded\nSession Description: This is 3D Printing 101 for those in the makerspace that don’t consider themselves technically astute.\nSession Track: 3D Printing and Design\nIndustry: Technology\nSpeaker Name: Jeffery Lowe & Marysa Balma\nRoom Name: Room 101' metadata={'source': 'conference_session_info.csv', 'row': 0, 'start_index': 0}
page_content='Start Date: 07/27/2020 02:00 PM\nEnd Date: 07/27/2020 03:00 PM\nSession Name: 3D Printing with Clay\nSession Description: Clay has historically been a hands-on medium for over 20,000 years, both to create practical items for day-to-day living, and art for day-to-day beauty. Now with the advent of commercially available 3D clay printers, artists and engineers alike are creating inspirational pieces that were previously unimaginable.\nSession Track: 3D Printing and Design\nIndustry: Education' metadata={'source': 'conference

In [12]:
# Document count before splitting.
len(docs)

30

In [13]:
# Chunk count after splitting.
len(pages)

46

## Vectorstore and embedding

In [None]:
!pip install chromadb

In [15]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Embedding object constructed with its defaults; passed to Chroma below.
embedding = OpenAIEmbeddings()

In [16]:
# Directory name handed to Chroma as its persist_directory.
persist_directory = "persist_chroma"

In [17]:
# Build the vector store from the split chunks using the embedding object.
# NOTE(review): if persist_directory already holds a collection from an earlier
# run, this may add the same chunks again — confirm before re-running.
vectordb = Chroma.from_documents(
    pages,
    embedding=embedding,
    persist_directory=persist_directory,
)

In [18]:
# Number of entries in the store's collection; compare with len(pages).
# Note: `_collection` is a private attribute of the vector store object.
print(vectordb._collection.count())

46


In [19]:
# Chunk count, for comparison with the collection count printed above.
len(pages)

46

In [20]:
# Ask the vector store to persist itself (persist_directory was set at creation).
# NOTE(review): newer chromadb releases may persist automatically — confirm
# whether this explicit call is still required for the installed version.
vectordb.persist()

In [21]:
# Natural-language query used for the similarity search below.
question = "which sessions are about augmented reality?"

In [22]:
# Retrieve the 3 chunks most similar to the question.
# NOTE(review): this rebinds `docs`, which earlier held the Documents loaded
# from the CSV; re-running the split cells after this one would split the
# search results instead. Consider a distinct name once later cells are checked.
docs = vectordb.similarity_search(question, k=3)
docs

[Document(page_content="Start Date: 07/27/2020 02:00 PM\nEnd Date: 07/27/2020 03:30 PM\nSession Name: Augmented Real(ity) Estate\nSession Description: Imagine if your company is moving you to a state too far away to spend time looking for a new place to live. Wouldn't it be nice to be able to meet an agent, walk through a house, open doors, go up steps, and check out the neighborhood from the comfort of your couch? Check out the latest innovations in augmented reality in the real estate market, and discuss its economic benefits.", metadata={'row': 3, 'source': 'conference_session_info.csv', 'start_index': 0}),
 Document(page_content='Session Track: Virtual and Augmented Reality\nIndustry: Technology\nSpeaker Name: Grant Jacobson\nRoom Name: Room 104', metadata={'row': 3, 'source': 'conference_session_info.csv', 'start_index': 508}),
 Document(page_content='Session Track: Virtual and Augmented Reality\nIndustry: Technology\nSpeaker Name: Teena Judkins\nRoom Name: Room 200', metadata={'r