In [1]:
# Install necessary libraries
! pip -q install langchain-openai langchain-community weaviate-client tiktoken

In [2]:
# Import requests (to fetch the text file from the URL) and TextLoader (to load it as a LangChain document)
import requests
from langchain.document_loaders import TextLoader

In [3]:
# Address of the raw project synopsis text hosted on GitHub
url = (
    "https://raw.githubusercontent.com/"
    "conwayyao/Recipe-Analysis/master/project_synopsis.txt"
)

In [4]:
# Fetch the content from the URL.
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() stops the notebook here on a 4xx/5xx reply instead of letting an
# error page be saved and indexed as if it were the document.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [5]:
# Print the full text fetched from the URL as a quick sanity check of the download
print(response.text)

# Why this project?

Ironically, I enjoy cooking but I hate using recipes. In my opinion, recipes tend to encourage a slavish devotion to the recipe and divert attention from the more important part of cooking, the physical abilities (or, in my case, the lack thereof) of the chef. Foodies tend to privilege the provenance of obscure ingredients ("coulis of feather saffron hand-picked from a seaside village in Morocco"); I prefer the mundane but practical parts of cooking that get ignored in recipes (like freezing leftover sauce in ice cube trays, or the proper way to peel a mango).

I am curious how much variation exists between dishes, and whether such variation is warranted. Are there really 5000 ways to cook a steak, or are many of these variations superfluous? Some chefs like Heston Blumenthal have taken an experimental approach to answering these questions, systematically and scientifically investigating every property of a dish, its ingredients, and its cooking methods to determin

In [6]:
# Save the fetched content to a local file.
# The encoding is stated explicitly so the bytes written do not depend on the
# platform's default locale.
with open("project_synopsis.txt", "w", encoding="utf-8") as f:
    f.write(response.text)

In [7]:
# Create a loader for the local text file.
# The encoding is stated explicitly so decoding does not depend on the platform's
# default locale.
loader = TextLoader("project_synopsis.txt", encoding="utf-8")

In [8]:
# Run the loader. The result is a list of Document objects (see the printed output),
# which is why later cells index into it with document[0].
document = loader.load()
print(document)

[Document(page_content='# Why this project?\n\nIronically, I enjoy cooking but I hate using recipes. In my opinion, recipes tend to encourage a slavish devotion to the recipe and divert attention from the more important part of cooking, the physical abilities (or, in my case, the lack thereof) of the chef. Foodies tend to privilege the provenance of obscure ingredients ("coulis of feather saffron hand-picked from a seaside village in Morocco"); I prefer the mundane but practical parts of cooking that get ignored in recipes (like freezing leftover sauce in ice cube trays, or the proper way to peel a mango).\n\nI am curious how much variation exists between dishes, and whether such variation is warranted. Are there really 5000 ways to cook a steak, or are many of these variations superfluous? Some chefs like Heston Blumenthal have taken an experimental approach to answering these questions, systematically and scientifically investigating every property of a dish, its ingredients, and its

In [9]:
# Display the raw text (page_content) of the first Document in the loaded list
document[0].page_content

'# Why this project?\n\nIronically, I enjoy cooking but I hate using recipes. In my opinion, recipes tend to encourage a slavish devotion to the recipe and divert attention from the more important part of cooking, the physical abilities (or, in my case, the lack thereof) of the chef. Foodies tend to privilege the provenance of obscure ingredients ("coulis of feather saffron hand-picked from a seaside village in Morocco"); I prefer the mundane but practical parts of cooking that get ignored in recipes (like freezing leftover sauce in ice cube trays, or the proper way to peel a mango).\n\nI am curious how much variation exists between dishes, and whether such variation is warranted. Are there really 5000 ways to cook a steak, or are many of these variations superfluous? Some chefs like Heston Blumenthal have taken an experimental approach to answering these questions, systematically and scientifically investigating every property of a dish, its ingredients, and its cooking methods to det

In [10]:
# Import CharacterTextSplitter to split the document into chunks
from langchain.text_splitter import CharacterTextSplitter

In [11]:
# Configure a CharacterTextSplitter (chunk_size=500, chunk_overlap=50) and use it to
# break the loaded document into smaller Document chunks for embedding.
# NOTE(review): the third chunk shown later in this notebook is well over 500 characters,
# so chunk_size is evidently not a hard cap here — presumably because this splitter only
# cuts on its separator; confirm against the LangChain text-splitter docs.
splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
text_chunks = splitter.split_documents(document)

# Dump every chunk for inspection
print(text_chunks)



[Document(page_content='# Why this project?', metadata={'source': 'project_synopsis.txt'}), Document(page_content='Ironically, I enjoy cooking but I hate using recipes. In my opinion, recipes tend to encourage a slavish devotion to the recipe and divert attention from the more important part of cooking, the physical abilities (or, in my case, the lack thereof) of the chef. Foodies tend to privilege the provenance of obscure ingredients ("coulis of feather saffron hand-picked from a seaside village in Morocco"); I prefer the mundane but practical parts of cooking that get ignored in recipes (like freezing leftover sauce in ice cube trays, or the proper way to peel a mango).', metadata={'source': 'project_synopsis.txt'}), Document(page_content='I am curious how much variation exists between dishes, and whether such variation is warranted. Are there really 5000 ways to cook a steak, or are many of these variations superfluous? Some chefs like Heston Blumenthal have taken an experimental a

In [12]:
# Display the first chunk as a Document object (text plus its metadata)
text_chunks[0]

Document(page_content='# Why this project?', metadata={'source': 'project_synopsis.txt'})

In [13]:
# Display only the text of the first chunk
text_chunks[0].page_content

'# Why this project?'

In [14]:
# Display only the text of the third chunk (index 2)
text_chunks[2].page_content

'I am curious how much variation exists between dishes, and whether such variation is warranted. Are there really 5000 ways to cook a steak, or are many of these variations superfluous? Some chefs like Heston Blumenthal have taken an experimental approach to answering these questions, systematically and scientifically investigating every property of a dish, its ingredients, and its cooking methods to determine the "best" way to cook a dish. Since I do not have access to recipe instructions, I can only examine these recipes based on its ingredients. Nevertheless, I hope to use a data-science approach to see if the cooking wisdom of the crowds have arrived at the same answers, and if they match those of traditional experts.'

In [15]:
# Read the OpenAI API key from Colab's user data, where it is stored under the
# name OPEN_AI_KEY, so the key itself never appears in the notebook.
from google.colab import userdata

OPENAI_API_KEY = userdata.get("OPEN_AI_KEY")

In [16]:
# Import necessary modules for creating embeddings and using Weaviate
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Weaviate
from weaviate import Client
from weaviate.embedded import EmbeddedOptions

In [17]:
# Create a Weaviate client backed by an embedded instance (default EmbeddedOptions);
# the output below shows the embedded process being started.
client = Client(embedded_options=EmbeddedOptions())

Started /root/.cache/weaviate-embedded: process ID 53728


In [18]:
# Embed every text chunk with OpenAI embeddings and store the results in Weaviate.
# NOTE(review): by_text=False presumably makes searches use the embedding vectors computed
# here rather than Weaviate's own text search — confirm against the LangChain Weaviate docs.
vector_store = Weaviate.from_documents(
    documents=text_chunks,
    embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY),
    client=client,
    by_text=False,
)

In [19]:
# Wrap the vector store as a retriever (default settings) so it can be used as the
# "context" step of the RAG chain built later
retriever = vector_store.as_retriever()

In [20]:
# Import ChatPromptTemplate for creating the prompt template
from langchain.prompts import ChatPromptTemplate

In [21]:
# Prompt skeleton for the question-answering chain.
# {question} and {context} are placeholders filled in when the prompt is formatted.
template = (
    "You are an assistant for question-answering tasks.\n"
    "Use the following pieces of retrieved context to answer the question.\n"
    "If you don't know the answer, just say that you don't know.\n"
    "Use ten sentences maximum and keep the answer concise.\n"
    "Question: {question}\n"
    "Context: {context}\n"
    "Answer:\n"
)

In [22]:
# Build a ChatPromptTemplate from the template string; its input variables are
# `context` and `question` (see the printed prompt below)
prompt = ChatPromptTemplate.from_template(template)

In [23]:
# Print the prompt object to inspect its input variables and message structure
print(prompt)

input_variables=['context', 'question'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks.\nUse the following pieces of retrieved context to answer the question.\nIf you don't know the answer, just say that you don't know.\nUse ten sentences maximum and keep the answer concise.\nQuestion: {question}\nContext: {context}\nAnswer:\n"))]


In [24]:
# Import necessary modules for chat models and output parsing
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

In [25]:
# Initialize the chat model that generates the answers.
# ChatOpenAI is taken from langchain_openai (the package this notebook already installs
# and uses for OpenAIEmbeddings): the langchain.chat_models class is deprecated and emits
# a deprecation warning when instantiated. The import here rebinds the name ChatOpenAI
# for the rest of the notebook.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")

  warn_deprecated(


In [26]:
# Create the parser used as the last step of the chain, so the chain yields a string
# (see the string result of rag_chain.invoke at the end of the notebook)
output_parser = StrOutputParser()

In [27]:
# The question to ask; it is passed to both the retriever and the prompt by the RAG chain
query = "Tell me about this project"

In [28]:
def _format_docs(docs):
    """Join the text of the retrieved Documents into one plain-text context block.

    Without this step the prompt's {context} slot receives the Python repr of a list of
    Document objects (metadata and escaped newlines included) instead of readable text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Create a RAG (retrieval-augmented generation) chain:
#   1. the incoming question is sent to the retriever, whose hits are flattened to text,
#      and is also passed through unchanged as "question";
#   2. both values fill the prompt template;
#   3. the chat model answers;
#   4. the output parser reduces the model's message to a plain string.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

In [29]:
# Run the RAG chain on the query; the returned answer string is displayed as the cell's output
rag_chain.invoke(query)

"This project focuses on utilizing machine learning to predict the cuisine or dish of a recipe based on its ingredients. The project initially used the Spoonacular API but later switched to Yummly's recipe API for better quality and quantity of recipes. The next steps include refining cuisine and dish prediction through experimentation with model hyperparameters. The goal is to improve accuracy by 15% for cuisines prediction and 10% for dish prediction. The project also plans to publish interesting results or infographics online using static images or Bokeh/D3. Additionally, clustering analysis is on the agenda for this project."