In [2]:
# Import the data analysis library
import os
from getpass import getpass
from operator import itemgetter
import pandas as pd
from tqdm import tqdm

# All Elasticsearch based
from elasticsearch import Elasticsearch, exceptions

# Langchain Imports 
from langchain.text_splitter import CharacterTextSplitter
from langchain_elasticsearch import ElasticsearchStore
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.runnable import RunnableMap
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import format_document
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser

In [None]:
# IDs and Passwords
ELASTIC_CLOUD_ID = "ab9fc665e06642b4b2c99c5f9898e875:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvJDg3MzIwMzg0YWMzNzRjNTJiYmQ3YWFkYjI4NWUxNjExJDIxMDFiZTgzNjg5ODQ0MmFhMzg4ZTMzYzU5MTdmOTlh"
ELASTIC_API_KEY = "WUoyVkpKTUJ0ZTlkVlhESWtSVDQ6eGUwY1dzcmlSbGE4bW5iY2gxQl9CZw=="
ELASTIC_PASSWORD = "GjcdSvHrx8Mh1Qy7ZB93m3DX"
OPENAI_API_KEY = "sk-JDd8XASldxnf4JlfzqcFX6IZkRp88UQSOuS6MqsBgLT3BlbkFJoaFWr04LuZoWZQlTBdTzxzbQ7tgFTD2iXbICVfLxcA"

In [5]:
# Initialize the OpenAI API key 
os.environ["OPENAI_API_KEY"] = getpass("OpenAI API Key :")

In [6]:
# Initialize Elasticsearch client
es = Elasticsearch(
    cloud_id=ELASTIC_CLOUD_ID,
    basic_auth=("elastic", ELASTIC_PASSWORD)
)

# Test the connection
if es.ping():
    print("Connected to ElasticCloud!")
else:
    print("Connection failed. Check your credentials.")

Connected to ElasticCloud!


In [None]:
# Delete all indexes. Destructive operation
# try:
#     # Delete all indexes using a wildcard
#     response = es.indices.delete(index="*")
#     print("All indexes deleted successfully:", response)
# except Exception as e:
#     print(f"An error occurred while deleting all indexes: {e}")

In [8]:
# A new index name
elastic_index_name = "openai_mit_llama_index"

# Check if the index already exists
if not es.indices.exists(index=elastic_index_name):
    es.indices.create(index=elastic_index_name)
    print(f"Index '{elastic_index_name}' created successfully.")
else:
    print(f"Index '{elastic_index_name}' already exists.")

Index 'openai_mit_llama_index' already exists.


In [180]:
# Load the dataset 
FILEPATH = "mit_news_94_23.csv"
mit_news = pd.read_csv(FILEPATH, index_col=0)

In [181]:
# Check the data types of all columns before making appropriate changes
mit_news.dtypes

Published Date    object
Author            object
Source            object
Article Header    object
Sub_Headings      object
Article Body      object
Url               object
dtype: object

In [182]:
# Rename the columns first 
mit_news = mit_news.rename(columns={
    "Published Date": "date",
    "Author": "author",
    "Source": "source",
    "Article Header": "heading",
    "Sub_Headings": "description",
    "Article Body": "content",
    "Url": "url"
})


In [183]:
mit_news.head()

Unnamed: 0,date,author,source,heading,description,content,url
0,"July 7, 2023",Adam Zewe,MIT News Office,Learning the language of molecules to predict ...,This AI system only needs a small amount of da...,['Discovering new materials and drugs typicall...,https://news.mit.edu/2023/learning-language-mo...
1,"July 6, 2023",Alex Ouyang,Abdul Latif Jameel Clinic for Machine Learning...,MIT scientists build a system that can generat...,"BioAutoMATED, an open-source, automated machin...",['Is it possible to build machine-learning mod...,https://news.mit.edu/2023/bioautomated-open-so...
2,"June 30, 2023",Jennifer Michalowski,McGovern Institute for Brain Research,"When computer vision works more like a brain, ...",Training artificial neural networks with data ...,"['From cameras to self-driving cars, many of t...",https://news.mit.edu/2023/when-computer-vision...
3,"June 30, 2023",Mary Beth Gallagher,School of Engineering,Educating national security leaders on artific...,"Experts from MIT’s School of Engineering, Schw...",['Understanding artificial intelligence and ho...,https://news.mit.edu/2023/educating-national-s...
4,"June 30, 2023",Adam Zewe,MIT News Office,Researchers teach an AI to write better chart ...,A new dataset can help scientists develop auto...,['Chart captions that explain complex trends a...,https://news.mit.edu/2023/researchers-chart-ca...


In [184]:
# Convert "Published Date" column to datetime
mit_news["date"] = pd.to_datetime(
    mit_news["date"],  # Column to convert
    errors="coerce",            # Invalid parsing will be set as NaT
    format="%B %d, %Y"          # Explicit format: "Month day, Year"
)

In [185]:
# Check for null values in all columns 
mit_news.isnull().sum()

date             0
author         170
source         186
heading          0
description    148
content          0
url              0
dtype: int64

In [186]:
# Fill missing values in all columns with "Missing"
mit_news = mit_news.fillna("Missing")

# Verify that missing values are replaced
print(mit_news.isna().sum())  # Check for remaining missing values
print(mit_news.head())  # Preview the updated DataFrame

date           0
author         0
source         0
heading        0
description    0
content        0
url            0
dtype: int64
        date                author  \
0 2023-07-07             Adam Zewe   
1 2023-07-06           Alex Ouyang   
2 2023-06-30  Jennifer Michalowski   
3 2023-06-30   Mary Beth Gallagher   
4 2023-06-30             Adam Zewe   

                                              source  \
0                                    MIT News Office   
1  Abdul Latif Jameel Clinic for Machine Learning...   
2              McGovern Institute for Brain Research   
3                              School of Engineering   
4                                    MIT News Office   

                                             heading  \
0  Learning the language of molecules to predict ...   
1  MIT scientists build a system that can generat...   
2  When computer vision works more like a brain, ...   
3  Educating national security leaders on artific...   
4  Researchers teach a

In [139]:
# Also clean the content variable 
mit_news["content"] = mit_news["content"].apply(lambda text: text[1: -1])

In [187]:
mit_news["content"] = mit_news["content"].str.replace(r'^[^\w]+|[^\w]+$', '', regex=True)

In [188]:
mit_news["content"][1013][-1]

'm'

In [189]:
mit_news.dtypes

date           datetime64[ns]
author                 object
source                 object
heading                object
description            object
content                object
url                    object
dtype: object

In [190]:
mit_news["author"][12]

'Missing'

In [204]:
# Prepare metadata and content
metadata = []
content = []

for _, row in tqdm(mit_news.iterrows()):
    content.append(row["content"])  # Store article body as content
    metadata.append({"date": row["date"], 
                     "author": row["author"], 
                     "source": row["source"], 
                     "heading": row["heading"], 
                     "description": row["description"]})  # Store heading as metadata

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.create_documents(content, metadatas=metadata)

1018it [00:00, 34585.61it/s]


In [205]:
# Delete everything from the database before reinsertion
try:
    # Delete all documents from the index using a match_all query
    response = es.delete_by_query(
        index=elastic_index_name,
        body={"query": {"match_all": {}}},
        conflicts="proceed"  # Continue even if there are version conflicts
    )
    print(f"All documents deleted from index '{elastic_index_name}': {response}")
except Exception as e:
    print(f"An error occurred: {e}")

All documents deleted from index 'openai_mit_llama_index': {'took': 1, 'timed_out': False, 'total': 0, 'deleted': 0, 'batches': 0, 'version_conflicts': 0, 'noops': 0, 'retries': {'bulk': 0, 'search': 0}, 'throttled_millis': 0, 'requests_per_second': -1.0, 'throttled_until_millis': 0, 'failures': []}


In [9]:
# Check for OpenAI embeddings 
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
# Index documents into Elasticsearch (or any vector store)
vector_store = ElasticsearchStore(
    es_cloud_id=ELASTIC_CLOUD_ID,
    es_api_key=ELASTIC_API_KEY,
    index_name=elastic_index_name,
    embedding=embeddings,
)

  embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)


In [207]:
query = "What does the article talk about J-WAFS Grand Challenge?"

In [11]:
query = "What does the article talk about J-WAFS Grand Challenge?"
results = vector_store.similarity_search(query=query, k=1)
print(results)

[Document(metadata={'date': '2023-05-10T00:00:00', 'author': 'Carolyn Blais', 'source': 'Abdul Latif Jameel Water and Food Systems Lab', 'heading': 'Inaugural J-WAFS Grand Challenge aims to develop enhanced crop variants and move them from lab to land', 'description': 'Matt Shoulders will lead an interdisciplinary team to improve RuBisCO — the photosynthesis enzyme thought to be the holy grail for improving agricultural yield.'}, page_content="According to MIT’s charter, established in 1861, part of the Institute’s mission is to advance the “development and practical application of science in connection with arts, agriculture, manufactures, and commerce.” Today, the Abdul Latif Jameel Water and Food Systems Lab (J-WAFS) is one of the driving forces behind water and food-related research on campus, much of which relates to agriculture. In 2022, J-WAFS established the Water and Food Grand Challenge Grant to inspire MIT researchers to work toward a water-secure and food-secure future for 

In [None]:
# Iterate through each document
# for i, doc in tqdm(enumerate(docs)):
#     try:
#         # Attempt to index the document
#         vector_store.from_documents(
#             documents=[doc],  
#             index_name=elastic_index_name,
#             es_cloud_id=ELASTIC_CLOUD_ID,
#             es_api_key=ELASTIC_API_KEY
#         )
#     except Exception as e:
#         # Log the error and skip the document
#         print(len(doc.page_content))
#         print(f"Error indexing document {i}: {e}")

986it [07:06,  2.31it/s]


In [208]:
# The way of indexing described upward doesn't work. So what to do? 
batch_size = 50
for i in tqdm(range(0, len(docs), batch_size)):
  batch = docs[i:i+batch_size]
  vector_store.from_documents(batch,
                        embeddings,
                        index_name=elastic_index_name,
                        es_cloud_id=ELASTIC_CLOUD_ID,
                        es_api_key=ELASTIC_API_KEY
  )

100%|██████████| 20/20 [01:05<00:00,  3.28s/it]


In [12]:
from typing import List, Any
from langchain.schema import BaseRetriever, Document
from pydantic import Field

class HeadingBasedRetriever(BaseRetriever):
  vector_store: Any  # Define vector_store as a field (type can be adjusted based on your implementation)
  top_k: int = Field(default=1)  # Define top_k with a default value

  def _get_relevant_documents(self, query: str) -> List[Document]:
    # Retrieve top-k documents based on heading similarity
    results = self.vector_store.similarity_search(
        query=query,
        k=self.top_k,
        filter=None  # Optional: Add filters if needed
    )

    # Extract and return the article body of the top result
    if results:
        top_result = results[0]
        return [Document(page_content=top_result.page_content, metadata=top_result.metadata)]

    return []  # Return an empty list if no results are found

In [13]:
# Initialize retriever
retriever = HeadingBasedRetriever(vector_store=vector_store)

In [14]:
context = retriever._get_relevant_documents("What does the article talk about J-WAFS Grand Challenge?")

In [15]:
context

[Document(metadata={'date': '2023-05-10T00:00:00', 'author': 'Carolyn Blais', 'source': 'Abdul Latif Jameel Water and Food Systems Lab', 'heading': 'Inaugural J-WAFS Grand Challenge aims to develop enhanced crop variants and move them from lab to land', 'description': 'Matt Shoulders will lead an interdisciplinary team to improve RuBisCO — the photosynthesis enzyme thought to be the holy grail for improving agricultural yield.'}, page_content="According to MIT’s charter, established in 1861, part of the Institute’s mission is to advance the “development and practical application of science in connection with arts, agriculture, manufactures, and commerce.” Today, the Abdul Latif Jameel Water and Food Systems Lab (J-WAFS) is one of the driving forces behind water and food-related research on campus, much of which relates to agriculture. In 2022, J-WAFS established the Water and Food Grand Challenge Grant to inspire MIT researchers to work toward a water-secure and food-secure future for 

In [16]:
query

'What does the article talk about J-WAFS Grand Challenge?'

In [17]:
# Initialize the model 
llm_chat = ChatOpenAI(model="gpt-4o-mini")

  llm_chat = ChatOpenAI(model="gpt-4o-mini")


In [18]:
llm_chat.invoke("How are you doing today?").content

"I'm just a program, so I don't have feelings, but I'm here and ready to help you! How can I assist you today?"

In [19]:
from langchain_core.prompts import MessagesPlaceholder

In [250]:
query

'What does the article talk about J-WAFS Grand Challenge?'

In [251]:
# ANSWER_PROMPT = ChatPromptTemplate.from_template(
#     """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Be verbose and educational in your response as possible.

#     context: {context}
#     Question: "{question}"
#     Answer:
#     """
# )
SYSTEM_PROMPT = (
        """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Be verbose and educational in your response as possible.
            context: {context}
            Answer:
        """
)

ANSWER_PROMPT = ChatPromptTemplate.from_messages(
    [
       ("system", SYSTEM_PROMPT), 
       ("human", "{question}")
    ]
)

chain = (
    {"context": retriever, "question": RunnablePassthrough()} # Here the "retriever" context refers to the entire document data being passed to the LLM query for context
    | ANSWER_PROMPT
    | llm_chat
    | StrOutputParser()
)

ans = chain.invoke(query) # The "{question}" parameter is the user question being passed as an input

print("---- Answer ----")
print(ans)

---- Answer ----
The article discusses the inaugural J-WAFS Grand Challenge, which is a significant initiative launched by the Abdul Latif Jameel Water and Food Systems Lab (J-WAFS) at MIT. The primary aim of this challenge is to inspire researchers to work towards a water-secure and food-secure future, particularly in the context of a changing planet.

The Grand Challenge Grant was awarded to a team of researchers led by Professor Matt Shoulders and research scientist Robert Wilson from the Department of Chemistry. They plan to tackle a long-standing issue in crop biology: improving the efficiency of photosynthesis, specifically by enhancing the performance of the enzyme RuBisCO (Ribulose-1,5-Bisphosphate Carboxylase/Oxygenase), which is essential for converting carbon dioxide into energy-rich molecules during photosynthesis.

The team will receive $1.5 million over three years to conduct a multistage research project that employs cutting-edge innovations in synthetic and computationa

In [20]:
from langchain.chains import create_history_aware_retriever

contextualize_q_system_prompt = (
    """
        Given a chat history and the latest user question which might reference context in the chat history as well as in the external retrieved context given here,
        formulate a standalone question which can be understood without the chat history. Do NOT answer the question,
        just reformulate it if needed and otherwise return it as is. Be verbose and yet educational in your response.  
    """
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

- Assemble the chain
- This chain prepends a rephrasing of the input query to our retriever, so that the retrieval incorporates the context of the conversation 
- Now I can build my complete QA chain. The chain prepends a rephrasing of the input query to the retriever, so that the retrieval incorporates the context of the conversation.
- Now let's build the entire conversation chain
- 

In [21]:

history_aware_retriever = create_history_aware_retriever(
    llm_chat, retriever, contextualize_q_prompt
)

In [22]:
system_prompt = (
    """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Be verbose and educational in your response as possible.
            context: {context}
            Answer:
    """
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [23]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [24]:
question_answer_chain = create_stuff_documents_chain(llm_chat, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [314]:
query

'What does the article talk about J-WAFS Grand Challenge?'

In [25]:
from langchain.memory import ChatMessageHistory

In [41]:
chat_history = []

In [42]:
chat_history.append({"role": "assistant", "content": query})
response = rag_chain.invoke({"input": query, "chat_history": chat_history})
# chat_history.append(response["answer"])
chat_history.append({"role": "assistant", "content": response["answer"]})

In [43]:
chat_history

[{'role': 'assistant',
  'content': 'What does the article talk about J-WAFS Grand Challenge?'},
 {'role': 'assistant',
  'content': "The article discusses the J-WAFS (Abdul Latif Jameel Water and Food Systems Lab) Grand Challenge, which is a research initiative aimed at addressing critical issues related to water and food security in the context of a changing global environment. Established in 2022, the Grand Challenge Grant encourages MIT researchers to collaborate across various disciplines to develop innovative solutions for these pressing challenges.\n\nThe program began with a call for proposals, which attracted a significant response from MIT researchers across 18 different departments, labs, and centers. After a series of workshops and reviews, a team led by Professor Matt Shoulders and research scientist Robert Wilson from the Department of Chemistry was awarded the inaugural grant of $1.5 million over three years. Their project focuses on improving the efficiency of photosynt

In [44]:
query_1 = "Who were the first awardees of the challenge?"

response_1 = rag_chain.invoke({"input": query_1, "chat_history": chat_history})
response_1

{'input': 'Who were the first awardees of the challenge?',
 'chat_history': [{'role': 'assistant',
   'content': 'What does the article talk about J-WAFS Grand Challenge?'},
  {'role': 'assistant',
   'content': "The article discusses the J-WAFS (Abdul Latif Jameel Water and Food Systems Lab) Grand Challenge, which is a research initiative aimed at addressing critical issues related to water and food security in the context of a changing global environment. Established in 2022, the Grand Challenge Grant encourages MIT researchers to collaborate across various disciplines to develop innovative solutions for these pressing challenges.\n\nThe program began with a call for proposals, which attracted a significant response from MIT researchers across 18 different departments, labs, and centers. After a series of workshops and reviews, a team led by Professor Matt Shoulders and research scientist Robert Wilson from the Department of Chemistry was awarded the inaugural grant of $1.5 million o

In [45]:
chat_history.append({"role": "assistant", "content": query_1})
chat_history.append({"role": "user", "content": response_1["answer"]})

In [46]:
query_2 = "What was your last reply?"
chat_history.append({"role": "assistant", "content": query_2})
response_2 = rag_chain.invoke({"input": query_2, "chat_history": chat_history})
response_2

{'input': 'What was your last reply?',
 'chat_history': [{'role': 'assistant',
   'content': 'What does the article talk about J-WAFS Grand Challenge?'},
  {'role': 'assistant',
   'content': "The article discusses the J-WAFS (Abdul Latif Jameel Water and Food Systems Lab) Grand Challenge, which is a research initiative aimed at addressing critical issues related to water and food security in the context of a changing global environment. Established in 2022, the Grand Challenge Grant encourages MIT researchers to collaborate across various disciplines to develop innovative solutions for these pressing challenges.\n\nThe program began with a call for proposals, which attracted a significant response from MIT researchers across 18 different departments, labs, and centers. After a series of workshops and reviews, a team led by Professor Matt Shoulders and research scientist Robert Wilson from the Department of Chemistry was awarded the inaugural grant of $1.5 million over three years. The

In [47]:
chat_history.append({"role": "user", "content": response_2["answer"]})
query_3 = "What was my most initial question about?"
chat_history.append({"role": "assistant", "content": query_3})
response_3 = rag_chain.invoke({"input": query_3, "chat_history": chat_history})
response_3

{'input': 'What was my most initial question about?',
 'chat_history': [{'role': 'assistant',
   'content': 'What does the article talk about J-WAFS Grand Challenge?'},
  {'role': 'assistant',
   'content': "The article discusses the J-WAFS (Abdul Latif Jameel Water and Food Systems Lab) Grand Challenge, which is a research initiative aimed at addressing critical issues related to water and food security in the context of a changing global environment. Established in 2022, the Grand Challenge Grant encourages MIT researchers to collaborate across various disciplines to develop innovative solutions for these pressing challenges.\n\nThe program began with a call for proposals, which attracted a significant response from MIT researchers across 18 different departments, labs, and centers. After a series of workshops and reviews, a team led by Professor Matt Shoulders and research scientist Robert Wilson from the Department of Chemistry was awarded the inaugural grant of $1.5 million over t

In [49]:
chat_history.append({"role": "assistant", "content": response_3["answer"]})
query_4 = "Are you able to retrieve information from past conversations over here and frame your responses?"
chat_history.append({"role": "user", "content": query_4})
response_4 = rag_chain.invoke({"input": query_4, "chat_history": chat_history})
response_4["answer"]

'Yes, I can reference information from our current conversation to provide context and formulate responses. However, I do not have the capability to recall previous conversations once the session ends. My responses are based solely on the context provided within the ongoing interaction. If you have specific questions or need clarification on previous points discussed in this session, feel free to ask!'

In [None]:
# qa_prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", SYSTEM_PROMPT),
#         MessagesPlaceholder("chat_history"),
#         ("human", "{input}"),
#     ]
# )

In [266]:
query

'What does the article talk about J-WAFS Grand Challenge?'

In [267]:
query

'What does the article talk about J-WAFS Grand Challenge?'

In [268]:
# chain = (
#     {"context": retriever, "question": RunnablePassthrough()} # Here the "retriever" context refers to the entire document data being passed to the LLM query for context
#     | contextualize_q_prompt
#     | llm_chat
#     | StrOutputParser()
# )
# chat_history = []
# ans = chain.invoke({"question": query, "chat_history": chat_history}) # The "{question}" parameter is the user question being passed as an input

# print("---- Answer ----")
# print(ans)

In [None]:
# ANSWER_PROMPT = ChatPromptTemplate.from_template(
#     """
# You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Be as verbose and educational in your response as possible.
# Each passage has a SOURCE which is the title of the document. When answering, cite source name of the passages you are answering from below the answer, on a new line, with a prefix of "SOURCE:".

# context:
# {context}

# Question: {question}
# Answer:
# """
# )

# DOCUMENT_PROMPT = PromptTemplate.from_template(
#     """
# ---
# SOURCE: {heading}
# {page_content}
# ---
# """
# )

# CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(
#     """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

# Chat History:
# {chat_history}
# Follow Up Input: {question}
# """
# )

# standalone_question = RunnableMap(
#     standalone_question=RunnablePassthrough.assign(
#         chat_history=lambda x: _format_chat_history(x["chat_history"])
#     )
#     | CONDENSE_QUESTION_PROMPT
#     | llm_chat
#     | StrOutputParser(),
# )


# def _format_chat_history(chat_history) -> str:
#     buffer = ""
#     for dialogue_turn in chat_history:
#         human = "Human: " + dialogue_turn[0]
#         ai = "Assistant: " + dialogue_turn[1]
#         buffer += "\n" + "\n".join([human, ai])
#     return buffer


# def _combine_documents(
#     docs, document_prompt=DOCUMENT_PROMPT, document_separator="\n\n"
# ):
#     doc_strings = [format_document(doc, document_prompt) for doc in docs]
#     return document_separator.join(doc_strings)


# retrieved_documents = RunnableMap(
#     docs=itemgetter("standalone_question") | retriever,
#     question=itemgetter("standalone_question"),
# )

# _context = {
#     "context": lambda x: _combine_documents(x["docs"]),
#     "question": lambda x: x["question"],
# }

# answer = {
#     "answer": _context | ANSWER_PROMPT | llm_chat,
#     "docs": itemgetter("docs"),
# }

# chain = standalone_question | retrieved_documents | answer

# ans = chain.invoke(
#     {
#         "question": "Who were awarded the initial grant for the challenge?",
#         "chat_history": [
#             query,
#         ],
#     }
# )

# print("---- Answer ----")
# print(ans["answer"])

---- Answer ----
content='The initial grant for the MIT Solve challenge was awarded to all Solver teams selected during the Solve Challenge Finals, which took place in September. Each selected team received a $10,000 grant as part of the funding provided by MIT Solve. This funding was part of a larger prize pool of $1.25 million that was available for the 2019 Solver teams, aimed at supporting innovative solutions to various global challenges.\n\nThe announcement specified that multiple sponsors contributed to the funding, including General Motors, the Patrick J. McGovern Foundation, and others. These grants were part of a comprehensive effort to support tech innovators working towards significant social impact.\n\nSOURCE: MIT Solve announces $1.25 million in funding for 2019 Solver teams' additional_kwargs={} response_metadata={'token_usage': <OpenAIObject at 0x15ac28e10> JSON: {
  "prompt_tokens": 1147,
  "completion_tokens": 145,
  "total_tokens": 1292,
  "prompt_tokens_details": {


In [240]:
## Try your tryst with Message History 
from langchain.memory import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

ephemeral_chat_history = ChatMessageHistory()

In [248]:
user_query = "What does the article talk about J-WAFS Grand Challenge?"
print(user_query)
ephemeral_chat_history.add_user_message(str(user_query))
response = chain.invoke({
        str(ephemeral_chat_history.messages[-1].content),
    })
print(response)
ephemeral_chat_history.add_ai_message(str(response))
print(ephemeral_chat_history.messages)
user_query_1 = input("Enter what you want to ask further")
ephemeral_chat_history.add_user_message(str(user_query_1))
response_1 = chain.invoke({
        "messages": ephemeral_chat_history.messages,
    })
print(response_1)

What does the article talk about J-WAFS Grand Challenge?


TypeError: expected string or buffer

In [323]:
retriever

HeadingBasedRetriever(vector_store=<langchain_elasticsearch.vectorstores.ElasticsearchStore object at 0x14bb1afd0>)

In [324]:
# Save the configuration to a file
import pickle
with open("retriever_config.pkl", "wb") as f:
    pickle.dump(retriever, f)

TypeError: cannot pickle '_thread._local' object

In [325]:
retriever_config = {
    "cloud_id": ELASTIC_CLOUD_ID,
    "api_key": ELASTIC_API_KEY,
    "index_name": elastic_index_name,
    "embedding_function": OpenAIEmbeddings()  # Or the function you're using
}

with open("retriever_config.pkl", "wb") as f:
    pickle.dump(retriever_config, f)

AttributeError: module 'openai' has no attribute 'OpenAI'

In [326]:
vector_store

<langchain_elasticsearch.vectorstores.ElasticsearchStore at 0x14bb1afd0>

In [327]:
vector_store_1 = ElasticsearchStore(
    es_cloud_id=ELASTIC_CLOUD_ID,
    es_api_key=ELASTIC_API_KEY,
    index_name=elastic_index_name,
    embedding=embeddings,
)

In [328]:
vector_store_1

<langchain_elasticsearch.vectorstores.ElasticsearchStore at 0x10cfc12d0>

In [329]:
query = "Common, some way of development must be there!!!"
results = vector_store_1.similarity_search(query=query, k=5)

In [330]:
results

[Document(metadata={'date': '2021-11-02T00:00:00', 'author': 'Terri Park', 'source': 'MIT Schwarzman College of Computing', 'heading': '3 Questions: Blending computing with other disciplines at MIT', 'description': 'The Common Ground for Computing Education is facilitating collaborations to develop new classes for students to pursue computational knowledge within the context of their fields of interest.'}, page_content="The demand for computing-related training is at an all-time high. At MIT, there has been a remarkable tide of interest in computer science programs, with heavy enrollment from students studying everything from economics to life sciences eager to learn how computational techniques and methodologies can be used and applied within their primary field.', 'Launched in 2020, the Common Ground for Computing Education was created through the MIT Stephen A. Schwarzman College of Computing to meet the growing need for enhanced curricula that connect computer science and artificia