Pipeline: read the data as text -> wrap it in Documents -> divide the data into chunks -> compute vector embeddings -> store them in a vector DB -> query.

In [6]:

import pandas as pd
# Load the stock rows from a CSV in the working directory. The sentence builder
# defined in this cell reads the columns Date, Symbol, Open, High, Low, Close,
# Prev Close, Volume, Turnover, Deliverable Volume and %Deliverble.
df = pd.read_csv('small_set_mix.csv')
# Sentence builder: one stock record in, one English description out.
def transform_to_descriptive_text(row):
    """Describe a single stock record in plain English.

    Args:
        row: Record indexable by key (for example a DataFrame row). The keys
            'Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'Prev Close',
            'Volume', 'Turnover', 'Deliverable Volume' and '%Deliverble' are
            read, in that order.

    Returns:
        str: The description. '%Deliverble' is multiplied by 100 and rendered
        with two decimals; every other value is interpolated as-is.
    """
    trade_date = row['Date']
    symbol = row['Symbol']
    open_price = row['Open']
    high_price = row['High']
    low_price = row['Low']
    close_price = row['Close']
    prev_close = row['Prev Close']
    volume = row['Volume']
    turnover = row['Turnover']
    deliverable_volume = row['Deliverable Volume']
    # The column stores a fraction; scale it to a percentage for display.
    deliverable_pct = row['%Deliverble'] * 100

    price_part = (
        f"On {trade_date}, the stock {symbol} opened at {open_price}, "
        f"reached a high of {high_price}, a low of {low_price}, "
        f"and closed at {close_price}. "
    )
    prev_close_part = f"The previous close was {prev_close}. "
    volume_part = (
        f"The total traded volume was {volume}, with a turnover of {turnover} "
        f"and a deliverable volume of {deliverable_volume} "
        f"which represents {deliverable_pct:.2f}% of the total volume."
    )
    return price_part + prev_close_part + volume_part

# Build one description per DataFrame row and collect them in a plain Python list.
sentences = (
    df
    .apply(transform_to_descriptive_text, axis="columns")
    .tolist()
)

In [7]:
# `sentences` is a list of description strings, one per CSV row. Preview the
# first few instead of echoing the entire list into the notebook output.
sentences[:5]

['On 2016-12-23, the stock ADANIPORTS opened at 262.15, reached a high of 268.5, a low of 259.05, and closed at 262.9. The previous close was 263.55. The total traded volume was 3336863, with a turnover of 87787444520000.0 and a deliverable volume of 1241539 which represents 37.21% of the total volume.',
 'On 2012-05-03, the stock ADANIPORTS opened at 126.55, reached a high of 127.5, a low of 124.45, and closed at 124.95. The previous close was 126.05. The total traded volume was 626023, with a turnover of 7862758770000.0 and a deliverable volume of 202501 which represents 32.35% of the total volume.',
 'On 2019-04-24, the stock ADANIPORTS opened at 387.7, reached a high of 394.0, a low of 386.1, and closed at 391.75. The previous close was 390.9. The total traded volume was 1674322, with a turnover of 65222301904999.99 and a deliverable volume of 881911 which represents 52.67% of the total volume.',
 'On 2017-03-08, the stock ADANIPORTS opened at 308.25, reached a high of 309.0, a low

In [8]:
# Check the container type of `sentences` (the recorded output is `list`).
type(sentences)

list

In [9]:
# Check the element type of `sentences` (the recorded output is `str`).
type(sentences[0])

str

In [25]:
import os
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()

# Re-export the credentials under the names used below. os.environ only accepts
# strings, so assigning the result of os.getenv() for a variable that is not set
# raises TypeError; copy a value only when it actually exists.
# Each pair is (target name, source name); "groq_api_key" keeps its original
# lowercase spelling.
for target_name, source_name in (
    ("OPENAI_API_KEY", "OPENAI_API_KEY"),
    ("LANGCHAIN_API_KEY", "LANGCHAIN_API_KEY"),
    ("LANGCHAIN_PROJECT", "LANGCHAIN_PROJECT"),
    ("groq_api_key", "GROQ_API_KEY"),
):
    source_value = os.getenv(source_name)
    if source_value is not None:
        os.environ[target_name] = source_value

# Turn on LangChain tracing only when a LangSmith key is configured.
if os.getenv("LANGCHAIN_API_KEY") is not None:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"


In [11]:
from langchain_ollama import OllamaEmbeddings

# Ollama-backed embedding model ("nomic-embed-text"); the vector store built
# further down uses it to embed the documents.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
embeddings

OllamaEmbeddings(model='nomic-embed-text', base_url=None, client_kwargs={})

In [12]:
from langchain.schema import Document

# Wrap every description string in `sentences` in a LangChain Document.
documents = [Document(page_content=sentence) for sentence in sentences]


In [13]:
# Sanity-check the container and element types, then preview a few documents
# rather than printing the whole list into the notebook output.
print(type(documents))
print(type(documents[0]))
documents[:3]

<class 'list'>
<class 'langchain_core.documents.base.Document'>
[Document(metadata={}, page_content='On 2016-12-23, the stock ADANIPORTS opened at 262.15, reached a high of 268.5, a low of 259.05, and closed at 262.9. The previous close was 263.55. The total traded volume was 3336863, with a turnover of 87787444520000.0 and a deliverable volume of 1241539 which represents 37.21% of the total volume.'), Document(metadata={}, page_content='On 2012-05-03, the stock ADANIPORTS opened at 126.55, reached a high of 127.5, a low of 124.45, and closed at 124.95. The previous close was 126.05. The total traded volume was 626023, with a turnover of 7862758770000.0 and a deliverable volume of 202501 which represents 32.35% of the total volume.'), Document(metadata={}, page_content='On 2019-04-24, the stock ADANIPORTS opened at 387.7, reached a high of 394.0, a low of 386.1, and closed at 391.75. The previous close was 390.9. The total traded volume was 1674322, with a turnover of 65222301904999.99

In [16]:
from langchain_community.document_loaders import TextLoader
# import 
from langchain_community.vectorstores import FAISS
# Build a FAISS vector store from the documents using the embedding model.
db = FAISS.from_documents(documents=documents, embedding=embeddings)

In [17]:
# Print the store's repr to confirm that `db` was created.
print(db)

<langchain_community.vectorstores.faiss.FAISS object at 0x0000013613E21960>


In [19]:
# querying
# Ask the vector store for the stored sentences closest to `query` and show the
# text of the first hit.
# NOTE(review): presumably hits are ranked by embedding similarity only, so the
# result cannot tell which stock was actually "best" — confirm.
query="What's the best stock for 2019?"
docs=db.similarity_search(query)
docs[0].page_content

'On 2019-04-24, the stock ADANIPORTS opened at 387.7, reached a high of 394.0, a low of 386.1, and closed at 391.75. The previous close was 390.9. The total traded volume was 1674322, with a turnover of 65222301904999.99 and a deliverable volume of 881911 which represents 52.67% of the total volume.'

In [20]:
# similarity score
# Same search as above, but each hit comes back as a (Document, score) pair.
# NOTE(review): in the recorded output the first hit has the smallest score, so
# the score looks like a distance (lower = closer) — confirm against the FAISS
# vector store documentation.
doc_score=db.similarity_search_with_score(query)
doc_score

[(Document(metadata={}, page_content='On 2019-04-24, the stock ADANIPORTS opened at 387.7, reached a high of 394.0, a low of 386.1, and closed at 391.75. The previous close was 390.9. The total traded volume was 1674322, with a turnover of 65222301904999.99 and a deliverable volume of 881911 which represents 52.67% of the total volume.'),
  0.6892146),
 (Document(metadata={}, page_content='On 2019-11-15, the stock ADANIPORTS opened at 369.1, reached a high of 370.9, a low of 365.5, and closed at 366.3. The previous close was 369.1. The total traded volume was 2362065, with a turnover of 86947853715000.0 and a deliverable volume of 1262202 which represents 53.44% of the total volume.'),
  0.70859444),
 (Document(metadata={}, page_content='On 2017-10-19, the stock ADANIPORTS opened at 405.0, reached a high of 406.3, a low of 397.0, and closed at 401.4. The previous close was 406.9. The total traded volume was 137226, with a turnover of 5523814010000.0 and a deliverable volume of 36291 wh

In [21]:
# Alternative: wrap the vector store in a retriever and invoke it with the query.
# The hits are stored under their own name so that `sentences` (the list of
# description strings) is not rebound to a list of Document objects, which would
# break re-running the cell that builds `documents` from `sentences`.
retriever = db.as_retriever()
retrieved_docs = retriever.invoke(query)
retrieved_docs[0].page_content

'On 2019-04-24, the stock ADANIPORTS opened at 387.7, reached a high of 394.0, a low of 386.1, and closed at 391.75. The previous close was 390.9. The total traded volume was 1674322, with a turnover of 65222301904999.99 and a deliverable volume of 881911 which represents 52.67% of the total volume.'

Failed to get info from https://api.smith.langchain.com: LangSmithConnectionError('Connection error caused failure to GET /info in LangSmith API. Please confirm your internet connection. ConnectTimeout(MaxRetryError("HTTPSConnectionPool(host=\'api.smith.langchain.com\', port=443): Max retries exceeded with url: /info (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x0000013613DFAB00>, \'Connection to api.smith.langchain.com timed out. (connect timeout=10.0)\'))"))\nContent-Length: None\nAPI Key: lsv2_********************************************8b')
Failed to batch ingest runs: langsmith.utils.LangSmithConnectionError: Connection error caused failure to POST https://api.smith.langchain.com/runs/batch in LangSmith API. Please confirm your internet connection. ConnectTimeout(MaxRetryError("HTTPSConnectionPool(host='api.smith.langchain.com', port=443): Max retries exceeded with url: /runs/batch (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection 

In [22]:
# Write the vector store to the local folder "small_db".
db.save_local(folder_path="small_db")

In [35]:
# checking
# Disabled smoke test for a direct `ollama.chat` call. The code is wrapped in a
# triple-quoted string rather than commented out, so the cell evaluates to that
# string and echoes it as its output instead of running anything.
# NOTE(review): the model name 'llama3.2:1b ' ends with a space — presumably a
# typo to fix before re-enabling this check.
''' 
import ollama
response = ollama.chat(model='llama3.2:1b ', messages=[
    {
        'role': 'user',
        'content': 'Why is sky blue?',
    },
])
print(response['message']['content'])''' 


" \nimport ollama\nresponse = ollama.chat(model='llama3.2:1b ', messages=[\n    {\n        'role': 'user',\n        'content': 'Why is sky blue?',\n    },\n])\nprint(response['message']['content'])"

In [36]:
from langchain_groq import ChatGroq

# Groq chat model. temperature=0 is requested (the recorded repr shows it stored
# as 1e-08); max_tokens and timeout are passed as None, and max_retries is not
# passed at all.
llm = ChatGroq(model="mixtral-8x7b-32768", temperature=0, max_tokens=None, timeout=None)
llm



ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001364D534B80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001364D535FC0>, temperature=1e-08, model_kwargs={}, groq_api_key=SecretStr('**********'))

In [37]:
# Prompt Template
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# System instruction shared by every question sent through the chain.
generic_template = "Act as a stock expert and answer the question. Keep it to the point. Use simple language."
prompt = ChatPromptTemplate.from_messages(
    [("system", generic_template), ("human", "{input}")]
)

# The chain pipes the model output through `parser`, so it must exist before the
# chain is composed; StrOutputParser turns the chat message into a plain string.
parser = StrOutputParser()

# NOTE: this chain only combines the prompt, the LLM and the parser. It does not
# consult `db` or `retriever`, so the answer is not based on the indexed CSV rows.
chain = prompt | llm | parser
result = chain.invoke({"input": "Which stock performed best in 2012?"})
result

"In 2012, the best performing stock in the S&P 500 index was Netflix (NFLX), with a remarkable return of 134%. This was largely due to the company's successful transition from DVD-by-mail to streaming services. However, it's important to note that past performance is not indicative of future results."