# Package Requirements

In [None]:
!pip -q install openai langchain huggingface_hub --quiet
!pip install cohere --quiet
!pip install yfinance --quiet
!pip install -U langchain-openai --quiet
!pip install pytesseract  --quiet
!pip install Pillow --quiet
!pip install tesseract --quiet
!apt install tesseract-ocr --quiet
!apt install libtesseract-dev --quiet
!pip install langchain_cohere
!pip install langchain_community

In [2]:
from langchain.llms import OpenAI
from langchain.llms import HuggingFaceHub
from langchain.llms import Cohere
from langchain import PromptTemplate
from langchain import LLMChain
from langchain.chains import SequentialChain
import yfinance as yf
#import pytesseract
from PIL import Image
from IPython.display import Image as display_image
import os
import getpass

In [3]:
# Provider / LangSmith configuration. Secrets are read from the environment and
# are never written into the notebook itself.
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT = "basic_project_oct24"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
LANGCHAIN_TRACING_V2 = True

# Plain Python variables are not visible to the tracing client, which is
# configured through environment variables. Export the settings only when an
# API key is available, and use setdefault so values already present in the
# environment are not overridden.
if LANGCHAIN_API_KEY:
    os.environ.setdefault("LANGCHAIN_PROJECT", LANGCHAIN_PROJECT)
    os.environ.setdefault("LANGCHAIN_ENDPOINT", LANGCHAIN_ENDPOINT)
    os.environ.setdefault("LANGCHAIN_TRACING_V2", str(LANGCHAIN_TRACING_V2).lower())

# Better way if in Colab: read the secrets from the Colab secrets store.
#from google.colab import userdata
#os.environ['OPENAI_API_KEY'] = userdata.get("OPENAI_API_KEY")
#os.environ['HUGGINGFACEHUB_API_TOKEN'] = userdata.get("HUGGINGFACEHUB_API_TOKEN")
#os.environ['COHERE_API_KEY'] = userdata.get("COHERE_API_KEY")

#LLMS

## OpenAI model - Paid

In [None]:
from langchain_openai import OpenAI

# - temperature: Set to 0.9, which controls the randomness of the output.
#   A higher temperature results in more varied and unpredictable outputs,
#   while a lower temperature produces more deterministic and conservative outputs.
#   This is often used in generative tasks to balance between creativity and relevance.
#
# - max_tokens: Set to 256, which specifies the maximum number of tokens (words or pieces of words)
#   that the model can generate in a single response.
llm_openai = OpenAI(temperature=0.9, max_tokens=256)

response = llm_openai.invoke("Write a 4 line poem on AI")
print(response)


## Cohere - Hosted API Alternative to OpenAI

In [None]:
from langchain.llms import Cohere

# Same poem request as in the OpenAI cell, sent to a Cohere model instead.
poem_request = "Write a 4 line poem on AI"

llm = Cohere(model="command-xlarge-nightly")
response = llm.invoke(poem_request)
print(response)

## Hugging Face model - Free

In [None]:
from langchain.llms import HuggingFaceHub

# Model repository on the Hugging Face Hub.
# Alternative model to try: repo_id = "openai-community/gpt2"
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Generation settings forwarded to the hosted model.
generation_kwargs = {"temperature": 0.9, "max_length": 256}

llm = HuggingFaceHub(repo_id=repo_id, model_kwargs=generation_kwargs)

response = llm.invoke("Write a 4 line poem on AI")
print(response)

# Where LLMs fail

In [5]:
# A question about live market data; compare the model's answer with the quote
# fetched from yfinance in the next cell.
stock_question = "What is current market price of the Apple Stock?"

llm = OpenAI(temperature=0, max_tokens=256)
# llm = Cohere(model="command-xlarge-nightly")  # alternative provider

response = llm.invoke(stock_question)
print(response)



As of September 2021, the current market price of Apple stock is approximately $148 per share.


In [6]:
import yfinance as yf

# Look the price up in Yahoo Finance data instead of asking the LLM.
TICKER_SYMBOL = "AAPL"

apple_stock = yf.Ticker(TICKER_SYMBOL)
apple_info = apple_stock.info
apple_cmp = apple_info["currentPrice"]  # cmp = current market price
print(apple_cmp)

235.0


## Prompt Templates

In [7]:
# Five differently worded requests for the same kind of text. All prompts are
# sent first (in this order), then the answers are printed.
poem_prompts = [
    "Write a 4 line poem on AI",
    "Craft a quartet of verses celebrating the marvels of artificial intelligence.",
    "Compose a brief, ode to the wonders of AI.",
    "Pen a short poem that captures the essence of artificial intelligence.",
    "Create a succinct tribute to the advancements in AI.",
]

response, response1, response2, response3, response4 = [
    llm.invoke(prompt_text) for prompt_text in poem_prompts
]

# Banner text paired with the answer it introduces.
labelled_responses = [
    ("\n======= response =======\n", response),
    ("\n======= response1 =======\n", response1),
    ("\n======= response2 =======\n", response2),
    ("\n======= response3 =======\n", response3),
    ("\n======= response4 =======\n", response4),
]
for banner, answer in labelled_responses:
    print(banner, answer)


 

Artificial intelligence, a marvel of our time
Machines that think and learn, so sublime
With algorithms and data, they make decisions
A future where they may surpass our visions

 

Verse 1:
Oh, the wonders of AI, how it astounds
With its ability to learn and adapt
From self-driving cars to virtual assistants
It's a technology that's truly unmatched

Verse 2:
No task is too great for this digital brain
It can process data at lightning speed
From complex calculations to language translation
AI can fulfill any human need

Verse 3:
With machine learning, it continues to evolve
Improving itself with each passing day
Its potential is limitless, its growth unstoppable
AI is here to stay, in every way

Verse 4:
But let us not forget, it's humans who create
This marvel of technology we admire
With responsible use, we can harness its power
And take humanity to even greater heights.

 

Oh, AI, marvel of our time
With algorithms and data, you shine
Your intelligence, beyond compare
A true wo

In [None]:
from langchain import PromptTemplate

# One template with a {subject_name} placeholder, filled in for several subjects.
template = "Write a 4 line poem on the subject {subject_name}"
prompt = PromptTemplate(template=template, input_variables=["subject_name"])

for subject in ("Data Science", "Fathers Day", "Solar System"):
    print(prompt.format(subject_name=subject))

# LLM Chain

In [12]:

from langchain import PromptTemplate
from langchain import LLMChain

# Low temperature: this cell asks for a factual list.
llm = OpenAI(temperature=0.1)
# llm = Cohere(temperature=0.1)

template = "List down the historically significant steps in the field of {field_name}"
prompt = PromptTemplate(
    input_variables=["field_name"],
    template=template,
)

# Legacy equivalent: chain = LLMChain(llm=llm, prompt=prompt)
chain = prompt | llm

# Name the template variable explicitly so the call keeps working if the
# template later gains more variables.
result = chain.invoke({"field_name": "Machine Learning"})
print(result)



1. Development of the Perceptron Algorithm (1957): The Perceptron algorithm, developed by Frank Rosenblatt, was the first successful attempt at creating a machine learning algorithm. It laid the foundation for future developments in neural networks and deep learning.

2. Introduction of Decision Trees (1960s): Decision trees were first introduced in the 1960s by researchers such as Leo Breiman, Jerome Friedman, and Charles Stone. They provided a simple and intuitive way to represent and solve classification and regression problems.

3. Development of the Backpropagation Algorithm (1986): The backpropagation algorithm, developed by Geoffrey Hinton, David Rumelhart, and Ronald Williams, revolutionized the field of neural networks. It allowed for efficient training of multi-layer neural networks, making them more powerful and widely applicable.

4. Introduction of Support Vector Machines (1992): Support Vector Machines (SVMs) were first introduced by Vladimir Vapnik and Alexey Chervonen

### LAB: Example of an LLMChain

In [42]:
# Higher temperature for a more varied, conversational explanation.
llm = OpenAI(temperature=0.9)
# llm = Cohere(temperature=0.9)

template = "The topic name is {topic}. Explain this topic to a 10 years old kid"
prompt = PromptTemplate(template=template, input_variables=["topic"])

# Legacy equivalent: chain = LLMChain(llm=llm, prompt=prompt)
chain = prompt | llm

topic_name = "Logistic Regression"
result = chain.invoke(topic_name)
print(result)




Logistic regression is a way to predict things based on some information we have. It's like trying to guess what will happen next in a game using the rules and the moves that were already made. Instead of guessing, we use math to figure out the most likely result. It's like having superhero powers to help us make better predictions!


# Sequential Chains

## Chain1 : Finds 3 well-known books
Find three well-known books on any theme with this dedicated chain.

In [25]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser

llm = OpenAI(temperature=0.5)
# llm = Cohere(temperature=0.5)

# The parser is used here only as a source of formatting instructions that are
# placed into the prompt; it is not attached to the chain built below.
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()
# print(format_instructions)

template = """Please provide a simple list of 3 well-known
                books that center around the theme of {theme}.
                Do not include book description
                {format_instructions}"""

book_name_prompt_template = PromptTemplate(
    input_variables=["theme", "format_instructions"],
    template=template,
)

# output_key names the field under which this chain's answer is stored; the
# summary prompt in the next section refers to the same name.
book_name_chain = LLMChain(
    llm=llm,
    prompt=book_name_prompt_template,
    output_key="book_names_list",
)
# LCEL alternative: book_name_chain = book_name_prompt_template | llm | output_parser

# Stand-alone check of this chain; it is combined into a SequentialChain later.
check_inputs = {
    "theme": "personality development",
    "format_instructions": format_instructions,
}
books_list = book_name_chain.invoke(check_inputs)
print(books_list["book_names_list"])



1. "The 7 Habits of Highly Effective People" by Stephen R. Covey
2. "Quiet: The Power of Introverts in a World That Can't Stop Talking" by Susan Cain
3. "Becoming" by Michelle Obama


## Chain2 : Gives the summary

This chain picks one book from the list produced by Chain1 and delivers a detailed summary of it.

In [30]:
from langchain_core.output_parsers import StrOutputParser

# Large max_tokens budget because the prompt asks for a multi-section summary.
llm = OpenAI(temperature=0.9, max_tokens=3000)
# llm = Cohere(temperature=0.9, max_tokens=3000)

stroutput_parser = StrOutputParser()  # created here but not attached to the chain below

summary_template = """Please take any one book from the books list {book_names_list}.
                Mention the book title.
                Please provide a comprehensive summary of the book,in three sections
                and each section with three summary points"""

book_summary_prompt_template = PromptTemplate(
    template=summary_template,
    input_variables=["book_names_list"],
)

# Reads "book_names_list" and stores its answer under "book_summary".
book_summary_chain = LLMChain(
    llm=llm,
    prompt=book_summary_prompt_template,
    output_key="book_summary",
)
  



## SequentialChain

Takes a theme as input. It first gets three well-known books on the given theme, then provides a summary of any one of those books, without requiring any further input.

In [33]:
from langchain.chains import SequentialChain

# Order matters: book_name_chain writes "book_names_list", which the prompt of
# book_summary_chain reads.
book_chain = SequentialChain(
    chains=[book_name_chain, book_summary_chain],
    input_variables=["theme", "format_instructions"],
    output_variables=["book_names_list", "book_summary"],
)

# Run both steps for one theme and show only the final summary.
chain_inputs = {
    "theme": "Personal Finance",
    "format_instructions": format_instructions,
}
book_summary = book_chain.invoke(input=chain_inputs)

# print(book_summary)
print(book_summary["book_summary"])




"I Will Teach You to Be Rich" by Ramit Sethi

Section 1: The Basics of Personal Finance
1. The book emphasizes the importance of automating your finances, such as setting up automatic bill payments and savings contributions, to ensure consistency and discipline in managing your money.
2. It also stresses the need for having a budget, but not a restrictive one. Instead, the author advocates for a "conscious spending plan" that allows for guilt-free spending on things that bring happiness and cutting back on unnecessary expenses.
3. The author encourages readers to focus on the big wins, such as negotiating a higher salary or finding ways to earn more money, rather than obsessing over small expenses.

Section 2: Investing and Building Wealth
1. The book breaks down the basics of investing, including different types of accounts and asset allocation strategies, in an easy-to-understand manner for beginners.
2. It also advises readers to prioritize paying off high-interest debt before inv

# LAB : Sequential Chain

In [None]:
# Free-text fundamentals summary for State Bank of India (NSE: SBIN). It is the
# input text for the chains built in this lab; the positive and negative points
# are deliberately mixed together so the first chain has to sort them out.
SBIN_Stock_Analysis = """

Company name is State Bank of India
NSE Symbol is SBIN
MARKET CAP - ₹ 6,69,078.16 Cr.
Company has a good Return on Equity (ROE) track record: 3 Years ROE 13.46%.
CASA stands at 42.67% of total deposits.
The company has delivered good Profit growth of 51.35% over the past 3 years.
Company has delivered good profit growth of 76.1% CAGR over last 5 years.
Company has been maintaining a healthy dividend payout of 17.3%.
Company's working capital requirements have reduced from 152 days to 118 days
The bank has a very low ROA track record. Average ROA of 3 years is 0.70%.
Low other Income proportion of 11.03%.High Cost to income ratio of 53.87%.
Company has low interest coverage ratio.
The company has delivered a poor sales growth of 8.91% over past five years.
Company has a low return on equity of 12.8% over last 3 years.
Contingent liabilities of Rs.19,00,096 Cr.
Company might be capitalizing the interest cost.
Earnings include an other income of Rs.1,39,611 Cr.

"""
print(SBIN_Stock_Analysis)

## Chain1 : Positives and Negatives

In [None]:
# llm = OpenAI(temperature=0, max_tokens=256)
llm = Cohere(temperature=0, max_tokens=256)

# Step 1 prompt: condense the raw analysis text into short pros and cons.
template ="""Read the text data from {stock_analysis_input}.
              Mention the company name and market cap.
              Write top3 positive and top3 negative points.
              keep the points short"""

information_extraction_prompt = PromptTemplate(
    input_variables=["stock_analysis_input"],
    template=template,
)

# print(information_extraction_prompt.format(stock_analysis_input=SBIN_Stock_Analysis))

# output_key matches the {Pros_and_Cons} variable read by the investor-report prompt.
information_extraction_chain = LLMChain(
    llm=llm,
    prompt=information_extraction_prompt,
    output_key="Pros_and_Cons",
)

# Name the input variable explicitly instead of relying on the single-input shorthand.
result = information_extraction_chain.invoke({"stock_analysis_input": SBIN_Stock_Analysis})
# print(result.keys())
print(result['Pros_and_Cons'])

## Chain2 : Investor Report

In [None]:
# llm = OpenAI(temperature=0, max_tokens=256)
llm = Cohere(temperature=0, max_tokens=256)

# Step 2 prompt: turn the pros and cons into a buy / don't-buy opinion.
template ="""
Imagine you've been analyzing stocks for over 15 years.
Look at the good and bad points, and see if the company can grow.
Right now, is buying shares of this company a smart move?
take the data from {Pros_and_Cons}
"""

stock_decision_prompt = PromptTemplate(
    input_variables=["Pros_and_Cons"],
    template=template,
)
# print(stock_decision_prompt.format(Pros_and_Cons=result['Pros_and_Cons']))

stock_decision_chain = LLMChain(
    llm=llm,
    prompt=stock_decision_prompt,
    output_key="Investor_Report",
)

# Feed this chain the pros/cons text produced by the extraction chain (held in
# `result` from the previous cell), not the raw SBIN_Stock_Analysis text.
pros_and_cons = result["Pros_and_Cons"]
result = stock_decision_chain.invoke({"Pros_and_Cons": pros_and_cons})
print(result['Investor_Report'])

## Final Sequential Chain


In [None]:
# Run both steps in order: extraction writes "Pros_and_Cons", which the
# decision prompt reads to produce "Investor_Report".
full_chain = SequentialChain(
    chains=[information_extraction_chain, stock_decision_chain],
    input_variables=["stock_analysis_input"],
    output_variables=["Pros_and_Cons", "Investor_Report"],
)

result = full_chain.invoke(SBIN_Stock_Analysis)
investor_report = result["Investor_Report"]
print(investor_report)

# LangChain + IDP (Intelligent Document Processing)

In [None]:
#Get the Images, try Invoice_1.png, Invoice_2.png, Invoice_3.png, Invoice_4.png
#Try different images in this example
!wget https://raw.githubusercontent.com/venkatareddykonasani/Datasets/master/IDM_Datasets/Invoices/Invoice_1.png
!wget https://raw.githubusercontent.com/venkatareddykonasani/Datasets/master/IDM_Datasets/Invoices/Invoice_2.png
!wget https://raw.githubusercontent.com/venkatareddykonasani/Datasets/master/IDM_Datasets/Invoices/Invoice_3.png
!wget https://raw.githubusercontent.com/venkatareddykonasani/Datasets/master/IDM_Datasets/Invoices/Invoice_4.png


image_path=image_path = '/content/Invoice_4.png'
display_image(filename=image_path)

## IDP without LLM

In [None]:
import pytesseract
import re

def extract_email_addresses(image_path):
    """Run OCR on an image and pull out the e-mail addresses found in its text.

    Args:
        image_path: The image to read; passed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        list[str]: Every substring of the OCR text that matches the e-mail
        pattern, in order of appearance (an empty list when nothing matches).
        The same list is also printed.
    """
    text = pytesseract.image_to_string(image_path)
    email_addresses = re.findall(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
    print("Email Address: ", email_addresses)
    # Hand the matches back so callers can use them, not only read the printout.
    return email_addresses

In [None]:
# Regex-only extraction on the selected invoice; the function prints what it finds.
email_addresses = extract_email_addresses(image_path)

## IDP with LLMs

In [None]:
# Extract text from the image. The context manager closes the image file as
# soon as OCR has finished instead of leaving the handle open.
with Image.open(image_path) as img:
    invoice_text = pytesseract.image_to_string(img)
# print(invoice_text)

# llm = OpenAI(temperature=0)
llm = Cohere(temperature=0)

# The whole OCR text is placed into the prompt; the LLM does the field extraction.
template="""
Take the information from {invoice_text} and print the itemwise price and quantity.
"""

invoice_prompt = PromptTemplate(
    input_variables=["invoice_text"],
    template=template,
)

invoice_chain = LLMChain(llm=llm, prompt=invoice_prompt, output_key="itemwise_price_and_quantity")
result = invoice_chain.invoke({"invoice_text": invoice_text})
print(result['itemwise_price_and_quantity'])

In [None]:
from IPython.display import display_markdown

# Show the same answer again, this time rendered as Markdown rather than
# printed as plain text.
result_values = result["itemwise_price_and_quantity"]
display_markdown(result_values, raw=True)

In [None]:
# Second extraction over the same OCR text: client contact details and total.
template="""
Take the information from {invoice_text} and print the client name, phone number, email and total amount
"""

invoice_prompt = PromptTemplate(
    input_variables=["invoice_text"],
    template=template,
)

invoice_chain = LLMChain(llm=llm, prompt=invoice_prompt, output_key="contact_details")
result = invoice_chain.invoke({"invoice_text": invoice_text})
print(result['contact_details'])

In [None]:
# Third extraction over the same OCR text: bank account and payment terms.
template = """
Take the information from {invoice_text} and print the bank account number and payment conditions
"""

invoice_prompt = PromptTemplate(template=template, input_variables=["invoice_text"])

invoice_chain = LLMChain(
    llm=llm,
    prompt=invoice_prompt,
    output_key="bank_details",
)
result = invoice_chain.invoke(invoice_text)
bank_details = result["bank_details"]
print(bank_details)

# Assignment - Book Summary App