In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

MODEL = "gpt-3.5-turbo"
# MODEL = "gpt-4o"
# MODEL = "mixtral:8x5b"
# MODEL = "llama2"

# print(os.getenv("OPENAI_API_KEY"))



In [2]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.llms import Ollama
from langchain.embeddings import OllamaEmbeddings

if MODEL.startswith("gpt"):
    model = ChatOpenAI(openai_api_key = os.getenv("OPENAI_API_KEY"), model = MODEL)
    embeddings = OpenAIEmbeddings(openai_api_key = os.getenv("OPENAI_API_KEY"))
else:
    model = Ollama(model = MODEL)
    embeddings = OllamaEmbeddings(model = MODEL)
model.invoke("Tell me a joke")


AIMessage(content="Why couldn't the bicycle stand up by itself?\n\nBecause it was two tired!", response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 11, 'total_tokens': 27}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-8c7dc392-f8dd-48ab-bc3c-d2373f936e52-0', usage_metadata={'input_tokens': 11, 'output_tokens': 16, 'total_tokens': 27})

## To parse AIMessage to String  

In [3]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

chain = model | parser

chain.invoke("Tell me a joke")

"Why couldn't the bicycle stand up by itself? \n\nBecause it was two tired!"

## Load pdf to python

In [4]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("mlschool.pdf")
pages = loader.load_and_split()
pages

[Document(metadata={'source': 'mlschool.pdf', 'page': 0}, page_content='Building Machine Learning Systems That Don\'t\nSuck\nA live, interactive program that\'ll help you build production-readymachine learning\nsystems from the ground up.\nNext cohort:\xa0August5 - 22, 2024\nCheck the schedulefor more details about upcoming cohorts.\nI want to join!Sign in\nLearn how to design, build, deploy, and scale machine learning\nsystems to solve real-world problems.\nI\'ll lose my mind if I see another book or course teaching people the same basic ideas\nfor the hundredth time. Most people are stuck in beginner mode, and finding help to\nsolve real-world problems is hard.\nI want to change that.\nI started writing software 30 years ago. I\'ve written pipelines and trained models for\nsome of the largest companies in the world. I want to show you how to do the same."This is the best machine learning course I\'ve done.\nWorth every cent."\n— Jose Reyes, AI/ML at Cevo Australia7/19/24, 2:32 PM Bui

# creating a prompt template

In [5]:
from langchain.prompts import PromptTemplate

templat = """
Answer the question based on thee context below. If you can'try
answer the question, reply "I don't know".

context: {context}
question: {question}
"""

prompt = PromptTemplate.from_template(templat)
prompt.format(context="Here is some context", question="Here is a question")



'\nAnswer the question based on thee context below. If you can\'try\nanswer the question, reply "I don\'t know".\n\ncontext: Here is some context\nquestion: Here is a question\n'

In [7]:
chain = prompt | model | parser

In [8]:
chain.input_schema.schema()

{'title': 'PromptInput',
 'type': 'object',
 'properties': {'context': {'title': 'Context', 'type': 'string'},
  'question': {'title': 'Question', 'type': 'string'}},
 'required': ['context', 'question']}

In [9]:
chain.invoke(
    {
        "context": "The name I was given was santiago.",
        "question": "what's my name?"
    }
)

'Your name is Santiago.'

In [10]:
from langchain_community.vectorstores import DocArrayInMemorySearch

vectorstore = DocArrayInMemorySearch.from_documents(
    pages, 
    embedding=embeddings
)

In [14]:
retriever = vectorstore.as_retriever()
retriever.invoke("Machine Learning")

[Document(metadata={'source': 'mlschool.pdf', 'page': 0}, page_content='Building Machine Learning Systems That Don\'t\nSuck\nA live, interactive program that\'ll help you build production-readymachine learning\nsystems from the ground up.\nNext cohort:\xa0August5 - 22, 2024\nCheck the schedulefor more details about upcoming cohorts.\nI want to join!Sign in\nLearn how to design, build, deploy, and scale machine learning\nsystems to solve real-world problems.\nI\'ll lose my mind if I see another book or course teaching people the same basic ideas\nfor the hundredth time. Most people are stuck in beginner mode, and finding help to\nsolve real-world problems is hard.\nI want to change that.\nI started writing software 30 years ago. I\'ve written pipelines and trained models for\nsome of the largest companies in the world. I want to show you how to do the same."This is the best machine learning course I\'ve done.\nWorth every cent."\n— Jose Reyes, AI/ML at Cevo Australia7/19/24, 2:32 PM Bui

In [15]:
chain = prompt | model | parser

In [23]:
from operator import itemgetter

chain = (
        {
            "context": itemgetter("question") | retriever, 
            "question": itemgetter("question")
        }
        | prompt 
        | model 
        | parser
)

chain.invoke({"question": "what is machine learning?"})

'Machine learning is a field of study that involves training computers to learn from data and make decisions without being explicitly programmed.'

In [20]:
itemgetter("abc")({"abc": 123})
itemgetter("question")({"question": "What is machine learning?"})

'What is machine learning?'

In [24]:
questions = [
    "What is the purpose of the course?",
    "How many hours of live sessions ?",
    "How many coding assignments are there in the program?",
    "Is there a program certificate upon completion?",
    "What programming language will be used in the program?",
    "How much does the program cost?"
]

In [28]:
for question in questions:
    print(f"Question: {question}")
    print(f"Answer: {chain.invoke({'question': question})}")

Question: What is the purpose of the course?
Answer: The purpose of the course is to teach practical skills and insights that will help individuals build machine learning systems that work in the real world.
Question: How many hours of live sessions ?
Answer: The program gives you 18 hours of hands-on, live training spread over 3 weeks.
Question: How many coding assignments are there in the program?
Answer: There are 100 coding assignments in the program.
Question: Is there a program certificate upon completion?
Answer: Yes, there is a program certificate upon completion.
Question: What programming language will be used in the program?
Answer: Python
Question: How much does the program cost?
Answer: The program costs $200.


In [29]:
for s in chain.stream({"question": "What is the purpose of the course?"}):
    print(s, end="", flush=True)

The purpose of the course is to teach practical skills and insights that will help individuals build machine learning systems that work in the real world.

In [30]:
chain.batch([{"question": q} for q in questions])

['The purpose of the course is to teach practical skills and insights that will help individuals build machine learning systems that work in the real world.',
 'The program offers 18 hours of live training spread over 3 weeks.',
 'There are 100 coding assignments in the program.',
 'Yes, there is a program certificate upon completion.',
 'The programming language that will be used in the program is Python.',
 'The program costs $200, with a discounted price from $450.']