In [1]:
import os
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.output_parsers import StrOutputParser

In [2]:
# Name of the Ollama model; reused below for both the LLM and the embeddings.
MODEL = "llama3:instruct"

In [3]:
# LLM wrapper for the model named by MODEL.
# NOTE(review): presumably needs a reachable Ollama server with this model pulled — confirm.
model = Ollama(model=MODEL)

In [11]:
# Embedding function backed by the same model name as the LLM (MODEL).
# NOTE(review): a dedicated embedding model may retrieve better than a chat model — TODO confirm.
embedding = OllamaEmbeddings(model=MODEL)

In [5]:
# Smoke test: send one prompt straight to the model; the reply is the cell's output.
model.invoke("Tell a joke")

"Why don't scientists trust atoms?\n\nBecause they make up everything! (get it?)"

In [6]:
# Turn whatever the model returns into a plain Python string.
parser = StrOutputParser()

# Compose model -> parser. Previously `chain` was just an alias for `model`
# and `parser` was created but never used.
chain = model | parser

# Run the two-step chain on a single prompt; the reply is the cell's output.
chain.invoke("Tell me a fish joke ")

'Why did the fish go to the party?\n\nBecause he heard it was a "reel" good time!'

In [7]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF at ./msc_df.pdf and split it into a list of Document chunks.
loader = PyPDFLoader("./msc_df.pdf")
pages = loader.load_and_split()

# Bare last expression so the notebook renders the resulting chunk list.
pages

[Document(page_content="SEMESTER: I                   HOURS/WEEK  : 5  \nCode: P21DS 101            CREDITS    : 4  \n \nCore I: Mathematical Foundation for Data Science  \nCourse Objectives  \n \nEvery day all over the world, large amounts of data are generated by various businesses, research  \norganizations and social media. Data Science is the study, application, and development of \nmethods to learn from this data. So that many improvements can be made in products, services, \nadvertising campaigns, public health a nd safety and others. Linear algebra, geometry, calculus \nand probability play a fundamental role in the theory of Data Science. This course introduces the \nbasic notions of vector spaces, inner product,  matrix decomposition, derivatives and probability \ndistributions.  \n \nCourse  Outcomes  \nOn successful completion of this course, students will be able to  \n \nCO1: Solve systems of linear equations by use of the matrix  \nCO2: Determine the Orthogonality and Ba

In [8]:
# Number of Document chunks produced by the loader cell above.
len(pages)

7

In [9]:
from langchain.prompts import PromptTemplate

# Prompt text assembled piece by piece. The leading " \n" and the trailing
# blank line are part of the template and are kept exactly.
template = (
    " \n"
    "ANSWER THE QUESTION BASED ON THE CONTEXT PROVIDED BELOW.\n"
    "IF YOU CANNOT ANSWER THE QUESTION, PROVIDE A REPLY \"I DON'T KNOW, PROVIDE MORE CONTEXT\".\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
)

# Template with two placeholders: {context} and {question}.
prompt = PromptTemplate.from_template(template)

# Render the template once with dummy values to eyeball the final layout.
preview = prompt.format(context="Here is some context", question="Here is a question")
print(preview)



 
ANSWER THE QUESTION BASED ON THE CONTEXT PROVIDED BELOW.
IF YOU CANNOT ANSWER THE QUESTION, PROVIDE A REPLY "I DON'T KNOW, PROVIDE MORE CONTEXT".

Context: Here is some context

Question: Here is a question




In [12]:
from langchain_community.vectorstores import DocArrayInMemorySearch

# Build an in-memory vector store from the chunks in `pages`, using `embedding`
# as the embedding function.
vectorstore = DocArrayInMemorySearch.from_documents(pages, embedding=embedding)

In [22]:
# Retriever view over the vector store; the RAG chain further down pipes the
# question into it.
retriever = vectorstore.as_retriever()

# NOTE(review): the call below is commented out, yet output is stored for this
# cell — presumably left over from an earlier run with a real query; confirm,
# then re-run or remove.
# retriever.invoke("")

[Document(page_content='Fowler. NoSQL Distilled: A Brief Guide to the Emerging World of Polyglot Persistence . Addison -\nWesley. 2012 ISBN: 0321826620  \n2. Adam Fowler. NoSQL for Dummies . John Wiley. 2015. ISBN 978 -1-118 -90574 -6 \n3. Guy Harrison. Next Generation Databases . APress. 2016.  978 -1-484213 -30-8 \n4. Thomas M. Connolly and Carolyn E. Begg. Database Systems: “ A Practical Approach to Design, \nImplementation, and Management”,  6th Edition, Pearson, 2015.', metadata={'source': './msc_df.pdf', 'page': 4}),
 Document(page_content='SEMESTER: I                  HOURS/WEEK : 5 \nCode: P21DS 1:1           CREDITS    : 4  \n \nElective -I: Essential Statistics for Data Science  \nCourse Objectives  \nThis course covers topics in Statistics from basics to advanced level that every Data Science student should \nmaster and apply for the industry appli cations. Great depth  of coverage for the topics from Regression \nAnalysis is also given in this course.  \n \nCourse  Outcomes

In [20]:
# Sanity-check retrieval for a short query. Reuses the `retriever` built from
# `vectorstore` in the cell above instead of constructing a second, throw-away
# retriever with the same default settings.
retriever.invoke("ML")

[Document(page_content='SEMESTER: I                  HOURS/ WEEK : 5 \nCode: P21DS 102           CREDITS    : 4  \n \nCore  II: Problem Solving Using Python and R  \nCourse Objectives  \nThis course introduces students the languag e features of both Python and R . Specifically, data \nstructures, regular expressions , data visualization  and internet programming  features are \nintroduced.  \n \nCourse  Outcomes  \nOn successf ul completion of this course, student s will be able to:  \nCO1: Develop applications  using Python data structures  \nCO2: Develop object oriented programs in Python  \nCO3: Manipulate files using Python  \nCO4: Access internet and database data  \nCO5: Develop R pr ograms for data visualization  \n \nUnit -1. Python Basics, Functions , Loops and Strings  \nVariables  – Getting Inputs – Conditions – Catching exceptions – Function calls – Built -in functions \n– Type conversion functions and math functions – Parameters and arguments –While statement \n– Infinite 

In [23]:
from operator import itemgetter


def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string, as
            returned by the retriever.

    Returns:
        The ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: the input dict's "question" is sent to the retriever, the
# retrieved chunks are flattened to plain text, and both values fill the prompt
# that is handed to the model.
# Without the formatting step the prompt received the Python repr of a list of
# Document objects (metadata and escaped newlines included) as its context.
chain = (
    {
        "context": itemgetter("question") | retriever | _format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
)

In [26]:
# Input for the RAG chain: a dict whose "question" key feeds both the retriever
# and the prompt.
query = {"question": "what are the Course Outcomes for Problem Solving Using Python and R "}

# The chain's result is the cell's output.
chain.invoke(query)

'Based on the context provided, the Course Outcomes for "Problem Solving Using Python and R" are:\n\nCO1: Develop applications using Python data structures\nCO2: Develop object-oriented programs in Python\nCO3: Manipulate files using Python\nCO4: Access internet and database data\nCO5: Develop R programs for data visualization'