In [1]:
import os 
import re
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

In [2]:
# Modified following https://www.youtube.com/watch?v=02cdCd43Ccc 
API_KEY = "NA" #Causes ChatOpenAI to look for local models. base-url is needed to use ollama
Model = 'llama3'
gpt_llm = ChatOpenAI(api_key = API_KEY, model= Model, base_url="http://localhost:11434/v1") # Thise parameters uses Ollama with Llama3
gpt_llm.invoke('what is a bot')

AIMessage(content="A bot (short for robot) is a computer program that automates repetitive or specific tasks, often interacting with humans through text or voice commands. Bots can be found in various forms, such as:\n\n1. **Chatbots**: These are computer programs that mimic human conversation, using natural language processing (NLP) and machine learning algorithms to understand and respond to user input.\n2. **Web scrapers**: Bots that extract specific data from websites, often used for research or monitoring purposes.\n3. **Virtual assistants**: AI-powered bots like Siri, Google Assistant, or Alexa, which can perform tasks, provide information, and control smart home devices.\n4. **Customer service chatbots**: Automated systems designed to help customers with simple inquiries or issues, freeing up human customer support agents to focus on more complex problems.\n5. **Social media bots**: Programs that automate social media interactions, such as posting updates, responding to messages

In [3]:
parser = StrOutputParser()
gpt_chain = gpt_llm|parser
gpt_chain.invoke('what is a shark')

"A great question!\n\nA shark is a type of fish that belongs to the class Chondrichthyes. Sharks are characterized by their cartilaginous skeleton (as opposed to bony fish), which means their skeleton is made up of flexible, yet strong, cartilage rather than bones. They have a distinctive body shape with a streamlined torpedo-shaped body, a conical snout, and a caudal fin that is typically rounded.\n\nHere are some key features that define sharks:\n\n1. **Cartilaginous skeleton**: Sharks have no bones; their skeleton is made up of cartilage, which provides flexibility and buoyancy.\n2. **Streamlined body**: Sharks have a torpedo-shaped body that allows them to swim efficiently through the water.\n3. **Conical snout**: Sharks have a distinctive snout that is shaped like a cone or triangle. This helps them detect prey and navigate their surroundings.\n4. **Multiple gill slits**: Sharks have multiple gill slits on either side of their head, which allow for efficient respiration.\n5. **Der

### Text version of an online article

In [6]:
loader = TextLoader('WoodSpaceShip.txt',encoding = 'utf-8')
document = loader.load()

In [7]:
spliter = RecursiveCharacterTextSplitter(chunk_size = 200,chunk_overlap = 50)
chunks = spliter.split_documents(document)
chunks[3]


Document(page_content='The roughly 10-centimetre-long cube is made of magnolia-wood panels and has an aluminium frame, solar panels, circuit boards and sensors. The panels incorporate Japanese wood-joinery methods that do', metadata={'source': 'WoodSpaceShip.txt'})

In [8]:
# vector_storage = FAISS.from_documents(chunks,OpenAIEmbeddings())
# retriever = vector_storage.as_retriever()
from langchain_community.embeddings import OllamaEmbeddings

ollama_emb = OllamaEmbeddings(base_url = 'http://localhost:11434', model="llama3")
# May need: 
# !pip install faiss-cpu
# or 
# !pip install faiss-gpu

vector_storage = FAISS.from_documents(chunks, ollama_emb)
retriever = vector_storage.as_retriever()

In [9]:
retriever.invoke('what is the main characteristic of the satellite')

[Document(page_content='LignoSat will cost about US$191,000 to design, manufacture, launch and operate. Sensors onboard will evaluate strain on the wood, temperature, geomagnetic forces and cosmic radiation, as well as', metadata={'source': 'WoodSpaceShip.txt'}),
 Document(page_content='And there is a precedent for spacecraft with wooden parts. Launched in 1962, NASA’s Ranger 3 lunar probe had a balsa-wood casing intended to protect its capsule as it landed on the lunar surface (the', metadata={'source': 'WoodSpaceShip.txt'}),
 Document(page_content='forces and cosmic radiation, as well as receive and transmit radio signals. The satellite has been handed over to the Japan Aerospace Exploration Agency (JAXA) and will be transferred to the', metadata={'source': 'WoodSpaceShip.txt'}),
 Document(page_content='When LignoSat plunges back to Earth, after six months to a year of service, the magnolia will incinerate completely and release only water vapour and carbon dioxide, says Takao Doi, a

In [10]:
template = ("""
You are AI-powered chatbot designed to provide 
information and assistance for customers
based on the context provided to you only. 
            
Context:{context}
Question:{question}
""")

In [11]:
prompt = PromptTemplate.from_template(template=template)
prompt.format(
    context = ' Here is a context to use',
    question = ' This is a question to answer'
)

'\nYou are AI-powered chatbot designed to provide \ninformation and assistance for customers\nbased on the context provided to you only. \n            \nContext: Here is a context to use\nQuestion: This is a question to answer\n'

In [12]:
result = RunnableParallel(context = retriever,question = RunnablePassthrough())
chain = 27 |prompt | gpt_llm | parser

TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class 'int'>

In [None]:
chain.invoke('What is the recommended material for satelites? ')

In [None]:
chain.invoke('are there any precedents to wood satellites? Do they work?')

### Online Document

In [None]:
# https://python.langchain.com/v0.2/docs/integrations/document_loaders/url/
# !pip install unstructured
from langchain_community.document_loaders import UnstructuredURLLoader

loaderhmlt = UnstructuredURLLoader(urls = ["https://www.nature.com/articles/d41586-024-01456-z"])
data = loaderhmlt.load()
spliterhtml = RecursiveCharacterTextSplitter(chunk_size = 200,chunk_overlap = 50)
chunkhtml = spliterhtml.split_documents(data)
chunkhtml[3]

In [None]:
# The article of interest is inclluded in the first 44 chunks 
vector_storagehtml = FAISS.from_documents(chunkhtml[:44], ollama_emb)
retrieverhtml = vector_storagehtml.as_retriever()

In [None]:
resulthtml = RunnableParallel(context = retrieverhtml,question = RunnablePassthrough())
chainhtml = resulthtml |prompt | gpt_llm | parser
chain_htmlr = chainhtml.invoke('are there any precedents to wood satellites? Do they work?')
chain_htmlr

In [None]:
chain_htmlr2 = chainhtml.invoke('What is the recommended material for satelites? ')
chain_htmlr2

In [None]:
chain_htmlr2 = chainhtml.invoke('What is the recommended material for satelites? ')
chain_htmlr2

In [None]:
chain_htmlr2 = re.sub(r'\. ',r'.\n', chain_htmlr2)
print(chain_htmlr2)

In [None]:
chain_htmlr3 = chainhtml.invoke('What is the main idea of the context provided? ')
chain_htmlr3
chain_htmlr3 = re.sub(r'\. ',r'.\n', chain_htmlr3)
print(chain_htmlr3)