In [1]:
import os 
import re
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

In [2]:
# Adapted from https://www.youtube.com/watch?v=02cdCd43Ccc
# The key is only a placeholder (original note: it makes ChatOpenAI look for
# local models); base_url is what directs the client at the Ollama server.
API_KEY = "NA"
Model = "llama3"
gpt_llm = ChatOpenAI(
    api_key=API_KEY,
    model=Model,
    base_url="http://localhost:11434/v1",
)
# Smoke test: the raw model call returns an AIMessage.
gpt_llm.invoke("what is a bot")

AIMessage(content="A bot (short for robot) is a computer program that is designed to perform a specific task or set of tasks automatically. Bots can interact with users, other computers, or systems in various ways, such as:\n\n1. **Text-based interfaces**: Bots can communicate with humans through text messages, instant messaging platforms, online chat windows, or social media.\n2. **Audio or voice assistants**: Bots can process voice commands and respond accordingly, like Amazon's Alexa or Google Assistant.\n3. **Web scraping or crawling**: Bots can extract data from websites, search engines, or databases for analysis, research, or marketing purposes.\n\nBots often perform tasks such as:\n\n1. **Automating repetitive processes**: Like updating spreadsheets or sending reminders.\n2. **Providing customer support**: Answering frequent questions, helping with order tracking, or offering product recommendations.\n3. **Analyzing data**: Collecting and processing large amounts of data to prov

In [3]:
# Pipe the model into a string parser so the chain yields plain text
# rather than a message object.
parser = StrOutputParser()
gpt_chain = gpt_llm | parser
gpt_chain.invoke("what is a shark")

'Sharks are a type of fish that belong to the class Chondrichthyes. They are characterized by their cartilaginous skeleton, which is made up of flexible yet strong connective tissue.\n\nHere are some key features that define sharks:\n\n1. **Cartilaginous skeleton**: Sharks have a skeleton made of cartilage, unlike most other fish which have bony skeletons.\n2. **Streamlined body**: Sharks have a torpedo-shaped body designed for swimming and maneuverability in the water.\n3. **Fins instead of legs**: Sharks use their powerful fins to move through the water, rather than their legs (like humans or other terrestrial animals).\n4. **Sharp teeth**: Most sharks have sharp, replaceable teeth that are used to capture and eat prey.\n5. **Cold-blooded**: Like all fish, sharks are ectothermic, meaning they regulate their body temperature using external sources, such as the surrounding water.\n\nThere are over 500 species of sharks, ranging in size from the tiny dwarf lanternshark (which is only ab

In [4]:
# Read the local article text (UTF-8) into LangChain documents.
loader = TextLoader("data.txt", encoding="utf-8")
document = loader.load()

In [5]:
# Split the loaded document into 200-character chunks that overlap by 50,
# then peek at one chunk as a sanity check.
spliter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
chunks = spliter.split_documents(document)
chunks[3]


Document(page_content='The roughly 10-centimetre-long cube is made of magnolia-wood panels and has an aluminium frame, solar panels, circuit boards and sensors. The panels incorporate Japanese wood-joinery methods that do', metadata={'source': 'data.txt'})

In [6]:
# OpenAI-embedding variant, kept for reference:
# vector_storage = FAISS.from_documents(chunks, OpenAIEmbeddings())
# retriever = vector_storage.as_retriever()

from langchain_community.embeddings import OllamaEmbeddings

# FAISS may need to be installed first:
#   !pip install faiss-cpu
#   or
#   !pip install faiss-gpu

# Embed the chunks with the local llama3 model and index them in FAISS.
ollama_emb = OllamaEmbeddings(
    base_url="http://localhost:11434",
    model="llama3",
)
vector_storage = FAISS.from_documents(chunks, ollama_emb)
retriever = vector_storage.as_retriever()

In [7]:
# Inspect which chunks the retriever returns for a sample question.
retriever.invoke(
    "what is the main characteristic of the satellite"
)

[Document(page_content='LignoSat will cost about US$191,000 to design, manufacture, launch and operate. Sensors onboard will evaluate strain on the wood, temperature, geomagnetic forces and cosmic radiation, as well as', metadata={'source': 'data.txt'}),
 Document(page_content='And there is a precedent for spacecraft with wooden parts. Launched in 1962, NASA’s Ranger 3 lunar probe had a balsa-wood casing intended to protect its capsule as it landed on the lunar surface (the', metadata={'source': 'data.txt'}),
 Document(page_content='forces and cosmic radiation, as well as receive and transmit radio signals. The satellite has been handed over to the Japan Aerospace Exploration Agency (JAXA) and will be transferred to the', metadata={'source': 'data.txt'}),
 Document(page_content='When LignoSat plunges back to Earth, after six months to a year of service, the magnolia will incinerate completely and release only water vapour and carbon dioxide, says Takao Doi, an astronaut and', metadata=

In [8]:
# Prompt text with {context} and {question} placeholders. Written as explicit
# per-line literals so the trailing spaces and the whitespace-only line that
# are part of the string stay visible.
template = (
    "\n"
    "You are AI-powered chatbot designed to provide \n"
    "information and assistance for customers\n"
    "based on the context provided to you only. \n"
    "            \n"
    "Context:{context}\n"
    "Question:{question}\n"
)

In [9]:
# Build the prompt object from the template text and preview it with
# example values for both placeholders.
prompt = PromptTemplate.from_template(template=template)
prompt.format(
    question=" This is a question to answer",
    context=" Here is a context to use",
)

'\nYou are AI-powered chatbot designed to provide \ninformation and assistance for customers\nbased on the context provided to you only. \n            \nContext: Here is a context to use\nQuestion: This is a question to answer\n'

In [10]:
# Map the incoming question onto the two prompt variables: the retriever is
# invoked with it to supply `context`, and RunnablePassthrough forwards it
# as `question`.
result = RunnableParallel(context=retriever, question=RunnablePassthrough())
# The pipeline has to start with the parallel step above. An integer literal
# in that position cannot be composed as a runnable and leaves the retriever
# out of the chain entirely.
chain = result | prompt | gpt_llm | parser

In [11]:
# Ask the text-file-backed chain about satellite materials.
chain.invoke(
    "What is the recommended material for satelites? "
)

"Based on the provided context, there is no direct recommendation for a specific material for satellites. However, it is mentioned that wood could be explored as a potential material due to its resilience in space and ability to enclose an antenna without blocking radio waves.\n\nIt's also noted by Salim that the structural integrity, safety, and longevity of wood need to be confirmed in space before considering its use. This implies that some assessment or testing needs to be done to determine if wood is a viable option for satellite construction.\n\nTherefore, based on the context, it can be inferred that wood might be a potential material being considered for satellites, but no specific recommendation is given at this time."

In [None]:
# Ask the text-file-backed chain about earlier wooden spacecraft.
chain.invoke(
    "are there any precedents to wood satellites? Do they work?"
)

In [None]:
# Loader documentation:
# https://python.langchain.com/v0.2/docs/integrations/document_loaders/url/
# May need: !pip install unstructured
from langchain_community.document_loaders import UnstructuredURLLoader

# Fetch the article straight from its URL and chunk it with the same
# 200/50 size/overlap settings used for the local text file.
loaderhmlt = UnstructuredURLLoader(
    urls=["https://www.nature.com/articles/d41586-024-01456-z"],
)
data = loaderhmlt.load()
spliterhtml = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
chunkhtml = spliterhtml.split_documents(data)
chunkhtml[3]

In [None]:
# The article of interest is included in the first 44 chunks, so only those
# are embedded and indexed.
vector_storagehtml = FAISS.from_documents(
    chunkhtml[:44],
    ollama_emb,
)
retrieverhtml = vector_storagehtml.as_retriever()

In [None]:
# Same pipeline shape as the text-file chain, but retrieving from the
# URL-derived index.
resulthtml = RunnableParallel(
    context=retrieverhtml,
    question=RunnablePassthrough(),
)
chainhtml = resulthtml | prompt | gpt_llm | parser
chain_htmlr = chainhtml.invoke(
    "are there any precedents to wood satellites? Do they work?"
)
chain_htmlr

In [None]:
# Ask the URL-backed chain about satellite materials and show the answer.
chain_htmlr2 = chainhtml.invoke(
    "What is the recommended material for satelites? "
)
chain_htmlr2

In [None]:
# NOTE(review): this cell repeats the preceding query verbatim and rebinds
# chain_htmlr2 with a fresh model response. Confirm whether the repeat is
# intentional.
chain_htmlr2 = chainhtml.invoke(
    "What is the recommended material for satelites? "
)
chain_htmlr2

In [None]:
# Put each sentence on its own line: every ". " becomes "." plus a newline.
chain_htmlr2 = chain_htmlr2.replace(". ", ".\n")
print(chain_htmlr2)

In [None]:
# Ask for a summary of the retrieved context, then print it with each
# sentence on its own line (every ". " becomes "." plus a newline).
chain_htmlr3 = chainhtml.invoke(
    "What is the main idea of the context provided? "
).replace(". ", ".\n")
print(chain_htmlr3)