In [2]:
!pip install langchain_community



In [3]:
!pip install unstructured



In [13]:
from langchain_community.document_loaders import UnstructuredURLLoader
urls = ['https://openai.com/news/',
        'https://blogboard.io/source-feed/source/openai-blog',
        'https://apiumhub.com/tech-blog-barcelona/tech-of-the-future-technology-predictions/',
        'https://www.linkedin.com/pulse/future-ai-2025-realistic-look-trends-opportunities-blaine-qrmvc#:~:text=Looking%20ahead%2C%20there%20is%20reason,figures%20are%20projections%2C%20not%20certainties.']
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

In [14]:
len(data)

4

In [15]:
data

[Document(metadata={'source': 'https://openai.com/news/'}, page_content=''),
 Document(metadata={'source': 'https://blogboard.io/source-feed/source/openai-blog'}, page_content=''),
 Document(metadata={'source': 'https://apiumhub.com/tech-blog-barcelona/tech-of-the-future-technology-predictions/'}, page_content="Tech of the future: technology predictions for our world in 2050\n\nEkaterina Novoseltseva\n\nJanuary 9, 2022\n\nTechnology industry trends\n\nInnovation\n\nShare This Post\n\nTable of Contents\n\nWhen we think about 2050 it seems like it is ages from now and we imagine a completely different world, but in reality, it is just 30 years from now and we can already know what will be possible to have by that time. We have a lot of environmental, social problems and let’s see how technology may solve them by 2050. Today’s article is about tech of the future!\n\nWant to become an Artificial Intelligence Certified professional? Intellipaat online Artificial Intelligence Course helps yo

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# split data
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitter.split_documents(data)


print("Total number of documents: ",len(docs))

Total number of documents:  43


In [17]:
docs[7]

Document(metadata={'source': 'https://apiumhub.com/tech-blog-barcelona/tech-of-the-future-technology-predictions/'}, page_content="Charge your iphone with the power of a plant\n\nDriving Software Innovation: Apiumhub's Successful Cases with Leading Companies\n\nDid you know that you can charge your iphone with the power of a plant? Forests can become the energy stations of the future. Now it is becoming possible, for example, Bioo is a clean-tech company capable of generating electricity from plant’s photosynthesis. Here you can watch a video to know more about it.\n\nOcean Thermal Energy can take us to 100% renewable-energy")

In [18]:
# Embedding models: https://python.langchain.com/v0.1/docs/integrations/text_embedding/
# Let's load the Hugging Face Embedding class.  sentence_transformers
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()

vector = embeddings.embed_query("hello, world!")
vector[:5]
#vector

  embeddings = HuggingFaceEmbeddings()


[0.034922681748867035,
 0.018830012530088425,
 -0.01785474270582199,
 0.0001388596574543044,
 0.07407369464635849]

In [19]:
from langchain_chroma import Chroma
vectorstore = Chroma.from_documents(documents=docs, embedding=HuggingFaceEmbeddings())

  vectorstore = Chroma.from_documents(documents=docs, embedding=HuggingFaceEmbeddings())


In [20]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

retrieved_docs = retriever.invoke("openai")

In [21]:
len(retrieved_docs)

3

In [22]:
print(retrieved_docs[2].page_content)

That said, progress in natural language understanding is advancing quickly. If your business relies on customer service, consumer engagement, or marketing, voice integration is a compelling space to watch—yet it’s prudent to view the timeline for mass adoption as a multi-year journey rather than a guaranteed reality by 2025.

AI-Generated Video and Multimedia Content

Tools like Runway ML, HeyGen, and OpenAI’s Sora are pushing the boundaries of AI-driven video generation. Verified use cases include rapid prototyping of marketing videos, certain forms of “faceless” YouTube content, and specialized creative effects for film. However, some reports about the ease and profitability of running entire networks of AI-generated channels—generating viral content at scale—are difficult to verify. While the technology is undeniably maturing, crossing the “uncanny valley” where AI videos become indistinguishable from human-made content is still, for many experts, a work in progress.


In [24]:
!pip install langchain_huggingface

Collecting langchain_huggingface
  Downloading langchain_huggingface-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==1

In [26]:
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import StrOutputParser
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


#model_id = "meta-llama/Meta-Llama-3-8B"
model_id = "tiiuae/falcon-7b"

text_generation_pipeline = pipeline(
    "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, max_new_tokens=400, device=0)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:

{context}

</s>
<|user|>
{question}
</s>
<|assistant|>

 """

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

llm_chain = prompt | llm | StrOutputParser()

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

Device set to use cpu


In [27]:
from langchain_core.runnables import RunnablePassthrough

rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain

In [28]:
question = "what is openai?"

In [None]:
rag_chain.invoke(question)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


In [None]:
question = "what is future of tech?"
response = rag_chain.invoke(question)

# Making the response readable
response = response.replace("</s>", "").strip()
print("Response:", response)



In [None]:
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import StrOutputParser
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name = "EleutherAI/gpt-neo-1.3B"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.1,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=400,
)
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)


In [None]:


prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:

{context}

</s>
<|user|>
{question}
</s>
<|assistant|>

 """

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)



In [None]:
llm_chain = prompt | llm | StrOutputParser()